# Source code for polyglotdb.config

import configparser
import logging
import os

# Root directory for PolyglotDB settings; the PGDB_HOME environment variable
# takes precedence over the default of ".pgdb" in the user's home directory.
CONFIG_DIR = os.environ.get("PGDB_HOME", os.path.expanduser("~/.pgdb"))

# Default directory for corpus data; may be overridden by config.ini below.
BASE_DIR = os.path.join(CONFIG_DIR, "data")

# Optional INI file holding user overrides.
CONFIG_PATH = os.path.join(CONFIG_DIR, "config.ini")

CONFIG = configparser.ConfigParser()
if os.path.exists(CONFIG_PATH):
    CONFIG.read(CONFIG_PATH)
    # A config file without a [Data] section or a "directory" option must not
    # break importing this module, so fall back to the default BASE_DIR
    # instead of raising KeyError.
    _configured_dir = CONFIG.get("Data", "directory", fallback=None)
    if _configured_dir is not None:
        BASE_DIR = os.path.expanduser(os.path.join(_configured_dir, "data"))


def setup_logger(logger_name, log_file, level=logging.INFO):
    """
    Set up a Python logger to use for error/debug/info messages

    The logger writes every record at or above ``level`` to ``log_file``
    (opened in append mode) and additionally echoes records at ``ERROR`` or
    above through a stream handler.  Calling this function repeatedly for the
    same logger and log file is safe: handlers are only attached once, so
    messages are not duplicated and file handles are not leaked.

    Parameters
    ----------
    logger_name : str
        Name of the logger
    log_file : str
        Path to the log file to save into
    level : int
        Minimum level to log
    """
    logger = logging.getLogger(logger_name)
    formatter = logging.Formatter("%(asctime)s : %(message)s")

    # FileHandler stores its target as an absolute path, so normalise the
    # requested path the same way before comparing.
    target_path = os.path.abspath(log_file)

    has_file_handler = False
    has_stream_handler = False
    for existing in logger.handlers:
        # FileHandler subclasses StreamHandler, so test for it first.
        if isinstance(existing, logging.FileHandler):
            if existing.baseFilename == target_path:
                has_file_handler = True
        elif isinstance(existing, logging.StreamHandler):
            has_stream_handler = True

    logger.setLevel(level)

    if not has_file_handler:
        file_handler = logging.FileHandler(log_file, mode="a")
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    if not has_stream_handler:
        stream_handler = logging.StreamHandler()
        # Only errors are echoed to the stream; everything else goes to file.
        stream_handler.setLevel(logging.ERROR)
        stream_handler.setFormatter(formatter)
        logger.addHandler(stream_handler)

    logger.info("---------INIT-----------")


class CorpusConfig(object):
    """
    Class for storing configuration information about a corpus.

    Creating an instance also creates the corpus directory tree on disk
    (base, logs, temp, data and audio directories) if it does not exist yet.

    Parameters
    ----------
    corpus_name : str
        Identifier for the corpus
    data_dir : str, optional
        Directory under which the corpus directory is created; when omitted,
        the module-level ``BASE_DIR`` is used
    kwargs : keyword arguments
        All keywords will be converted to attributes of the object; they are
        applied last, so they override the defaults listed below

    Attributes
    ----------
    corpus_name : str
        Identifier of the corpus
    acoustic_user : str
        Username for the acoustic database connection, defaults to None
    acoustic_password : str
        Password for the acoustic database connection, defaults to None
    acoustic_http_port : int
        Port for the acoustic database connection, defaults to 8086
    graph_user : str
        Username for connecting to the graph database, defaults to None
    graph_password : str
        Password for connecting to the graph database, defaults to None
    host : str
        Host used for the database connections, defaults to "localhost"
    query_behavior : str
        Defaults to "speaker"
    graph_http_port : int
        HTTP port for the graph database, defaults to 7474
    graph_bolt_port : int
        Bolt port for the graph database, defaults to 7687
    debug : bool
        Defaults to False
    base_dir : str
        Base directory to store information and temporary files for the
        corpus: ``data_dir`` joined with ``corpus_name``
    log_level : int
        Logging level, defaults to ``logging.DEBUG``
    log_dir : str
        "logs" directory under ``base_dir``
    temp_dir : str
        "temp" directory under ``base_dir``
    data_dir : str
        "data" directory under ``base_dir``
    audio_dir : str
        "audio" directory under ``data_dir``
    engine : str
        Type of SQL database, defaults to "sqlite"
    db_path : str
        ``data_dir`` joined with ``corpus_name``
    time_sampling : float
        Defaults to 0.01
    """

    def __init__(self, corpus_name, data_dir=None, **kwargs):
        self.corpus_name = corpus_name
        self.acoustic_user = None
        self.acoustic_password = None
        self.acoustic_http_port = 8086
        self.graph_user = None
        self.graph_password = None
        self.host = "localhost"
        self.query_behavior = "speaker"
        self.graph_http_port = 7474
        self.graph_bolt_port = 7687
        self.debug = False

        if data_dir is None:
            data_dir = BASE_DIR
        self.base_dir = os.path.join(data_dir, self.corpus_name)
        os.makedirs(self.base_dir, exist_ok=True)

        self.log_level = logging.DEBUG

        self.log_dir = os.path.join(self.base_dir, "logs")
        os.makedirs(self.log_dir, exist_ok=True)

        self.temp_dir = os.path.join(self.base_dir, "temp")
        os.makedirs(self.temp_dir, exist_ok=True)

        # Note: self.data_dir is the corpus-specific "data" directory, not the
        # ``data_dir`` argument passed to the constructor.
        self.data_dir = os.path.join(self.base_dir, "data")
        os.makedirs(self.data_dir, exist_ok=True)

        self.audio_dir = os.path.join(self.data_dir, "audio")
        os.makedirs(self.audio_dir, exist_ok=True)

        self.engine = "sqlite"
        self.db_path = os.path.join(self.data_dir, self.corpus_name)

        self.time_sampling = 0.01

        # Applied last so that callers can override any default set above.
        for k, v in kwargs.items():
            setattr(self, k, v)

    def temporary_directory(self, name):
        """
        Create a named directory under the corpus's temporary directory (if
        it does not already exist) and return its path.

        The directory is not removed by this method.

        Parameters
        ----------
        name : str
            Name of the directory to create inside ``temp_dir``

        Returns
        -------
        str
            Full path to temporary directory
        """
        temp = os.path.join(self.temp_dir, name)
        os.makedirs(temp, exist_ok=True)
        return temp

    @property
    def acoustic_connection_kwargs(self):
        """
        Return connection parameters to use for connecting to an InfluxDB database

        The "username" and "password" entries are only included when the
        corresponding attribute is not None.

        Returns
        -------
        dict
            Connection parameters
        """
        kwargs = {
            "host": self.host,
            "port": self.acoustic_http_port,
            "database": self.corpus_name,
        }
        if self.acoustic_user is not None:
            kwargs["username"] = self.acoustic_user
        if self.acoustic_password is not None:
            kwargs["password"] = self.acoustic_password
        return kwargs

    @property
    def graph_connection_string(self):
        """
        Construct a connection string to use for Neo4j

        Returns
        -------
        str
            Connection string
        """
        return "bolt://{}:{}".format(self.host, self.graph_bolt_port)