# Source code for polyglotdb.io.parsers.timit

import os

from polyglotdb.io.helper import find_wav_path
from polyglotdb.io.parsers.base import BaseParser, DiscourseData
from polyglotdb.io.parsers.speaker import DirectorySpeakerParser



# [docs]
class TimitParser(BaseParser):
    """
    Parser for TIMIT-formatted corpora, driven by the ``.wrd`` word files.

    For every word file, the phone file sharing its base name (``.phn`` or
    ``.PHN``) is read too.  Word intervals are added to the first annotation
    tier and phone intervals to the second.

    Parameters
    ----------
    annotation_tiers : list
        Annotation types of the files to parse; index 0 receives the words
        and index 1 receives the phones
    hierarchy : :class:`~polyglotdb.structure.Hierarchy`
        Details of how linguistic types relate to one another
    stop_check : callable, optional
        Function to check whether to halt parsing
    call_back : callable, optional
        Function to output progress messages
    """

    _extensions = [".wrd", ".WRD"]

    def __init__(self, annotation_tiers, hierarchy, stop_check=None, call_back=None):
        super().__init__(
            annotation_tiers,
            hierarchy,
            make_transcription=True,
            make_label=False,
            stop_check=stop_check,
            call_back=call_back,
        )
        # Speakers are resolved per file through this parser's ``parse_path``.
        self.speaker_parser = DirectorySpeakerParser()

    def parse_discourse(self, word_path, types_only=False):
        """
        Parse one TIMIT word file, plus its phone file, for later importing.

        Parameters
        ----------
        word_path : str
            Path to TIMIT .wrd file
        types_only : bool
            Flag for whether to only save type information, ignoring the token information

        Returns
        -------
        :class:`~polyglotdb.io.discoursedata.DiscourseData`
            Parsed data from the file, with ``wav_path`` set from ``find_wav_path``
        """
        file_name = os.path.basename(word_path)
        name, ext = os.path.splitext(file_name)

        # The phone file sits next to the word file and follows the case of its extension.
        phone_suffix = ".PHN" if ext == ".WRD" else ".phn"
        phone_path = os.path.splitext(word_path)[0] + phone_suffix

        speaker = None
        if self.speaker_parser is not None:
            speaker = self.speaker_parser.parse_path(word_path)
            # Prefix the discourse name with the speaker.
            name = speaker + "_" + name

        for tier in self.annotation_tiers:
            tier.reset()
            tier.speaker = speaker

        if self.call_back is not None:
            self.call_back("Reading files...")
            self.call_back(0, 0)

        words = read_words(word_path)
        phones = read_phones(phone_path)

        # Pad the word tier so that it finishes where the last phone finishes.
        # NOTE(review): this filler is spelled "sil" while read_words spells its
        # gap fillers "<SIL>" -- confirm whether the difference is intentional.
        last_word_end = words[-1]["end"]
        last_phone_end = phones[-1][2]
        if last_word_end != last_phone_end:
            words.append({"spelling": "sil", "begin": last_word_end, "end": last_phone_end})

        self.annotation_tiers[0].add((w["spelling"], w["begin"], w["end"]) for w in words)
        self.annotation_tiers[1].add(phones)

        pg_annotations = self._parse_annotations(types_only)
        data = DiscourseData(name, pg_annotations, self.hierarchy)

        # Clear the tiers again now that their contents have been parsed.
        for tier in self.annotation_tiers:
            tier.reset()

        data.wav_path = find_wav_path(word_path)
        return data



def read_phones(path, sr=16000):
    """
    Read the phone segments from a TIMIT phone file.

    Every non-blank line is expected to hold ``<begin> <end> <label>``
    separated by whitespace, with ``begin`` and ``end`` given in samples.
    Both offsets are divided by ``sr`` before being stored.  Blank lines are
    skipped, and any run of spaces or tabs counts as a single separator.

    Parameters
    ----------
    path : str
        path to file
    sr : int or float, optional
        Sampling rate used to convert sample offsets; defaults to 16000

    Returns
    -------
    list of tuples
        each tuple is label, begin, end for a phone

    """
    output = []
    with open(path, "r") as file_handle:
        for line in file_handle:
            # split() with no argument tolerates tabs and repeated spaces and
            # yields an empty list for blank lines instead of [""].
            fields = line.split()
            if not fields:
                continue
            begin = float(fields[0]) / sr
            end = float(fields[1]) / sr
            output.append((fields[2], begin, end))
    return output


def read_words(path, sr=16000):
    """
    Read the word intervals from a TIMIT word file, filling gaps with ``<SIL>``.

    Every non-blank line is expected to hold ``<begin> <end> <word>``
    separated by whitespace, with ``begin`` and ``end`` given in samples.
    Both offsets are divided by ``sr`` before being stored.  Blank lines are
    skipped, and any run of spaces or tabs counts as a single separator.

    A ``<SIL>`` entry is inserted before a word whenever the word starts later
    than the end of the previous word (or later than 0 for the first word).
    No filler is inserted when a word starts at or before the previous word's
    end, so overlapping words never produce a negative-length ``<SIL>``.

    Parameters
    ----------
    path : str
        path to file
    sr : int or float, optional
        Sampling rate used to convert sample offsets; defaults to 16000

    Returns
    -------
    list of dicts
        each dict has spelling, begin, end

    """
    output = []
    prev = None
    with open(path, "r") as file_handle:
        for line in file_handle:
            # split() with no argument tolerates tabs and repeated spaces and
            # yields an empty list for blank lines instead of [""].
            fields = line.split()
            if not fields:
                continue
            begin = float(fields[0]) / sr
            end = float(fields[1]) / sr
            # The gap to fill starts at the file start for the first word,
            # otherwise at the end of the previous word.
            gap_start = 0 if prev is None else prev
            # Strictly-greater comparison: an overlap (begin < gap_start) must
            # not be turned into a silence whose end precedes its begin.
            if begin > gap_start:
                output.append({"spelling": "<SIL>", "begin": gap_start, "end": begin})
            output.append({"spelling": fields[2], "begin": begin, "end": end})
            prev = end
    return output