Source code for polyglotdb.io.parsers.timit

import os

from polyglotdb.io.helper import find_wav_path
from polyglotdb.io.parsers.base import BaseParser, DiscourseData
from polyglotdb.io.parsers.speaker import DirectorySpeakerParser


class TimitParser(BaseParser):
    """
    Parser for the TIMIT corpus.

    Reads a word-level ``.wrd`` file together with its sibling phone-level
    ``.phn`` file and exposes them as two annotation tiers: word labels
    (tier 0) and surface transcription labels (tier 1).

    Parameters
    ----------
    annotation_tiers : list
        Annotation types of the files to parse
    hierarchy : :class:`~polyglotdb.structure.Hierarchy`
        Details of how linguistic types relate to one another
    stop_check : callable, optional
        Function to check whether to halt parsing
    call_back : callable, optional
        Function to output progress messages
    """

    # Word files are the entry point; both lower- and upper-case
    # extensions are recognised.
    _extensions = [".wrd", ".WRD"]

    def __init__(self, annotation_tiers, hierarchy, stop_check=None, call_back=None):
        super().__init__(
            annotation_tiers,
            hierarchy,
            make_transcription=True,
            make_label=False,
            stop_check=stop_check,
            call_back=call_back,
        )
        self.speaker_parser = DirectorySpeakerParser()

    def parse_discourse(self, word_path, types_only=False):
        """
        Parse a TIMIT word file (and its matching phone file) for import.

        Parameters
        ----------
        word_path : str
            Path to TIMIT .wrd file
        types_only : bool
            Flag for whether to only save type information, ignoring the
            token information

        Returns
        -------
        :class:`~polyglotdb.io.discoursedata.DiscourseData`
            Parsed data from the file
        """
        stem, ext = os.path.splitext(word_path)
        name = os.path.basename(stem)

        # The phone file sits next to the word file and follows the same
        # letter case as the word file's extension.
        phone_ext = ".PHN" if ext == ".WRD" else ".phn"
        phone_path = stem + phone_ext

        speaker = None
        if self.speaker_parser is not None:
            speaker = self.speaker_parser.parse_path(word_path)
            name = speaker + "_" + name

        for tier in self.annotation_tiers:
            tier.reset()
            tier.speaker = speaker

        if self.call_back is not None:
            self.call_back("Reading files...")
            self.call_back(0, 0)

        words = read_words(word_path)
        phones = read_phones(phone_path)

        # Pad the word tier with a final "sil" entry when the last word
        # does not end where the last phone ends.
        last_word_end = words[-1]["end"]
        last_phone_end = phones[-1][2]
        if last_word_end != last_phone_end:
            words.append(
                {"spelling": "sil", "begin": last_word_end, "end": last_phone_end}
            )

        word_tier, phone_tier = self.annotation_tiers[0], self.annotation_tiers[1]
        word_tier.add((w["spelling"], w["begin"], w["end"]) for w in words)
        phone_tier.add(phones)

        pg_annotations = self._parse_annotations(types_only)
        data = DiscourseData(name, pg_annotations, self.hierarchy)

        # Clear the tiers again once their contents have been handed to
        # the DiscourseData.
        for tier in self.annotation_tiers:
            tier.reset()

        data.wav_path = find_wav_path(word_path)
        return data
def _iter_timit_rows(path, sample_rate):
    """Yield ``(begin, end, label)`` for each non-blank line of a TIMIT file.

    Each line is expected to hold ``<begin sample> <end sample> <label>``.
    Sample indices are converted to seconds by dividing by *sample_rate*.
    """
    with open(path, "r") as file_handle:
        for line in file_handle:
            # split() with no argument tolerates tabs and repeated spaces,
            # and returns an empty list for blank lines.
            fields = line.split()
            if not fields:
                continue
            begin = float(fields[0]) / sample_rate
            end = float(fields[1]) / sample_rate
            yield begin, end, fields[2]


def read_phones(path, sample_rate=16000):
    """
    From a TIMIT file, reads the phone lines and collects label, begin,
    and end for each phone.

    Blank lines are skipped and fields may be separated by any run of
    whitespace.

    Parameters
    ----------
    path : str
        path to file
    sample_rate : int, optional
        Sampling rate used to convert sample indices to seconds
        (defaults to 16000)

    Returns
    -------
    list of tuples
        each tuple is label, begin, end for a phone
    """
    return [
        (label, begin, end)
        for begin, end, label in _iter_timit_rows(path, sample_rate)
    ]


def read_words(path, sample_rate=16000):
    """
    From a TIMIT file, reads the word info.

    A ``<SIL>`` entry is inserted wherever there is a gap before a word:
    between time 0 and the first word, or between the end of one word and
    the beginning of the next. Words that overlap the previous word do not
    produce a ``<SIL>`` entry, since that interval would have a negative
    duration.

    Parameters
    ----------
    path : str
        path to file
    sample_rate : int, optional
        Sampling rate used to convert sample indices to seconds
        (defaults to 16000)

    Returns
    -------
    list of dicts
        each dict has spelling, begin, end
    """
    output = []
    prev_end = None
    for begin, end, word in _iter_timit_rows(path, sample_rate):
        # Before the first word the gap is measured from the file start.
        gap_start = 0 if prev_end is None else prev_end
        if begin > gap_start:
            output.append({"spelling": "<SIL>", "begin": gap_start, "end": begin})
        output.append({"spelling": word, "begin": begin, "end": end})
        prev_end = end
    return output