Source code for polyglotdb.acoustics.formants.helper

import os
import re
from statistics import mean

import numpy as np
import scipy
from conch import analyze_segments
from conch.analysis.formants import (
    FormantTrackFunction,
    PraatSegmentFormantPointFunction,
    PraatSegmentFormantTrackFunction,
)
from conch.analysis.praat import PraatAnalysisFunction
from conch.analysis.segments import SegmentMapping
from pyraat.parse_outputs import parse_point_script_output

from polyglotdb.acoustics.io import point_measures_from_csv, point_measures_to_csv
from polyglotdb.exceptions import AcousticError


def sanitize_bandwidths(value):
    """Pull the first three bandwidths out of a Praat observation.

    Each of ``B1``, ``B2`` and ``B3`` is looked up in `value`.  If the entry
    can be indexed, its first element is used; if indexing raises
    ``TypeError`` the entry itself is used.  A result of ``None`` is
    replaced with ``0``.

    Parameters
    ----------
    value : dict
        Observation values produced by reading out from Praat.  Must contain
        the keys ``"B1"``, ``"B2"`` and ``"B3"``.

    Returns
    -------
    tuple
        ``(b1, b2, b3)``: the first, second and third bandwidth, in that
        order.
    """
    cleaned = []
    for key in ("B1", "B2", "B3"):
        try:
            bandwidth = value[key][0]
        except TypeError:
            # The entry is not subscriptable, so it is already a bare value.
            bandwidth = value[key]
        cleaned.append(0 if bandwidth is None else bandwidth)
    return tuple(cleaned)


def track_nformants(track):
    """Return the highest formant index present among a track's keys.

    Only keys that start with ``"F"`` are considered, and only the single
    character following the ``"F"`` is converted to an integer.

    Parameters
    ----------
    track : dict
        The measured track, keyed by measurement name (e.g. ``"F1"``).

    Returns
    -------
    int
        The largest formant index found, i.e. the number of formants used
        to measure that track.

    Raises
    ------
    ValueError
        If no key starts with ``"F"``, or if the character after the ``"F"``
        is not a digit.
    """
    highest = max(int(name[1]) for name in track.keys() if name.startswith("F"))
    return highest


def parse_multiple_formant_output(output):
    """Split combined Praat output into one parsed listing per formant count.

    The text is split on blank lines; each chunk is handed to
    ``parse_point_script_output``.  The ``"num_formants"`` entry is removed
    from every parsed chunk and used as that chunk's key in the result.

    Parameters
    ----------
    output : str
        Raw text containing several listings separated by blank lines.

    Returns
    -------
    dict
        Maps each reported number of formants to the remaining parsed
        measurements of the corresponding listing.

    Raises
    ------
    KeyError
        If a parsed listing has no ``"num_formants"`` entry.
    """
    # Ordinary (non-raw) literals are required here: raw literals would match
    # the literal backslash characters instead of real CR/LF line endings.
    normalized = output.replace("\r\n", "\n")
    results = {}
    for listing in re.split(r"\r?\n\r?\n", normalized):
        parsed = parse_point_script_output(listing)
        reported_nformants = parsed.pop("num_formants")
        results[reported_nformants] = parsed
    return results


def generate_variable_formants_point_function(corpus_context, min_formants, max_formants):
    """Build the Praat function that measures formants over a range of formant counts.

    The function runs the ``multiple_num_formants.praat`` script located next
    to this module, and its output is parsed with
    ``parse_multiple_formant_output``.  Where in the segment the point is
    measured is decided by the script itself (earlier documentation describes
    it as a third of the way through the segment; verify against the script).

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        The CorpusContext object of the corpus; ``config.praat_path`` is read
        from it.
    min_formants : int
        The minimum number of formants to measure with on subsequent passes.
    max_formants : int
        The maximum number of formants to measure with on subsequent passes.

    Returns
    -------
    formant_function : PraatAnalysisFunction
        The function used to call Praat.
    """
    praat_script = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "multiple_num_formants.praat"
    )
    max_frequency = 5500
    # Presumably time step and window length come first, matching the
    # 0.01 / 0.025 values used elsewhere in this module -- confirm in the script.
    script_arguments = [0.01, 0.025, min_formants, max_formants, max_frequency]
    analysis_function = PraatAnalysisFunction(
        praat_script,
        praat_path=corpus_context.config.praat_path,
        arguments=script_arguments,
    )
    # The script emits several listings at once, so swap in the parser that
    # splits them by number of formants.
    analysis_function._function._output_parse_function = parse_multiple_formant_output
    return analysis_function
def generate_formants_point_function(corpus_context, gender=None):
    """Build the Praat function that measures formant points with fixed settings.

    The returned function is configured with a maximum frequency of 5500,
    5 formants, a window length of 0.025 and a time step of 0.01.

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        The CorpusContext object of the corpus; ``config.praat_path`` is read
        from it.
    gender : str, optional
        Accepted for interface compatibility; this function does not use it.

    Returns
    -------
    formant_function : PraatSegmentFormantPointFunction
        The function used to call Praat.
    """
    return PraatSegmentFormantPointFunction(
        praat_path=corpus_context.config.praat_path,
        max_frequency=5500,
        num_formants=5,
        window_length=0.025,
        time_step=0.01,
    )
def get_mean_SD(data, prototype_parameters=None):
    """Compute per-phone means and covariance matrices of point measurements.

    Parameters
    ----------
    data : dict
        Maps each segment to its measurements.  A segment is indexed with
        ``"label"`` to obtain its phone; its measurements must provide every
        name listed in `prototype_parameters`.
    prototype_parameters : list of str, optional
        Names of the measurements to summarise.  Defaults to
        ``["F1", "F2", "F3", "B1", "B2", "B3"]``.

    Returns
    -------
    metadata : dict
        Maps each phone label to ``[means, covariance]``, where ``means`` is
        a list with one mean per parameter and ``covariance`` is the output
        of ``numpy.cov`` converted with ``tolist()``.
    """
    if prototype_parameters is None:
        prototype_parameters = ["F1", "F2", "F3", "B1", "B2", "B3"]

    # Group the observations by phone in a single pass over the data instead
    # of rescanning every segment once per phone.
    observations_by_phone = {}
    for seg, value in data.items():
        observation = [value[pp] for pp in prototype_parameters]
        # Falsy measurements (e.g. None) are counted as 0.
        observations_by_phone.setdefault(seg["label"], []).append(
            [x if x else 0 for x in observation]
        )

    metadata = {}
    for phone, observations in observations_by_phone.items():
        # zip(*observations) yields one column of values per parameter.
        all_means = [mean(column) for column in zip(*observations)]
        cov = np.cov(np.array(observations).T)
        metadata[phone] = [all_means, cov.tolist()]
    return metadata


def get_mahalanobis(prototype, observation, inverse_covariance):
    """Get the Mahalanobis distance between an observation and the prototype.

    Parameters
    ----------
    prototype : list
        Prototype data.
    observation : list
        Given observation of a vowel instance.
    inverse_covariance : list
        The inverse of the covariance matrix for the vowel class.

    Returns
    -------
    distance : float
        The Mahalanobis distance for the observation.
    """
    # Import the function explicitly: a bare ``import scipy`` does not
    # guarantee that the ``scipy.spatial`` subpackage has been loaded.
    from scipy.spatial.distance import mahalanobis

    return mahalanobis(
        np.array(prototype), np.array(observation), np.array(inverse_covariance)
    )


def save_formant_point_data(corpus_context, data, num_formants=False):
    """Write formant point measures to CSV and load them back into the corpus.

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        The CorpusContext object of the corpus.
    data : dict
        Point measurements, passed unchanged to ``point_measures_to_csv``.
    num_formants : bool
        If truthy, a ``num_formants`` column is saved as well.
    """
    header = [
        "id",
        "F1",
        "F2",
        "F3",
        "B1",
        "B2",
        "B3",
        "A1",
        "A2",
        "A3",
        "Ax",
        "drop_formant",
    ]
    if num_formants:
        header += ["num_formants"]
    point_measures_to_csv(corpus_context, data, header)

    # ``id`` is skipped; the two count-like columns are typed as int and
    # every other column as float.  (Testing ``h != a or h != b`` is always
    # true and would type every column as float.)
    integer_columns = ("num_formants", "drop_formant")
    header_info = {
        h: (int if h in integer_columns else float) for h in header if h != "id"
    }
    point_measures_from_csv(corpus_context, header_info)


def extract_and_save_formant_tracks(
    corpus_context, data, num_formants=False, stop_check=None, multiprocessing=True
):
    """Recalculate formants as tracks using each segment's chosen formant count.

    Segments are grouped by their ``"num_formants"`` measurement (5 when it
    is absent), analysed with ``PraatSegmentFormantTrackFunction`` per group,
    and the F1-F3 / B1-B3 tracks are saved per speaker as the ``"formants"``
    acoustic measure.

    Note that the ``begin`` and ``end`` of every segment in `data` are
    modified in place: each is widened by 0.02 on both sides before analysis.

    Parameters
    ----------
    corpus_context : :class:`~polyglot.corpus.context.CorpusContext`
        The CorpusContext object of the corpus.
    data : dict
        Maps each segment to the best point measurements found for it.
    num_formants : bool
        Not used by this function.
    stop_check : callable, optional
        Passed through to ``analyze_segments``.
    multiprocessing : bool
        Passed through to ``analyze_segments``.
    """
    save_padding = 0.02
    default_n_formants = 5  # used when there were not enough samples

    # One SegmentMapping per number of formants.
    segment_mappings = {}
    for segment, measurement in data.items():
        segment.begin -= save_padding
        segment.end += save_padding
        if "num_formants" in measurement:
            n_formants = measurement["num_formants"]
        else:
            n_formants = default_n_formants
        if n_formants not in segment_mappings:
            segment_mappings[n_formants] = SegmentMapping()
        segment_mappings[n_formants].segments.append(segment)

    outputs = {}
    for n_formants, mapping in segment_mappings.items():
        func = PraatSegmentFormantTrackFunction(
            praat_path=corpus_context.config.praat_path,
            max_frequency=5500,
            num_formants=n_formants,
            window_length=0.025,
            time_step=0.01,
        )
        output = analyze_segments(
            mapping,
            func,
            stop_check=stop_check,
            multiprocessing=multiprocessing,
        )
        outputs.update(output)

    formant_tracks = ["F1", "F2", "F3", "B1", "B2", "B3"]
    tracks = {}
    for segment, points in outputs.items():
        # Keep only the saved measures at every time point.
        track = {
            time: {f: formants[f] for f in formant_tracks}
            for time, formants in points.items()
        }
        tracks.setdefault(segment["speaker"], {})[segment] = track

    if "formants" not in corpus_context.hierarchy.acoustics:
        corpus_context.hierarchy.add_acoustic_properties(
            corpus_context, "formants", [(x, float) for x in formant_tracks]
        )
    for speaker, track_dict in tracks.items():
        corpus_context.save_acoustic_tracks("formants", track_dict, speaker)
def generate_base_formants_function(corpus_context, gender=None, source="praat"):
    """Build the function used to calculate formant tracks.

    Parameters
    ----------
    corpus_context : :class:`polyglot.corpus.context.CorpusContext`
        The CorpusContext object of the corpus.
    gender : str
        The gender to use for the function; if "M" (male) the max frequency
        is 5000 Hz, otherwise 5500.
    source : str
        If "praat", a ``PraatSegmentFormantTrackFunction`` is built and
        ``corpus_context.config.praat_path`` must be set; for any other value
        a ``FormantTrackFunction`` is built instead.

    Returns
    -------
    formant_function : PraatSegmentFormantTrackFunction or FormantTrackFunction
        The function used to calculate formant tracks.

    Raises
    ------
    AcousticError
        If `source` is "praat" and the configuration has no ``praat_path``.
    """
    max_freq = 5000 if gender == "M" else 5500

    if source != "praat":
        return FormantTrackFunction(
            max_frequency=max_freq, time_step=0.01, num_formants=5, window_length=0.025
        )

    if getattr(corpus_context.config, "praat_path", None) is None:
        raise AcousticError("Could not find the Praat executable")

    return PraatSegmentFormantTrackFunction(
        praat_path=corpus_context.config.praat_path,
        max_frequency=max_freq,
        num_formants=5,
        window_length=0.025,
        time_step=0.01,
    )