Source code for polyglotdb.corpus.pause

from polyglotdb.corpus.importable import ImportContext


[docs] class PauseContext(ImportContext): """ Class that contains methods for dealing specifically with non-speech elements """ @property def has_pauses(self): """ Check whether corpus has encoded pauses Returns ------- bool True if pause is in the subsets available for words """ return "pause" in self.hierarchy.subset_tokens[self.word_name] def encode_pauses(self, pause_words, call_back=None, stop_check=None): """ Set words to be pauses, as opposed to speech. Parameters ---------- pause_words : str, list, tuple, or set Either a list of words that are pauses or a string containing a regular expression that specifies pause words call_back : callable Function to monitor progress stop_check : callable Function to check whether process should be terminated early """ self.reset_pauses() word = getattr(self, self.word_name) for s in self.speakers: discourses = self.get_discourses_of_speaker(s) for d in discourses: q = self.query_graph(word) q = q.filter(word.speaker.name == s) q = q.filter(word.discourse.name == d) if call_back is not None: q.call_back = call_back if stop_check is not None: q.stop_check = stop_check if isinstance(pause_words, (list, tuple, set)): q = q.filter(word.label.in_(pause_words)) elif isinstance(pause_words, str): q = q.filter(word.label.regex(pause_words)) else: raise NotImplementedError q.set_pause() if call_back is not None: call_back("Finishing up...") for s in self.speakers: discourses = self.get_discourses_of_speaker(s) for d in discourses: statement = f"""MATCH (prec:{self.cypher_safe_name}:{self.word_name}:speech)-[:spoken_by]->(s:Speaker:{self.cypher_safe_name}), (prec)-[:spoken_in]->(d:Discourse:{self.cypher_safe_name}) WHERE not (prec)-[:precedes]->() AND s.name = $speaker AND d.name = $discourse WITH prec MATCH p = (prec)-[:precedes_pause*]->(foll:{self.cypher_safe_name}:{self.word_name}:speech) WITH prec, foll, p WHERE NONE (x in nodes(p)[1..-1] where x:speech) MERGE (prec)-[:precedes]->(foll)""" self.execute_cypher(statement, speaker=s, discourse=d) statement = f"""MATCH (s:Speaker:{self.cypher_safe_name})<-[:spoken_by]-(w:{self.word_name}:{self.cypher_safe_name}:speech)-[:spoken_in]->(d:Discourse:{self.cypher_safe_name}) WHERE s.name = $speaker AND d.name = $discourse with d, max(w.end) as speech_end, min(w.begin) as speech_begin set d.speech_begin = speech_begin, d.speech_end = speech_end""" self.execute_cypher(statement, speaker=s, discourse=d) self.hierarchy.add_token_subsets(self, self.word_name, ["pause"]) self.hierarchy.add_discourse_properties( self, [("speech_begin", float), ("speech_end", float)] ) self.encode_hierarchy() def reset_pauses(self): """ Revert all words marked as pauses to regular words marked as speech """ for s in self.speakers: discourses = self.get_discourses_of_speaker(s) for d in discourses: statement = """MATCH (n:{corpus}:{word_type}:speech)-[r:precedes]->(m:{corpus}:{word_type}:speech), (m)-[:spoken_by]->(s:Speaker:{corpus}), (m)-[:spoken_in]->(d:Discourse:{corpus}) WHERE (n)-[:precedes_pause]->() AND s.name = $speaker AND d.name = $discourse DELETE r""".format( corpus=self.cypher_safe_name, word_type=self.word_name ) self.execute_cypher(statement, speaker=s, discourse=d) statement = """MATCH (n:{corpus}:{word_type})-[r:precedes_pause]->(m:{corpus}:{word_type}), (m)-[:spoken_by]->(s:Speaker:{corpus}), (m)-[:spoken_in]->(d:Discourse:{corpus}) WHERE s.name = $speaker AND d.name = $discourse MERGE (n)-[:precedes]->(m) DELETE r""".format( corpus=self.cypher_safe_name, word_type=self.word_name ) self.execute_cypher(statement, speaker=s, discourse=d) statement = """MATCH (n:pause:{corpus})-[:spoken_by]->(s:Speaker:{corpus}), (n)-[:spoken_in]->(d:Discourse:{corpus}) WHERE s.name = $speaker AND d.name = $discourse SET n :speech REMOVE n:pause""".format( corpus=self.cypher_safe_name ) self.execute_cypher(statement, speaker=s, discourse=d) try: self.hierarchy.subset_tokens[self.word_name].remove("pause") self.encode_hierarchy() except (KeyError, ValueError): pass