"""Helpers for converting phoneme symbols to Apple-style codes and perturbing them.

Importing this module reads ``./similarity.csv`` (relative to the current
working directory) and builds ``apple_sim_table`` from it.
"""
import random
import re

import numpy as np
import pandas as pd
import pronouncing

# Source symbol -> Apple phoneme code, one "SOURCE TARGET" pair per line.
# NOTE(review): the keys look like ARPAbet symbols -- confirm against the
# index of similarity.csv.
mapping = dict(
    row.split()
    for row in """
AA AA
AE AE
AH UX
AO AO
AW AW
AY AY
B b
CH C
D d
DH D
EH EH
ER UXr
EY EY
F f
G g
HH h
IH IH
IY IY
JH J
K k
L l
M m
N n
NG N
OW OW
OY OY
P p
R r
S s
SH S
T t
TH T
UH UH
UW UW
V v
W w
Y y
Z z
ZH Z
""".strip().split('\n')
)

# Tokens recognised by parse_apple_phonemes.
apple_phonemes = [
    '%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA',
    'UW', 'UH', 'UX', 'OW', 'AW', 'OY', 'b', 'C', 'd', 'D', 'f', 'g', 'h',
    'J', 'k', 'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w',
    'y', 'z', 'Z',
]

# Raw similarity matrix; the first column is used as the row labels.
sim_mat = pd.read_csv('./similarity.csv', header=0, index_col=0)

# Raw string: "\w" in a plain string literal is an invalid escape sequence.
_STRESSED_SYMBOL = re.compile(r"(\w+)([0-9])")


def convert_ph(ph):
    """Translate one source phoneme symbol into its Apple code.

    A symbol that ends in a stress digit (e.g. ``"AH1"``) is converted to the
    digit followed by the mapped code (``"1UX"``); any other symbol is looked
    up in ``mapping`` directly.

    Raises:
        KeyError: if the symbol (without its stress digit) is not in
            ``mapping``.
    """
    stressed = _STRESSED_SYMBOL.search(ph)
    if stressed:
        return stressed.group(2) + mapping[stressed.group(1)]
    return mapping[ph]


def sim_mat_to_apple_table(smt):
    """Turn a similarity matrix into a long table of (q, r, s) rows.

    The matrix is added to its own transpose, NaNs are treated as 0 and the
    diagonal is forced to 100.0.  Row labels of ``smt`` (converted with
    ``convert_ph``) are used for both axes, so ``smt`` must be square.

    Args:
        smt: DataFrame whose index holds the source phoneme symbols.

    Returns:
        DataFrame with columns ``q`` (row label), ``r`` (column label) and
        ``s`` (score), one row per matrix cell.
    """
    labels = [convert_ph(ph) for ph in smt.index.tolist()]

    # Work on a plain ndarray: writing through DataFrame.values is not
    # reliable (the array is read-only under pandas Copy-on-Write).
    scores = np.nan_to_num(smt.values)
    symmetric = scores.T + scores
    np.fill_diagonal(symmetric, 100.0)

    table = pd.DataFrame(symmetric, index=labels, columns=labels)
    long_table = table.stack().reset_index()
    long_table.columns = ['q', 'r', 's']
    return long_table


apple_sim_table = sim_mat_to_apple_table(sim_mat)


def top_match(ph):
    """Return the phoneme most similar to ``ph`` according to ``apple_sim_table``.

    Only candidates with a score in ``[70, 100)`` are considered (100 is the
    self-match placed on the diagonal).  When there is no such candidate,
    ``ph`` itself is returned.
    """
    candidates = apple_sim_table[
        (apple_sim_table.q == ph)
        & (apple_sim_table.s < 100)
        & (apple_sim_table.s >= 70)
    ]
    if len(candidates) == 0:
        return ph
    # Positional argmax picks the first best-scoring row without sorting.
    best_pos = candidates['s'].values.argmax()
    return candidates['r'].iloc[best_pos]


class ApplePhoneme(object):
    """One parsed phoneme token.

    Attributes:
        phone: the token text without any stress digit.
        stress: integer stress level; only rendered for vowels when > 0.
        vowel: whether the token is treated as a vowel.
    """

    def __init__(self, phone, stress, vowel=False):
        super(ApplePhoneme, self).__init__()
        self.phone = phone
        self.stress = stress
        self.vowel = vowel

    def __str__(self):
        # The stress digit is emitted only for stressed vowels.
        if self.vowel and self.stress > 0:
            return str(self.stress) + self.phone
        return self.phone

    def __repr__(self):
        return "'{}'".format(str(self))

    def adjust_stress(self):
        """Replace the stress with a random different value from 0, 1, 2."""
        self.stress = random.choice([i for i in range(3) if i != self.stress])


def parse_apple_phonemes(ph_str):
    """Split a phoneme string into a list of ``ApplePhoneme`` objects.

    Starting at the current position, the shortest prefix that satisfies one
    of these rules (checked in this order) becomes the next token:

    1. it is listed in ``apple_phonemes`` (stress 0; vowel if it starts with
       one of ``AEIOU``);
    2. it is a digit followed by a listed phoneme (stress = that digit,
       flagged as a vowel);
    3. it is not purely alphanumeric (kept verbatim, non-vowel).

    If no prefix of the remaining text matches, parsing stops and the
    remainder is dropped.

    The scan is iterative so that long inputs do not exhaust the recursion
    limit.
    """
    parsed = []
    start = 0
    length = len(ph_str)
    while start < length:
        for end in range(start + 1, length + 1):
            token = ph_str[start:end]
            if token in apple_phonemes:
                phoneme = ApplePhoneme(token, 0, token[0] in 'AEIOU')
            elif token[0].isdigit() and token[1:] in apple_phonemes:
                phoneme = ApplePhoneme(token[1:], int(token[0]), True)
            elif not token.isalnum():
                phoneme = ApplePhoneme(token, 0, False)
            else:
                continue
            parsed.append(phoneme)
            start = end
            break
        else:
            # Nothing matched from this position: stop and drop the rest.
            break
    return parsed


def similar_phoneme(ph_str):
    """Return ``ph_str`` with the stress of one randomly chosen vowel changed.

    The string is parsed with ``parse_apple_phonemes`` and re-serialised.  If
    it contains no vowel, it is re-serialised without any change.
    """
    phons = parse_apple_phonemes(ph_str)
    vowels = [p for p in phons if p.vowel]
    if vowels:  # random.choice raises IndexError on an empty list
        random.choice(vowels).adjust_stress()
    return ''.join(str(p) for p in phons)


def similar_word(word_str):
    """Return a random rhyme of ``word_str``, or the word itself if none exist."""
    rhymes = pronouncing.rhymes(word_str)
    return random.choice(rhymes) if rhymes else word_str