"""Helpers for parsing and perturbing phoneme strings.

The phoneme inventory lives in ``apple_phonemes`` (presumably the symbol set
of Apple's speech synthesizer -- confirm against the caller).  A phoneme
string is a concatenation of those symbols, where a vowel may be preceded by
a single stress digit, e.g. ``"h1EHlOW"``.
"""
import random
import re

import numpy as np
import pandas as pd

try:
    import pronouncing
except ImportError:
    # Only similar_word()/similar_phrase() need this package; the phoneme
    # utilities below are usable without it.
    pronouncing = None

# NOTE: legacy, disabled similarity-table code.  It is kept for reference and
# is the only user of ``pd``, ``np`` and ``re`` in this module.
#
# mapping = {
#     s.split()[0]: s.split()[1]
#     for s in """
# AA AA
# AE AE
# AH UX
# AO AO
# AW AW
# AY AY
# B b
# CH C
# D d
# DH D
# EH EH
# ER UXr
# EY EY
# F f
# G g
# HH h
# IH IH
# IY IY
# JH J
# K k
# L l
# M m
# N n
# NG N
# OW OW
# OY OY
# P p
# R r
# S s
# SH S
# T t
# TH T
# UH UH
# UW UW
# V v
# W w
# Y y
# Z z
# ZH Z
# """.strip().split('\n')
# }
# sim_mat = pd.read_csv('./similarity.csv', header=0, index_col=0)
#
#
# def convert_ph(ph):
#     stress_level = re.search("(\w+)([0-9])", ph)
#     if stress_level:
#         return stress_level.group(2) + mapping[stress_level.group(1)]
#     else:
#         return mapping[ph]
#
#
# def sim_mat_to_apple_table(smt):
#     colnames = [convert_ph(ph) for ph in smt.index.tolist()]
#     smt = pd.DataFrame(np.nan_to_num(smt.values))
#     fsmt = (smt.T + smt)
#     np.fill_diagonal(fsmt.values, 100.0)
#     asmt = pd.DataFrame.copy(fsmt)
#     asmt.columns = colnames
#     asmt.index = colnames
#     apple_sim_table = asmt.stack().reset_index()
#     apple_sim_table.columns = ['q', 'r', 's']
#     return apple_sim_table
#
#
# apple_sim_table = sim_mat_to_apple_table(sim_mat)
#
#
# def top_match(ph):
#     selected = apple_sim_table[(apple_sim_table.q == ph)
#                                & (apple_sim_table.s < 100) &
#                                (apple_sim_table.s >= 70)]
#     tm = ph
#     if len(selected) > 0:
#         tm = pd.DataFrame.sort_values(selected, 's', ascending=False).iloc[0].r
#     return tm

# Recognised phoneme symbols.  Symbols are case-sensitive ('s' and 'S' are
# distinct entries).
apple_phonemes = [
    '%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA',
    'UW', 'UH', 'UX', 'OW', 'AW', 'OY', 'b', 'C', 'd', 'D', 'f', 'g', 'h',
    'J', 'k', 'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w',
    'y', 'z', 'Z'
]


class ApplePhoneme(object):
    """One token of a parsed phoneme string.

    Attributes:
        phone: the symbol text (a phoneme from ``apple_phonemes``, or the raw
            text of a non-phoneme token such as punctuation).
        stress: integer stress level.  The parser uses ``0`` for unmarked
            phonemes, the leading digit for stress-marked vowels and ``-1``
            for non-phoneme tokens.
        vowel: ``True`` when the token is treated as a vowel.
    """

    def __init__(self, phone, stress, vowel=False):
        super(ApplePhoneme, self).__init__()
        self.phone = phone
        self.stress = stress
        self.vowel = vowel

    def __str__(self):
        # The stress digit is only emitted for vowels with positive stress;
        # everything else is rendered as the bare symbol.
        if self.vowel and self.stress > 0:
            return str(self.stress) + self.phone
        return self.phone

    def __repr__(self):
        return "'{}'".format(str(self))

    def adjust_stress(self):
        """Replace ``stress`` with a random level from 0-2 other than the current one."""
        self.stress = random.choice([i for i in range(3) if i != self.stress])


def _next_phoneme(ph_str, start):
    """Match one token of ``ph_str`` beginning at index ``start``.

    Candidate prefixes are tried shortest first; the first prefix satisfying
    one of the rules below (checked in this order) wins:

    1. the prefix is a known phoneme            -> stress 0
    2. a digit followed by a known phoneme      -> stressed vowel
    3. the prefix is not purely alphanumeric    -> non-phoneme token, stress -1

    Returns ``(ApplePhoneme, end_index)`` or ``None`` if nothing matches.
    """
    for end in range(start + 1, len(ph_str) + 1):
        pref = ph_str[start:end]
        if pref in apple_phonemes:
            return ApplePhoneme(pref, 0, pref[0] in 'AEIOU'), end
        if pref[0].isdigit() and pref[1:] in apple_phonemes:
            return ApplePhoneme(pref[1:], int(pref[0]), True), end
        if not pref.isalnum():
            return ApplePhoneme(pref, -1, False), end
    return None


def parse_apple_phonemes(ph_str):
    """Split a phoneme string into a list of ``ApplePhoneme`` tokens.

    Parsing is iterative, so input length is not bounded by the interpreter's
    recursion limit.  If the remaining text cannot be matched by any rule,
    parsing stops and the tokens found so far are returned (the unparseable
    tail is dropped).
    """
    tokens = []
    pos = 0
    while pos < len(ph_str):
        match = _next_phoneme(ph_str, pos)
        if match is None:
            break
        token, pos = match
        tokens.append(token)
    return tokens


def segmentable_phoneme(ph_str):
    """Return the parsed tokens of ``ph_str`` whose stress is non-negative.

    This drops the non-phoneme tokens the parser marks with stress ``-1``.
    """
    return [p for p in parse_apple_phonemes(ph_str) if p.stress >= 0]


def similar_phoneme_word(ph_str):
    """Return ``ph_str`` with the stress of one randomly chosen vowel changed.

    A word without any vowel token (including the empty string) is returned
    re-rendered but otherwise unchanged instead of raising ``IndexError``.
    """
    phons = parse_apple_phonemes(ph_str)
    vowels = [p for p in phons if p.vowel]
    if vowels:  # random.choice() raises IndexError on an empty sequence
        random.choice(vowels).adjust_stress()
    return ''.join(str(p) for p in phons)


def similar_phoneme_phrase(ph_str):
    """Apply ``similar_phoneme_word`` to each whitespace-separated word."""
    return ' '.join(similar_phoneme_word(w) for w in ph_str.split())


def similar_word(word_str):
    """Return a random rhyme of ``word_str``, or the word itself if none exist.

    Raises:
        ImportError: if the optional ``pronouncing`` package is not installed.
    """
    if pronouncing is None:
        raise ImportError("similar_word() requires the 'pronouncing' package")
    similar = pronouncing.rhymes(word_str)
    return random.choice(similar) if similar else word_str


def similar_phrase(ph_str):
    """Apply ``similar_word`` to each whitespace-separated word."""
    return ' '.join(similar_word(w) for w in ph_str.split())