2017-10-05 12:07:49 +00:00
|
|
|
import pandas as pd
|
2017-10-27 13:23:22 +00:00
|
|
|
import pronouncing
|
2017-10-06 05:31:16 +00:00
|
|
|
import re
|
2017-10-27 13:23:22 +00:00
|
|
|
import numpy as np
|
|
|
|
|
import random
|
|
|
|
|
|
|
|
|
|
# Lookup from source phoneme symbol (presumably ARPAbet, as used by the
# `pronouncing` package -- TODO confirm) to its Apple phoneme spelling.
# The table is written as one "SOURCE TARGET" pair per line.
mapping = dict(
    pair.split()
    for pair in """
AA AA
AE AE
AH UX
AO AO
AW AW
AY AY
B b
CH C
D d
DH D
EH EH
ER UXr
EY EY
F f
G g
HH h
IH IH
IY IY
JH J
K k
L l
M m
N n
NG N
OW OW
OY OY
P p
R r
S s
SH S
T t
TH T
UH UH
UW UW
V v
W w
Y y
Z z
ZH Z
""".strip().split('\n')
)
|
|
|
|
|
|
|
|
|
|
# Symbols recognised by parse_apple_phonemes(). Membership is what matters
# to the parser; the original ordering is preserved regardless.
apple_phonemes = [
    # Punctuation-like symbols (presumably silence / breath markers in
    # Apple's phoneme set -- TODO confirm against Apple's documentation).
    '%', '@',
    # Two-letter uppercase symbols; each starts with one of A/E/I/O/U.
    'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY',
    'IX', 'AA', 'UW', 'UH', 'UX', 'OW', 'AW', 'OY',
    # Single-letter symbols; case is significant (e.g. 'd' vs 'D').
    'b', 'C', 'd', 'D', 'f', 'g', 'h', 'J', 'k', 'l', 'm', 'n', 'N',
    'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z',
]
|
|
|
|
|
|
|
|
|
|
# Load the similarity matrix at import time. The path is relative to the
# current working directory; the first CSV row supplies the column labels
# and the first column supplies the row labels.
sim_mat = pd.read_csv(
    './similarity.csv',
    index_col=0,
    header=0,
)
|
2017-10-05 12:07:49 +00:00
|
|
|
|
2017-10-13 11:10:57 +00:00
|
|
|
|
|
|
|
|
def convert_ph(ph):
    """Convert one source phoneme symbol to its Apple spelling.

    Args:
        ph: A key of ``mapping``, optionally followed by a single stress
            digit (e.g. ``"AH0"``).

    Returns:
        The mapped symbol. When a stress digit is present it is moved to the
        front of the result (``"AH0"`` -> ``"0UX"``).

    Raises:
        KeyError: If the symbol (without its stress digit) is not a key of
            ``mapping``.
    """
    # Raw string: "\w" in a plain literal is an invalid escape sequence and
    # triggers a warning on current Python versions. The pattern is unchanged.
    stressed = re.search(r"(\w+)([0-9])", ph)
    if stressed:
        symbol, stress_digit = stressed.group(1), stressed.group(2)
        return stress_digit + mapping[symbol]
    return mapping[ph]
|
|
|
|
|
|
2017-10-27 13:23:22 +00:00
|
|
|
|
2017-10-13 11:10:57 +00:00
|
|
|
def sim_mat_to_apple_table(smt):
    """Turn a labelled similarity matrix into a long-format lookup table.

    Args:
        smt: DataFrame whose index holds source phoneme symbols accepted by
            ``convert_ph``. Presumably square, with rows and columns in the
            same order -- TODO confirm against similarity.csv.

    Returns:
        DataFrame with one row per (row label, column label) pair and the
        columns ``q`` (row label), ``r`` (column label) and ``s`` (score).
        Labels are the Apple spellings produced by ``convert_ph``.
    """
    labels = [convert_ph(ph) for ph in smt.index.tolist()]

    # Missing cells become 0 so that adding the transpose below yields a
    # symmetric matrix without NaNs.
    filled = pd.DataFrame(np.nan_to_num(smt.values))

    # Take an explicit, writable copy before the in-place diagonal fill.
    # ``DataFrame.values`` is not guaranteed to be a writable view of the
    # frame (it is read-only under pandas Copy-on-Write), so filling through
    # it can either fail or silently modify a temporary.
    scores = np.array((filled.T + filled).values)
    np.fill_diagonal(scores, 100.0)  # every symbol matches itself with 100

    square = pd.DataFrame(scores, index=labels, columns=labels)
    apple_sim_table = square.stack().reset_index()
    apple_sim_table.columns = ['q', 'r', 's']
    return apple_sim_table
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Built once at import time; top_match() filters this table on every call.
apple_sim_table = sim_mat_to_apple_table(smt=sim_mat)
|
2017-10-13 11:10:57 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
def top_match(ph):
    """Return the most similar other symbol for ``ph``, or ``ph`` itself.

    Looks up rows of the module-level ``apple_sim_table`` whose ``q`` equals
    ``ph`` and whose score ``s`` lies in the range [70, 100). The ``r`` value
    of the highest-scoring such row is returned; when no row qualifies the
    input is returned unchanged.
    """
    table = apple_sim_table
    # A score of exactly 100 is excluded, which rules out the self-match.
    score_in_band = (table.s >= 70) & (table.s < 100)
    candidates = table[(table.q == ph) & score_in_band]

    if len(candidates) == 0:
        return ph

    best_first = candidates.sort_values('s', ascending=False)
    return best_first.iloc[0].r
|
|
|
|
|
|
2017-10-27 13:23:22 +00:00
|
|
|
|
|
|
|
|
class ApplePhoneme(object):
    """One phoneme token: a symbol, a stress level and a vowel flag.

    Attributes:
        phone: The symbol text.
        stress: Numeric stress level; ``adjust_stress`` picks from 0, 1, 2.
        vowel: Whether the token is a vowel. Only vowels render their stress.
    """

    def __init__(self, phone, stress, vowel=False):
        super(ApplePhoneme, self).__init__()
        self.phone, self.stress, self.vowel = phone, stress, vowel

    def __str__(self):
        # The stress digit is written only for vowels with a positive stress;
        # every other token renders as the bare symbol.
        prefix = ''
        if self.vowel and self.stress > 0:
            prefix = str(self.stress)
        return prefix + self.phone

    def __repr__(self):
        # Quoted form of str(self), so lists of tokens print like strings.
        return "'{}'".format(str(self))

    def adjust_stress(self):
        """Replace the stress with a randomly chosen different level (0-2)."""
        alternatives = [level for level in range(3) if level != self.stress]
        self.stress = random.choice(alternatives)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def parse_apple_phonemes(ph_str):
    """Split a phoneme string into a list of ``ApplePhoneme`` tokens.

    The string is scanned left to right. At each position the shortest
    prefix that satisfies one of the following rules becomes the next token
    (rules are tried in this order for every prefix length):

    1. The prefix is a symbol in ``apple_phonemes``: stress 0, flagged as a
       vowel when its first character is one of ``AEIOU``.
    2. The prefix is one digit followed by a symbol in ``apple_phonemes``:
       the digit becomes the stress and the token is flagged as a vowel.
    3. The prefix contains a non-alphanumeric character: it is kept verbatim
       as a stress-0, non-vowel token.

    Parsing stops at the first position where no prefix qualifies; the rest
    of the string is dropped without error.

    Implemented as a loop, not one recursive call per token, so long inputs
    cannot hit the interpreter recursion limit and the result list is not
    re-copied for every token.

    Args:
        ph_str: The phoneme string to tokenise; may be empty.

    Returns:
        A list of ``ApplePhoneme`` objects, possibly empty.
    """
    tokens = []
    remaining = ph_str
    while remaining:
        token = None
        for end in range(1, len(remaining) + 1):
            head = remaining[:end]
            if head in apple_phonemes:
                token = ApplePhoneme(head, 0, head[0] in 'AEIOU')
            elif head[0].isdigit() and head[1:] in apple_phonemes:
                token = ApplePhoneme(head[1:], int(head[0]), True)
            elif not head.isalnum():
                token = ApplePhoneme(head, 0, False)
            if token is not None:
                remaining = remaining[end:]
                break
        if token is None:
            # No prefix of the remainder is recognisable: stop here.
            break
        tokens.append(token)
    return tokens
|
|
|
|
|
|
|
|
|
|
|
2017-10-13 11:10:57 +00:00
|
|
|
def similar_phoneme(ph_str):
    """Return ``ph_str`` with the stress of one random vowel changed.

    The string is tokenised with ``parse_apple_phonemes``, one vowel token
    is picked at random and given a different stress level, and the tokens
    are joined back together. Anything the parser could not tokenise is
    therefore absent from the result.

    Args:
        ph_str: Phoneme string to perturb.

    Returns:
        The re-serialised phoneme string. When the input contains no vowel
        token (including the empty string) nothing is modified.
    """
    phons = parse_apple_phonemes(ph_str)
    vowels = [p for p in phons if p.vowel]
    # random.choice raises IndexError on an empty sequence, so vowel-free
    # input is passed through untouched.
    if vowels:
        random.choice(vowels).adjust_stress()
    return ''.join(str(p) for p in phons)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def similar_word(word_str):
    """Return a random rhyme for ``word_str``, or the word itself if none.

    Rhymes come from ``pronouncing.rhymes``; when it returns nothing the
    input word is handed back unchanged.
    """
    candidates = pronouncing.rhymes(word_str)
    if len(candidates) == 0:
        return word_str
    return random.choice(candidates)
|