removed legacy similarity

master
Malar Kannan 2017-10-27 18:56:37 +05:30
parent 938a9cf0a8
commit 307b4ce1c2
1 changed file with 81 additions and 81 deletions

View File

@ -4,50 +4,87 @@ import re
import numpy as np import numpy as np
import random import random
# Phoneme translation table used by convert_ph(): key is the source symbol,
# value is the symbol it is rewritten to.
# NOTE(review): the keys look like ARPABET symbols and the values like Apple
# speech-synthesis phonemes -- confirm against the similarity data source.
# Each row of the literal below is "<source> <target>", separated by whitespace.
mapping = {
    source: target
    for source, target in (
        row.split()
        for row in """
AA AA
AE AE
AH UX
AO AO
AW AW
AY AY
B b
CH C
D d
DH D
EH EH
ER UXr
EY EY
F f
G g
HH h
IH IH
IY IY
JH J
K k
L l
M m
N n
NG N
OW OW
OY OY
P p
R r
S s
SH S
T t
TH T
UH UH
UW UW
V v
W w
Y y
Z z
ZH Z
""".strip().split('\n')
    )
}
# sim_mat = pd.read_csv('./similarity.csv', header=0, index_col=0)
#
#
# def convert_ph(ph):
# stress_level = re.search("(\w+)([0-9])", ph)
# if stress_level:
# return stress_level.group(2) + mapping[stress_level.group(1)]
# else:
# return mapping[ph]
#
#
# def sim_mat_to_apple_table(smt):
# colnames = [convert_ph(ph) for ph in smt.index.tolist()]
# smt = pd.DataFrame(np.nan_to_num(smt.values))
# fsmt = (smt.T + smt)
# np.fill_diagonal(fsmt.values, 100.0)
# asmt = pd.DataFrame.copy(fsmt)
# asmt.columns = colnames
# asmt.index = colnames
# apple_sim_table = asmt.stack().reset_index()
# apple_sim_table.columns = ['q', 'r', 's']
# return apple_sim_table
#
#
# apple_sim_table = sim_mat_to_apple_table(sim_mat)
#
#
# def top_match(ph):
# selected = apple_sim_table[(apple_sim_table.q == ph)
# & (apple_sim_table.s < 100) &
# (apple_sim_table.s >= 70)]
# tm = ph
# if len(selected) > 0:
# tm = pd.DataFrame.sort_values(selected, 's', ascending=False).iloc[0].r
# return tm
apple_phonemes = [ apple_phonemes = [
'%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW', '%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW',
@ -55,43 +92,6 @@ apple_phonemes = [
'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z' 'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z'
] ]
# Load the similarity matrix from the working directory at import time; the
# first CSV row supplies the column labels and the first column the row labels.
sim_mat = pd.read_csv('./similarity.csv', header=0, index_col=0)
def convert_ph(ph):
    """Translate a phoneme symbol through the module-level ``mapping`` table.

    When ``ph`` contains a run of word characters followed by a digit
    (e.g. ``"AH0"``), the digit is split off, the preceding characters are
    looked up in ``mapping``, and the digit is placed *in front of* the
    translated symbol.  Otherwise ``ph`` itself is looked up unchanged.

    Args:
        ph: phoneme symbol, optionally carrying a trailing digit.

    Returns:
        The translated symbol, prefixed with the digit when one was present.

    Raises:
        KeyError: if the (digit-stripped) symbol is not a key of ``mapping``.
    """
    # Raw string: "\w" in a plain literal is an invalid escape sequence and
    # triggers a DeprecationWarning/SyntaxWarning on modern Python.
    stress_level = re.search(r"(\w+)([0-9])", ph)
    if stress_level:
        return stress_level.group(2) + mapping[stress_level.group(1)]
    return mapping[ph]
def sim_mat_to_apple_table(smt):
    """Convert a similarity matrix into a long-format lookup table.

    The matrix values are made NaN-free, added to their own transpose, and
    the diagonal is forced to 100.0.  Row and column labels are the
    ``convert_ph`` translation of ``smt``'s index.  The result is stacked
    into one row per (row label, column label) pair.

    Args:
        smt: square ``pandas.DataFrame`` whose index holds phoneme symbols
            accepted by ``convert_ph``.

    Returns:
        A ``pandas.DataFrame`` with columns ``q`` (row label), ``r`` (column
        label) and ``s`` (score).
    """
    colnames = [convert_ph(ph) for ph in smt.index.tolist()]
    # Work on a plain ndarray: filling the diagonal through ``DataFrame.values``
    # depends on that attribute being a writable view, which is not guaranteed
    # (it is read-only under pandas Copy-on-Write and may be a copy otherwise).
    scores = np.nan_to_num(smt.values)
    symmetric = scores.T + scores
    np.fill_diagonal(symmetric, 100.0)
    asmt = pd.DataFrame(symmetric, index=colnames, columns=colnames)
    apple_sim_table = asmt.stack().reset_index()
    apple_sim_table.columns = ['q', 'r', 's']
    return apple_sim_table
# Build the long-format (q, r, s) table once at import time; top_match()
# reads this module-level table on every call.
apple_sim_table = sim_mat_to_apple_table(sim_mat)
def top_match(ph):
    """Return the highest-scoring substitute for ``ph`` from ``apple_sim_table``.

    Only rows whose ``q`` equals ``ph`` and whose score ``s`` lies in
    ``[70, 100)`` are considered.  The ``r`` value of the best-scoring row is
    returned; when no row qualifies, ``ph`` is returned unchanged.
    """
    is_query = apple_sim_table.q == ph
    below_self = apple_sim_table.s < 100
    above_floor = apple_sim_table.s >= 70
    candidates = apple_sim_table[is_query & below_self & above_floor]
    # Guard clause: nothing similar enough, so the phoneme maps to itself.
    if not len(candidates) > 0:
        return ph
    ranked = candidates.sort_values('s', ascending=False)
    return ranked.iloc[0].r
class ApplePhoneme(object): class ApplePhoneme(object):
"""docstring for ApplePhoneme.""" """docstring for ApplePhoneme."""