# speech-scoring/generate_similar.py
#
# 49 lines
# 467 B
# Python

import pandas as pd
import re
# Symbol translation table. Each non-empty row of the literal below is
# "<source symbol> <target symbol>", separated by whitespace.
# NOTE(review): the keys look like ARPABET phoneme codes and the values like
# another phoneme alphabet — confirm which symbol sets these are.
_MAPPING_TABLE = """
AA AA
AE AE
AH UX
AO AO
AW AW
AY AY
B b
CH C
D d
DH D
EH EH
ER UXr
EY EY
F f
G g
HH h
IH IH
IY IY
JH J
K k
L l
M m
N n
NG N
OW OW
OY OY
P p
R r
S s
SH S
T t
TH T
UH UH
UW UW
V v
W w
Y y
Z z
ZH Z
"""

# Split each row once; dict() consumes the resulting [source, target] pairs.
# A row that does not contain exactly two tokens raises ValueError here
# instead of being silently truncated or failing later with IndexError.
mapping = dict(row.split() for row in _MAPPING_TABLE.strip().splitlines())
# Load the similarity table from the current working directory. header=0
# takes the column labels from the first CSV row; index_col=0 uses the first
# CSV column as the row index.
sim_mat = pd.read_csv('./similarity.csv', header=0, index_col=0)

# Translate every row label through `mapping` after removing all ASCII digits
# from it. A label whose digit-free form is not a key of `mapping` raises
# KeyError. The result is bound to a name so the work is not thrown away
# (previously the list was built and immediately discarded).
# NOTE(review): the digits are presumably stress markers on the labels —
# confirm against the contents of similarity.csv.
mapped_index = [mapping[re.sub(r'[0-9]', '', label)] for label in sim_mat.index]

# TODO: unfinished — a `sim_mat.loc` stub was left here; `mapped_index` is
# not yet applied back to `sim_mat`.