diff --git a/generate_similar.py b/generate_similar.py index 967d9df..f7ba80d 100644 --- a/generate_similar.py +++ b/generate_similar.py @@ -4,50 +4,87 @@ import re import numpy as np import random -mapping = { - s.split()[0]: s.split()[1] - for s in """ -AA AA -AE AE -AH UX -AO AO -AW AW -AY AY -B b -CH C -D d -DH D -EH EH -ER UXr -EY EY -F f -G g -HH h -IH IH -IY IY -JH J -K k -L l -M m -N n -NG N -OW OW -OY OY -P p -R r -S s -SH S -T t -TH T -UH UH -UW UW -V v -W w -Y y -Z z -ZH Z -""".strip().split('\n') -} +# mapping = { +# s.split()[0]: s.split()[1] +# for s in """ +# AA AA +# AE AE +# AH UX +# AO AO +# AW AW +# AY AY +# B b +# CH C +# D d +# DH D +# EH EH +# ER UXr +# EY EY +# F f +# G g +# HH h +# IH IH +# IY IY +# JH J +# K k +# L l +# M m +# N n +# NG N +# OW OW +# OY OY +# P p +# R r +# S s +# SH S +# T t +# TH T +# UH UH +# UW UW +# V v +# W w +# Y y +# Z z +# ZH Z +# """.strip().split('\n') +# } + +# sim_mat = pd.read_csv('./similarity.csv', header=0, index_col=0) +# +# +# def convert_ph(ph): +# stress_level = re.search("(\w+)([0-9])", ph) +# if stress_level: +# return stress_level.group(2) + mapping[stress_level.group(1)] +# else: +# return mapping[ph] +# +# +# def sim_mat_to_apple_table(smt): +# colnames = [convert_ph(ph) for ph in smt.index.tolist()] +# smt = pd.DataFrame(np.nan_to_num(smt.values)) +# fsmt = (smt.T + smt) +# np.fill_diagonal(fsmt.values, 100.0) +# asmt = pd.DataFrame.copy(fsmt) +# asmt.columns = colnames +# asmt.index = colnames +# apple_sim_table = asmt.stack().reset_index() +# apple_sim_table.columns = ['q', 'r', 's'] +# return apple_sim_table +# +# +# apple_sim_table = sim_mat_to_apple_table(sim_mat) +# +# +# def top_match(ph): +# selected = apple_sim_table[(apple_sim_table.q == ph) +# & (apple_sim_table.s < 100) & +# (apple_sim_table.s >= 70)] +# tm = ph +# if len(selected) > 0: +# tm = pd.DataFrame.sort_values(selected, 's', ascending=False).iloc[0].r +# return tm + apple_phonemes = [ '%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW', @@ -55,43 +92,6 @@ apple_phonemes = [ 'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z' ] -sim_mat = pd.read_csv('./similarity.csv', header=0, index_col=0) - - -def convert_ph(ph): - stress_level = re.search("(\w+)([0-9])", ph) - if stress_level: - return stress_level.group(2) + mapping[stress_level.group(1)] - else: - return mapping[ph] - - -def sim_mat_to_apple_table(smt): - colnames = [convert_ph(ph) for ph in smt.index.tolist()] - smt = pd.DataFrame(np.nan_to_num(smt.values)) - fsmt = (smt.T + smt) - np.fill_diagonal(fsmt.values, 100.0) - asmt = pd.DataFrame.copy(fsmt) - asmt.columns = colnames - asmt.index = colnames - apple_sim_table = asmt.stack().reset_index() - apple_sim_table.columns = ['q', 'r', 's'] - return apple_sim_table - - -apple_sim_table = sim_mat_to_apple_table(sim_mat) - - -def top_match(ph): - selected = apple_sim_table[(apple_sim_table.q == ph) - & (apple_sim_table.s < 100) & - (apple_sim_table.s >= 70)] - tm = ph - if len(selected) > 0: - tm = pd.DataFrame.sort_values(selected, 's', ascending=False).iloc[0].r - return tm - - class ApplePhoneme(object): """docstring for ApplePhoneme."""