removed legacy similarity

2017-10-27 18:56:37 +05:30
parent 938a9cf0a8
commit 307b4ce1c2
1 changed file with 81 additions and 81 deletions
--- a/generate_similar.py
+++ b/generate_similar.py
@@ -4,50 +4,87 @@ import re
 import numpy as np
 import random
-mapping = {
+# mapping = {
-    s.split()[0]: s.split()[1]
+#     s.split()[0]: s.split()[1]
-    for s in """
+#     for s in """
-AA AA
+# AA AA
-AE AE
+# AE AE
-AH UX
+# AH UX
-AO AO
+# AO AO
-AW AW
+# AW AW
-AY AY
+# AY AY
-B  b
+# B  b
-CH C
+# CH C
-D  d
+# D  d
-DH D
+# DH D
-EH EH
+# EH EH
-ER UXr
+# ER UXr
-EY EY
+# EY EY
-F  f
+# F  f
-G  g
+# G  g
-HH h
+# HH h
-IH IH
+# IH IH
-IY IY
+# IY IY
-JH J
+# JH J
-K  k
+# K  k
-L  l
+# L  l
-M  m
+# M  m
-N  n
+# N  n
-NG N
+# NG N
-OW OW
+# OW OW
-OY OY
+# OY OY
-P  p
+# P  p
-R  r
+# R  r
-S  s
+# S  s
-SH S
+# SH S
-T  t
+# T  t
-TH T
+# TH T
-UH UH
+# UH UH
-UW UW
+# UW UW
-V  v
+# V  v
-W  w
+# W  w
-Y  y
+# Y  y
-Z  z
+# Z  z
-ZH Z
+# ZH Z
-""".strip().split('\n')
+# """.strip().split('\n')
-}
+# }
 # sim_mat = pd.read_csv('./similarity.csv', header=0, index_col=0)
 #
 #
 # def convert_ph(ph):
 #     stress_level = re.search("(\w+)([0-9])", ph)
 #     if stress_level:
 #         return stress_level.group(2) + mapping[stress_level.group(1)]
 #     else:
 #         return mapping[ph]
 #
 #
 # def sim_mat_to_apple_table(smt):
 #     colnames = [convert_ph(ph) for ph in smt.index.tolist()]
 #     smt = pd.DataFrame(np.nan_to_num(smt.values))
 #     fsmt = (smt.T + smt)
 #     np.fill_diagonal(fsmt.values, 100.0)
 #     asmt = pd.DataFrame.copy(fsmt)
 #     asmt.columns = colnames
 #     asmt.index = colnames
 #     apple_sim_table = asmt.stack().reset_index()
 #     apple_sim_table.columns = ['q', 'r', 's']
 #     return apple_sim_table
 #
 #
 # apple_sim_table = sim_mat_to_apple_table(sim_mat)
 #
 #
 # def top_match(ph):
 #     selected = apple_sim_table[(apple_sim_table.q == ph)
 #                                & (apple_sim_table.s < 100) &
 #                                (apple_sim_table.s >= 70)]
 #     tm = ph
 #     if len(selected) > 0:
 #         tm = pd.DataFrame.sort_values(selected, 's', ascending=False).iloc[0].r
 #     return tm
 apple_phonemes = [
    '%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW',
@@ -55,43 +92,6 @@ apple_phonemes = [
    'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z'
 ]
 sim_mat = pd.read_csv('./similarity.csv', header=0, index_col=0)
 def convert_ph(ph):
    stress_level = re.search("(\w+)([0-9])", ph)
    if stress_level:
        return stress_level.group(2) + mapping[stress_level.group(1)]
    else:
        return mapping[ph]
 def sim_mat_to_apple_table(smt):
    colnames = [convert_ph(ph) for ph in smt.index.tolist()]
    smt = pd.DataFrame(np.nan_to_num(smt.values))
    fsmt = (smt.T + smt)
    np.fill_diagonal(fsmt.values, 100.0)
    asmt = pd.DataFrame.copy(fsmt)
    asmt.columns = colnames
    asmt.index = colnames
    apple_sim_table = asmt.stack().reset_index()
    apple_sim_table.columns = ['q', 'r', 's']
    return apple_sim_table
 apple_sim_table = sim_mat_to_apple_table(sim_mat)
 def top_match(ph):
    selected = apple_sim_table[(apple_sim_table.q == ph)
                               & (apple_sim_table.s < 100) &
                               (apple_sim_table.s >= 70)]
    tm = ph
    if len(selected) > 0:
        tm = pd.DataFrame.sort_values(selected, 's', ascending=False).iloc[0].r
    return tm
 class ApplePhoneme(object):
    """docstring for ApplePhoneme."""