Add create_triplets.py; build a symbol mapping in generate_similar.py

Line structure restored so the patch can be parsed (it had been flattened
onto a single line). Changes relative to the flattened patch:
* create_triplets.py: word_goups renamed to word_groups; the bare no-op
  expression statement is dropped and the grouped sum is kept in level_sums.
* generate_similar.py: each table line is split once; the translated index
  labels are kept in mapped_index instead of being discarded; the digit
  regex is a raw string compiled once; the stdlib import is grouped first.
* Hunk lengths are recounted for the revised content. The index lines keep
  the blob ids recorded in the flattened patch, so the post-image ids do not
  describe the revised content.

diff --git a/create_triplets.py b/create_triplets.py
new file mode 100644
index 0000000..068f3a0
--- /dev/null
+++ b/create_triplets.py
@@ -0,0 +1,16 @@
+"""Load ./outputs/audio.csv and group its rows by word."""
+
+import pandas as pd
+
+# Passing names= without header= makes pandas read the file as headerless.
+audio_file = pd.read_csv(
+    './outputs/audio.csv',
+    names=['word', 'voice', 'rate', 'type', 'filename'],
+)
+word_groups = audio_file.groupby('word')
+
+# Scratch: summing a Series grouped on index level 0 merges repeated labels.
+lst = [1, 2, 3, 1, 2, 3]
+s = pd.Series([1, 2, 3, 10, 20, 30], lst)
+df3 = pd.DataFrame({'X': ['A', 'B', 'A', 'B'], 'Y': [1, 4, 3, 2]})
+level_sums = s.groupby(level=0).sum()
diff --git a/generate_similar.py b/generate_similar.py
index e077bb4..6c4145d 100644
--- a/generate_similar.py
+++ b/generate_similar.py
@@ -1,3 +1,57 @@
+import re
+
 import pandas as pd
-
-pd.read_csv('./similarity.csv')
+
+# Symbol translation table, one "SOURCE TARGET" pair per line.
+# NOTE(review): the source column looks like ARPAbet; confirm both alphabets.
+mapping = dict(pair.split() for pair in """
+AA AA
+AE AE
+AH UX
+AO AO
+AW AW
+AY AY
+B b
+CH C
+D d
+DH D
+EH EH
+ER UXr
+EY EY
+F f
+G g
+HH h
+IH IH
+IY IY
+JH J
+K k
+L l
+M m
+N n
+NG N
+OW OW
+OY OY
+P p
+R r
+S s
+SH S
+T t
+TH T
+UH UH
+UW UW
+V v
+W w
+Y y
+Z z
+ZH Z
+""".strip().splitlines())
+
+# First row supplies the column names, first column the row index.
+sim_mat = pd.read_csv('./similarity.csv', header=0, index_col=0)
+
+# Strip digits from every index label, then translate it through mapping;
+# a label missing from the table raises KeyError.
+digits = re.compile(r'[0-9]')
+mapped_index = [
+    mapping[digits.sub('', label)] for label in sim_mat.index.tolist()
+]