From b3a6aa2f6a8b483a0580782c0e19ece2d56b01bc Mon Sep 17 00:00:00 2001 From: Malar Kannan Date: Wed, 8 Nov 2017 11:08:19 +0530 Subject: [PATCH] clean-up --- speech_data.py | 40 ++-------------------------------------- speech_siamese.py | 2 +- 2 files changed, 3 insertions(+), 39 deletions(-) diff --git a/speech_data.py b/speech_data.py index 2860961..a9a87b4 100644 --- a/speech_data.py +++ b/speech_data.py @@ -11,12 +11,8 @@ import os import random import csv import gc -# import progressbar from tqdm import tqdm -# def prog_bar(title): -# widgets = [title, progressbar.Counter(), ' [', progressbar.Bar(), '] - ', progressbar.ETA()] -# return progressbar.ProgressBar(widgets=widgets) def siamese_pairs(rightGroup, wrongGroup): group1 = [r for (i, r) in rightGroup.iterrows()] @@ -25,10 +21,7 @@ def siamese_pairs(rightGroup, wrongGroup): rightRightPairs = [i for i in itertools.combinations(group1, 2)] random.shuffle(rightWrongPairs) random.shuffle(rightRightPairs) - # return (random.sample(same,10), random.sample(diff,10)) - # return rightRightPairs[:10],rightWrongPairs[:10] return rightRightPairs[:32],rightWrongPairs[:32] - # return rightRightPairs,rightWrongPairs def create_spectrogram_tfrecords(audio_group='audio'): ''' @@ -38,14 +31,10 @@ def create_spectrogram_tfrecords(audio_group='audio'): audio_samples = pd.read_csv( './outputs/' + audio_group + '.csv' , names=['word','phonemes', 'voice', 'language', 'rate', 'variant', 'file'] , quoting=csv.QUOTE_NONE) - # audio_samples = audio_samples.loc[audio_samples['word'] == - # 'sunflowers'].reset_index(drop=True) audio_samples['file_path'] = audio_samples.loc[:, 'file'].apply(lambda x: 'outputs/' + audio_group + '/' + x) audio_samples['file_exists'] = apply_by_multiprocessing(audio_samples['file_path'], os.path.exists) audio_samples = audio_samples[audio_samples['file_exists'] == True].reset_index() - # audio_samples['rate_int'] = apply_by_multiprocessing(audio_samples['rate'], str.isdigit) - # audio_samples = audio_samples[audio_samples['rate_int'] == True].reset_index().drop(['level_0'],axis=1) - # audio_samples['rate'] = audio_samples['rate'].astype(int) + def _float_feature(value): return tf.train.Feature(float_list=tf.train.FloatList(value=value)) @@ -99,6 +88,7 @@ def create_spectrogram_tfrecords(audio_group='audio'): } )) writer.write(example.SerializeToString()) + group_prog.close() prog.close() writer.close() @@ -120,7 +110,6 @@ def find_max_n(trf): def read_siamese_tfrecords(audio_group='audio'): records_file = os.path.join('./outputs',audio_group+'.tfrecords') record_iterator = tf.python_io.tf_record_iterator(path=records_file) - # input1,input2 = [],[] input_pairs = [] output_class = [] max_n = find_max_n(records_file) @@ -128,8 +117,6 @@ def read_siamese_tfrecords(audio_group='audio'): for string_record in record_iterator: example = tf.train.Example() example.ParseFromString(string_record) - # word = example.features.feature['word'].bytes_list.value[0] - # input_words.append(word) example.features.feature['spec2'].float_list.value[0] spec_n1 = example.features.feature['spec_n1'].int64_list.value[0] spec_n2 = example.features.feature['spec_n2'].int64_list.value[0] @@ -138,20 +125,12 @@ def read_siamese_tfrecords(audio_group='audio'): spec1 = np.array(example.features.feature['spec1'].float_list.value).reshape(spec_n1,spec_w1) spec2 = np.array(example.features.feature['spec2'].float_list.value).reshape(spec_n2,spec_w2) p_spec1,p_spec2 = padd_zeros(spec1,max_n),padd_zeros(spec2,max_n) - # input1.append(spec1) - # input2.append(spec2) input_pairs.append(np.asarray([p_spec1,p_spec2])) - # input_pairs.append([spec1,spec2]) output = example.features.feature['output'].int64_list.value output_class.append(np.asarray(output)) n_features = spec_w1 - # if len(input_pairs) > 50: - # break input_data,output_data = np.asarray(input_pairs),np.asarray(output_class) - import pdb; pdb.set_trace() - # tr_x1,te_x1,tr_x2,te_x2,tr_y,te_y = train_test_split(input1,input2,output_class) tr_pairs,te_pairs,tr_y,te_y = train_test_split(input_data,output_data) - # return (tr_x1,te_x1,tr_x2,te_x2,tr_y,te_y) n_step,n_features = int(max_n),int(spec_w1) return (tr_pairs,te_pairs,tr_y,te_y,n_step,n_features) @@ -160,8 +139,6 @@ def audio_samples_word_count(audio_group='audio'): audio_samples = pd.read_csv( './outputs/' + audio_group + '.csv' , names=['word','phonemes', 'voice', 'language', 'rate', 'variant', 'file'] , quoting=csv.QUOTE_NONE) - # audio_samples = audio_samples.loc[audio_samples['word'] == - # 'sunflowers'].reset_index(drop=True) audio_samples['file_path'] = audio_samples.loc[:, 'file'].apply(lambda x: 'outputs/' + audio_group + '/' + x) audio_samples['file_exists'] = apply_by_multiprocessing(audio_samples['file_path'], os.path.exists) audio_samples = audio_samples[audio_samples['file_exists'] == True].reset_index() @@ -170,19 +147,6 @@ def audio_samples_word_count(audio_group='audio'): def fix_csv(audio_group='audio'): audio_csv_lines = open('./outputs/' + audio_group + '.csv','r').readlines() audio_csv_data = [i.strip().split(',') for i in audio_csv_lines] - # audio_samples = pd.read_csv( './outputs/story_words.csv' - # , names=['word','phonemes', 'voice', 'language', 'rate', 'variant', 'file'] - # , quoting=csv.QUOTE_NONE) - # voice_set = set(audio_samples['voice'].unique().tolist()) - # to_be_fixed = [i for i in audio_csv_data if len(i) > 7] - # def unite_words(entries): - # entries = to_be_fixed[0] - # word_entries = next(((entries[:i],entries[i:]) for (i,e) in enumerate(entries) if e in voice_set),'') - # word_entries[1] - # return - # to_be_fixed[0] - # entries = [unite_words for e in to_be_fixed] - # [i for i in entries if len(i) % 2 != 0] proper_rows = [i for i in audio_csv_data if len(i) == 7] with open('./outputs/' + audio_group + '-new.csv','w') as fixed_csv: fixed_csv_w = csv.writer(fixed_csv, quoting=csv.QUOTE_MINIMAL) diff --git a/speech_siamese.py b/speech_siamese.py index 353fa4b..9ed9591 100644 --- a/speech_siamese.py +++ b/speech_siamese.py @@ -82,7 +82,7 @@ def siamese_model(input_dim): def train_siamese(): # the data, shuffled and split between train and test sets # tr_pairs, te_pairs, tr_y_e, te_y_e = speech_model_data() - (tr_pairs,te_pairs,tr_y,te_y,n_step,n_features) = read_siamese_tfrecords('story_words') + (tr_pairs,te_pairs,tr_y,te_y,n_step,n_features) = read_siamese_tfrecords('story_words_test') # tr_y = to_categorical(tr_y_e, num_classes=2) # te_y = to_categorical(te_y_e, num_classes=2) input_dim = (n_step, n_features)