From 988f66c2c21e8cb465b3caeaf197a72c77e54e37 Mon Sep 17 00:00:00 2001
From: Malar Kannan
Date: Mon, 13 Nov 2017 17:33:37 +0530
Subject: [PATCH] Avoid pairing same-voice samples and identical-sounding variants

---
 pandas_parallel.py     | 25 --------------
 requirements-linux.txt |  1 -
 speech_data.py         | 21 +++++++++---
 speech_siamese.py      | 10 ++----
 speech_utils.py        | 78 ++++++++++++++++++++++++++++++++++++++++++
 test_siamese.py        | 61 ++++++++++++++++++++++++++++++----
 6 files changed, 152 insertions(+), 44 deletions(-)
 delete mode 100644 pandas_parallel.py
 create mode 100644 speech_utils.py

diff --git a/pandas_parallel.py b/pandas_parallel.py
deleted file mode 100644
index 245da38..0000000
--- a/pandas_parallel.py
+++ /dev/null
@@ -1,25 +0,0 @@
-import multiprocessing
-import pandas as pd
-import numpy as np
-
-
-
-def _apply_df(args):
-    df, func, num, kwargs = args
-    return num, df.apply(func, **kwargs)
-
-def apply_by_multiprocessing(df,func,**kwargs):
-    cores = multiprocessing.cpu_count()
-    workers=kwargs.pop('workers') if 'workers' in kwargs else cores
-    pool = multiprocessing.Pool(processes=workers)
-    result = pool.map(_apply_df, [(d, func, i, kwargs) for i,d in enumerate(np.array_split(df, workers))])
-    pool.close()
-    result=sorted(result,key=lambda x:x[0])
-    return pd.concat([i[1] for i in result])
-
-def square(x):
-    return x**x
-
-if __name__ == '__main__':
-    df = pd.DataFrame({'a':range(10), 'b':range(10)})
-    apply_by_multiprocessing(df, square, axis=1, workers=4)
diff --git a/requirements-linux.txt b/requirements-linux.txt
index c533525..dba434a 100644
--- a/requirements-linux.txt
+++ b/requirements-linux.txt
@@ -40,7 +40,6 @@ parso==0.1.0
 partd==0.3.8
 pexpect==4.2.1
 pickleshare==0.7.4
-pkg-resources==0.0.0
 progressbar2==3.34.3
 prompt-toolkit==1.0.15
 protobuf==3.4.0
diff --git a/speech_data.py b/speech_data.py
index 4a85b76..d11c4ee 100644
--- a/speech_data.py
+++ b/speech_data.py
@@ -1,5 +1,6 @@
 import pandas as pd
-from pandas_parallel import apply_by_multiprocessing
+from speech_utils import apply_by_multiprocessing
+from speech_utils import threadsafe_generator
 # import dask as dd
 # import dask.dataframe as ddf
 import tensorflow as tf
@@ -36,7 +37,7 @@ def _int64_feature(value):
 def _bytes_feature(value):
     return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
 
-def create_spectrogram_tfrecords(audio_group='audio',sample_count=0):
+def create_spectrogram_tfrecords(audio_group='audio',sample_count=0,train_test_ratio=0.1):
     '''
     http://warmspringwinds.github.io/tensorflow/tf-slim/2016/12/21/tfrecords-guide/
     http://www.machinelearninguru.com/deep_learning/tensorflow/basics/tfrecord/tfrecord.html
@@ -60,6 +61,13 @@ def create_spectrogram_tfrecords(audio_group='audio',sample_count=0):
     for (output,group) in groups:
         group_prog = tqdm(group,desc='Writing Spectrogram')
         for sample1,sample2 in group_prog:
+            same = sample1['variant'] == sample2['variant']
+            phon_same = sample1['phonemes'] == sample2['phonemes']
+            voice_diff = sample1['voice'] != sample2['voice']
+            if not same and phon_same: # different variants with identical phonemes: skip
+                continue
+            if same and not voice_diff: # same variant from the same voice: skip
+                continue
             group_prog.set_postfix(output=output
                 ,var1=sample1['variant']
                 ,var2=sample2['variant'])
@@ -101,7 +109,7 @@ def create_spectrogram_tfrecords(audio_group='audio',sample_count=0):
 
     word_groups = [i for i in audio_samples.groupby('word')]
     wg_sampled = reservoir_sample(word_groups,sample_count) if sample_count > 0 else word_groups
-    tr_audio_samples,te_audio_samples = train_test_split(wg_sampled,test_size=0.1)
+    tr_audio_samples,te_audio_samples = train_test_split(wg_sampled,test_size=train_test_ratio)
     write_samples(tr_audio_samples,'train')
     write_samples(te_audio_samples,'test')
     const_file = os.path.join('./outputs',audio_group+'.constants')
@@ -125,13 +133,16 @@ def reservoir_sample(iterable, k):
             sample[j] = item # replace item with gradually decreasing probability
     return sample
 
-def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,test_size=100):
+
+def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,test_size=0):
     records_file = os.path.join('./outputs',audio_group+'.train.tfrecords')
     input_pairs = []
     output_class = []
     const_file = os.path.join('./outputs',audio_group+'.constants')
     (n_spec,n_features,n_records) = pickle.load(open(const_file,'rb'))
     print('reading tfrecords({}-train)...'.format(audio_group))
+
+    # @threadsafe_generator
     def record_generator():
         input_data = []
         output_data = []
@@ -226,7 +237,7 @@ if __name__ == '__main__':
     # pickle_constants('story_words')
     # create_spectrogram_tfrecords('audio',sample_count=100)
     # create_spectrogram_tfrecords('story_all',sample_count=25)
-    create_spectrogram_tfrecords('story_words',sample_count=10)
+    create_spectrogram_tfrecords('story_words',sample_count=10,train_test_ratio=0.2)
     # create_spectrogram_tfrecords('audio',sample_count=50)
     # read_siamese_tfrecords_generator('audio')
     # padd_zeros_siamese_tfrecords('audio')
diff --git a/speech_siamese.py b/speech_siamese.py
index 307136a..c5f61b6 100644
--- a/speech_siamese.py
+++ b/speech_siamese.py
@@ -12,11 +12,7 @@ from keras.utils import to_categorical
 from keras.optimizers import RMSprop
 from keras.callbacks import TensorBoard, ModelCheckpoint
 from keras import backend as K
-
-def create_dir(direc):
-    import os
-    if not os.path.exists(direc):
-        os.makedirs(direc)
+from speech_utils import create_dir
 
 # def euclidean_distance(vects):
 #     x, y = vects
@@ -95,7 +91,7 @@ def train_siamese(audio_group = 'audio'):
     create_dir(model_dir)
     log_dir = './logs/'+audio_group
     create_dir(log_dir)
-    tr_gen_fn,te_pairs,te_y,n_step,n_features,n_records = read_siamese_tfrecords_generator(audio_group,batch_size,256)
+    tr_gen_fn,te_pairs,te_y,n_step,n_features,n_records = read_siamese_tfrecords_generator(audio_group,batch_size=batch_size)
     tr_gen = tr_gen_fn()
     # tr_y = to_categorical(tr_y_e, num_classes=2)
     # te_y = to_categorical(te_y_e, num_classes=2)
@@ -138,7 +134,7 @@ def train_siamese(audio_group = 'audio'):
         ,epochs=1000
         ,steps_per_epoch=n_records//batch_size
         ,validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)
-        ,use_multiprocessing=True
+        ,use_multiprocessing=True, workers=1
         ,callbacks=[tb_cb, cp_cb])
     model.save(model_dir+'/siamese_speech_model-final.h5')
     # compute final accuracy on training and test sets
diff --git a/speech_utils.py b/speech_utils.py
new file mode 100644
index 0000000..2581f84
--- /dev/null
+++ b/speech_utils.py
@@ -0,0 +1,78 @@
+import os
+import threading
+
+import multiprocessing
+import pandas as pd
+import numpy as np
+
+
+def _apply_df(args):
+    # helper for apply_by_multiprocessing: apply func to one chunk
+    df, func, num, kwargs = args
+    return num, df.apply(func, **kwargs)
+
+def apply_by_multiprocessing(df,func,**kwargs):
+    # split df across worker processes, apply func, and reassemble in order
+    cores = multiprocessing.cpu_count()
+    workers=kwargs.pop('workers') if 'workers' in kwargs else cores
+    pool = multiprocessing.Pool(processes=workers)
+    result = pool.map(_apply_df, [(d, func, i, kwargs) for i,d in enumerate(np.array_split(df, workers))])
+    pool.close()
+    result=sorted(result,key=lambda x:x[0])
+    return pd.concat([i[1] for i in result])
+
+def square(x):
+    return x**2
+
+
+def rm_rf(d):
+    # recursively delete directory d and everything below it
+    for path in (os.path.join(d,f) for f in os.listdir(d)):
+        if os.path.isdir(path):
+            rm_rf(path)
+        else:
+            os.unlink(path)
+    os.rmdir(d)
+
+def create_dir(direc):
+    # ensure direc exists and is empty
+    if not os.path.exists(direc):
+        os.makedirs(direc)
+    else:
+        rm_rf(direc)
+        create_dir(direc)
+
+
+#################### Now make the data generator threadsafe ####################
+
+class threadsafe_iter:
+    """Takes an iterator/generator and makes it thread-safe by
+    serializing calls to the `next` method of the given iterator/generator.
+    """
+    def __init__(self, it):
+        self.it = it
+        self.lock = threading.Lock()
+
+    def __iter__(self):
+        return self
+
+    def __next__(self): # Py3
+        with self.lock:
+            return next(self.it)
+
+    def next(self): # Py2
+        with self.lock:
+            return self.it.next()
+
+
+def threadsafe_generator(f):
+    """A decorator that takes a generator function and makes it thread-safe.
+    """
+    def g(*a, **kw):
+        return threadsafe_iter(f(*a, **kw))
+    return g
+
+
+if __name__ == '__main__':
+    df = pd.DataFrame({'a':range(10), 'b':range(10)})
+    apply_by_multiprocessing(df, square, axis=1, workers=4)
diff --git a/test_siamese.py b/test_siamese.py
index 35980d6..c228cec 100644
--- a/test_siamese.py
+++ b/test_siamese.py
@@ -1,13 +1,14 @@
 from speech_siamese import siamese_model
 from record_mic_speech import record_spectrogram
-from importlib import reload
+# from importlib import reload
 # import speech_data
 # reload(speech_data)
-from speech_data import create_test_pair,get_word_pairs_data,speech_data
 import numpy as np
-
-model = siamese_model((15, 1654))
-model.load_weights('./models/siamese_speech_model-final.h5')
+import os
+import pickle
+import tensorflow as tf
+import csv
+from speech_data import padd_zeros
 
 def predict_recording_with(m,sample_size=15):
     spec1 = record_spectrogram(n_sec=1.4)
@@ -24,7 +25,55 @@ def test_with(audio_group):
     print(np.argmax(model.predict([X[:, 0], X[:, 1]]),axis=1))
     print(Y.astype(np.int8))
 
-test_with('rand_edu')
+def evaluate_siamese(audio_group='audio',model_file='siamese_speech_model-46-epoch-0.29-acc.h5'):
+    records_file = os.path.join('./outputs',audio_group+'.train.tfrecords')
+    const_file = os.path.join('./outputs',audio_group+'.constants')
+    model_weights_path = os.path.join('./models/story_words/',model_file)
+    (n_spec,n_features,n_records) = pickle.load(open(const_file,'rb'))
+    print('evaluating tfrecords({}-train)...'.format(audio_group))
+
+    model = siamese_model((n_spec, n_features))
+    model.load_weights(model_weights_path)
+    record_iterator = tf.python_io.tf_record_iterator(path=records_file)
+    # tqdm(enumerate(record_iterator),total=n_records)
+    with open('./outputs/' + audio_group + '.results.csv','w') as result_csv:
+        result_csv_w = csv.writer(result_csv, quoting=csv.QUOTE_MINIMAL)
+        for (i,string_record) in enumerate(record_iterator):
+            # string_record = next(record_iterator)
+            example = tf.train.Example()
+            example.ParseFromString(string_record)
+            spec_n1 = example.features.feature['spec_n1'].int64_list.value[0]
+            spec_n2 = example.features.feature['spec_n2'].int64_list.value[0]
+            spec_w1 = example.features.feature['spec_w1'].int64_list.value[0]
+            spec_w2 = example.features.feature['spec_w2'].int64_list.value[0]
+            spec1 = np.array(example.features.feature['spec1'].float_list.value).reshape(spec_n1,spec_w1)
+            spec2 = np.array(example.features.feature['spec2'].float_list.value).reshape(spec_n2,spec_w2)
+            p_spec1,p_spec2 = padd_zeros(spec1,n_spec),padd_zeros(spec2,n_spec)
+            input_arr = np.asarray([[p_spec1,p_spec2]])
+            output_arr = np.asarray([example.features.feature['output'].int64_list.value])
+            y_pred = model.predict([input_arr[:, 0], input_arr[:, 1]])
+            predicted = np.asarray(y_pred[0]>0.5).astype(output_arr.dtype)
+            expected = output_arr[0]
+            if np.all(predicted == expected): # only misclassified pairs are reported
+                continue
+            word = example.features.feature['word'].bytes_list.value[0].decode()
+            phoneme1 = example.features.feature['phoneme1'].bytes_list.value[0].decode()
+            phoneme2 = example.features.feature['phoneme2'].bytes_list.value[0].decode()
+            voice1 = example.features.feature['voice1'].bytes_list.value[0].decode()
+            voice2 = example.features.feature['voice2'].bytes_list.value[0].decode()
+            language = example.features.feature['language'].bytes_list.value[0].decode()
+            rate1 = example.features.feature['rate1'].int64_list.value[0]
+            rate2 = example.features.feature['rate2'].int64_list.value[0]
+            variant1 = example.features.feature['variant1'].bytes_list.value[0].decode()
+            variant2 = example.features.feature['variant2'].bytes_list.value[0].decode()
+            file1 = example.features.feature['file1'].bytes_list.value[0].decode()
+            file2 = example.features.feature['file2'].bytes_list.value[0].decode()
+            print(phoneme1,phoneme2,voice1,voice2,rate1,rate2,variant1,variant2,file1,file2)
+            result_csv_w.writerow([phoneme1,phoneme2,voice1,voice2,rate1,rate2,variant1,variant2,file1,file2])
+
+
+evaluate_siamese('story_words',model_file='siamese_speech_model-92-epoch-0.20-acc.h5')
+# test_with('rand_edu')
 # sunflower_data,sunflower_result = get_word_pairs_data('sweater',15)
 # print(np.argmax(model.predict([sunflower_data[:, 0], sunflower_data[:, 1]]),axis=1))
 # print(sunflower_result)
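
---
Notes on the pair filter in create_spectrogram_tfrecords: a pair is skipped
when the two samples are different variants whose phoneme strings are
identical (the siamese net would otherwise be trained to call
identical-sounding audio "different"), and when they are the same variant
spoken by the same voice (a near-duplicate that adds no signal). A standalone
sketch of the rule, assuming each sample is a dict with 'variant', 'phonemes'
and 'voice' keys as in speech_data.py; keep_pair is a hypothetical name, not
part of the patch:

    def keep_pair(sample1, sample2):
        same = sample1['variant'] == sample2['variant']
        phon_same = sample1['phonemes'] == sample2['phonemes']
        voice_diff = sample1['voice'] != sample2['voice']
        if not same and phon_same:
            return False  # different variants that sound identical
        if same and not voice_diff:
            return False  # same variant read by the same voice
        return True

    a = {'variant': 'v1', 'phonemes': 'AH B', 'voice': 'alice'}
    b = {'variant': 'v1', 'phonemes': 'AH B', 'voice': 'bob'}
    print(keep_pair(a, b))  # True: same variant, different voices
    print(keep_pair(a, a))  # False: same variant, same voice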
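The threadsafe_iter/threadsafe_generator utilities address a real hazard: a
plain Python generator shared between threads raises ValueError ("generator
already executing") when two consumers call next() concurrently. The wrapper
serializes next() behind a lock, which is what the commented-out
@threadsafe_generator on record_generator is for. A small usage sketch;
counter and drain are illustrative names, not part of the patch:

    import threading
    from speech_utils import threadsafe_generator

    @threadsafe_generator
    def counter(n):
        # stand-in for record_generator in speech_data.py
        for i in range(n):
            yield i

    gen = counter(10000)
    seen = []

    def drain():
        # each thread pulls items from the shared, locked generator
        for item in gen:
            seen.append(item)

    threads = [threading.Thread(target=drain) for _ in range(4)]
    for t in threads:
        t.start()
    for t in threads:
        t.join()
    assert len(seen) == 10000  # every item was yielded exactly once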
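One behavioural change worth noting: create_dir in speech_utils.py is not the
same as the helper removed from speech_siamese.py. The old version only
created a missing directory; the new one also wipes an existing directory via
rm_rf before recreating it, so each training run starts with empty model and
log directories. Assuming the standard library is acceptable here, fresh_dir
below sketches the same effect with shutil (not used by the patch; shown only
for comparison):

    import os
    import shutil

    def fresh_dir(path):  # hypothetical equivalent of create_dir
        shutil.rmtree(path, ignore_errors=True)  # delete contents if present
        os.makedirs(path)                        # recreate empty directory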