avoiding same voice similar variants
parent
d978272bdb
commit
988f66c2c2
|
|
@ -1,25 +0,0 @@
|
||||||
import multiprocessing
|
|
||||||
import pandas as pd
|
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def _apply_df(args):
|
|
||||||
df, func, num, kwargs = args
|
|
||||||
return num, df.apply(func, **kwargs)
|
|
||||||
|
|
||||||
def apply_by_multiprocessing(df,func,**kwargs):
|
|
||||||
cores = multiprocessing.cpu_count()
|
|
||||||
workers=kwargs.pop('workers') if 'workers' in kwargs else cores
|
|
||||||
pool = multiprocessing.Pool(processes=workers)
|
|
||||||
result = pool.map(_apply_df, [(d, func, i, kwargs) for i,d in enumerate(np.array_split(df, workers))])
|
|
||||||
pool.close()
|
|
||||||
result=sorted(result,key=lambda x:x[0])
|
|
||||||
return pd.concat([i[1] for i in result])
|
|
||||||
|
|
||||||
def square(x):
|
|
||||||
return x**x
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
df = pd.DataFrame({'a':range(10), 'b':range(10)})
|
|
||||||
apply_by_multiprocessing(df, square, axis=1, workers=4)
|
|
||||||
|
|
@ -40,7 +40,6 @@ parso==0.1.0
|
||||||
partd==0.3.8
|
partd==0.3.8
|
||||||
pexpect==4.2.1
|
pexpect==4.2.1
|
||||||
pickleshare==0.7.4
|
pickleshare==0.7.4
|
||||||
pkg-resources==0.0.0
|
|
||||||
progressbar2==3.34.3
|
progressbar2==3.34.3
|
||||||
prompt-toolkit==1.0.15
|
prompt-toolkit==1.0.15
|
||||||
protobuf==3.4.0
|
protobuf==3.4.0
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from pandas_parallel import apply_by_multiprocessing
|
from speech_utils import apply_by_multiprocessing
|
||||||
|
from speech_utils import threadsafe_iter
|
||||||
# import dask as dd
|
# import dask as dd
|
||||||
# import dask.dataframe as ddf
|
# import dask.dataframe as ddf
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
|
|
@ -36,7 +37,7 @@ def _int64_feature(value):
|
||||||
def _bytes_feature(value):
|
def _bytes_feature(value):
|
||||||
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
|
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
|
||||||
|
|
||||||
def create_spectrogram_tfrecords(audio_group='audio',sample_count=0):
|
def create_spectrogram_tfrecords(audio_group='audio',sample_count=0,train_test_ratio=0.1):
|
||||||
'''
|
'''
|
||||||
http://warmspringwinds.github.io/tensorflow/tf-slim/2016/12/21/tfrecords-guide/
|
http://warmspringwinds.github.io/tensorflow/tf-slim/2016/12/21/tfrecords-guide/
|
||||||
http://www.machinelearninguru.com/deep_learning/tensorflow/basics/tfrecord/tfrecord.html
|
http://www.machinelearninguru.com/deep_learning/tensorflow/basics/tfrecord/tfrecord.html
|
||||||
|
|
@ -60,6 +61,13 @@ def create_spectrogram_tfrecords(audio_group='audio',sample_count=0):
|
||||||
for (output,group) in groups:
|
for (output,group) in groups:
|
||||||
group_prog = tqdm(group,desc='Writing Spectrogram')
|
group_prog = tqdm(group,desc='Writing Spectrogram')
|
||||||
for sample1,sample2 in group_prog:
|
for sample1,sample2 in group_prog:
|
||||||
|
same = sample1['variant'] == sample2['variant']
|
||||||
|
phon_same = sample1['phonemes'] == sample2['phonemes']
|
||||||
|
voice_diff = sample1['voice'] != sample2['voice']
|
||||||
|
if not same and phon_same:
|
||||||
|
continue
|
||||||
|
if same and not voice_diff:
|
||||||
|
continue
|
||||||
group_prog.set_postfix(output=output
|
group_prog.set_postfix(output=output
|
||||||
,var1=sample1['variant']
|
,var1=sample1['variant']
|
||||||
,var2=sample2['variant'])
|
,var2=sample2['variant'])
|
||||||
|
|
@ -101,7 +109,7 @@ def create_spectrogram_tfrecords(audio_group='audio',sample_count=0):
|
||||||
|
|
||||||
word_groups = [i for i in audio_samples.groupby('word')]
|
word_groups = [i for i in audio_samples.groupby('word')]
|
||||||
wg_sampled = reservoir_sample(word_groups,sample_count) if sample_count > 0 else word_groups
|
wg_sampled = reservoir_sample(word_groups,sample_count) if sample_count > 0 else word_groups
|
||||||
tr_audio_samples,te_audio_samples = train_test_split(wg_sampled,test_size=0.1)
|
tr_audio_samples,te_audio_samples = train_test_split(wg_sampled,test_size=train_test_ratio)
|
||||||
write_samples(tr_audio_samples,'train')
|
write_samples(tr_audio_samples,'train')
|
||||||
write_samples(te_audio_samples,'test')
|
write_samples(te_audio_samples,'test')
|
||||||
const_file = os.path.join('./outputs',audio_group+'.constants')
|
const_file = os.path.join('./outputs',audio_group+'.constants')
|
||||||
|
|
@ -125,13 +133,16 @@ def reservoir_sample(iterable, k):
|
||||||
sample[j] = item # replace item with gradually decreasing probability
|
sample[j] = item # replace item with gradually decreasing probability
|
||||||
return sample
|
return sample
|
||||||
|
|
||||||
def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,test_size=100):
|
|
||||||
|
def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,test_size=0):
|
||||||
records_file = os.path.join('./outputs',audio_group+'.train.tfrecords')
|
records_file = os.path.join('./outputs',audio_group+'.train.tfrecords')
|
||||||
input_pairs = []
|
input_pairs = []
|
||||||
output_class = []
|
output_class = []
|
||||||
const_file = os.path.join('./outputs',audio_group+'.constants')
|
const_file = os.path.join('./outputs',audio_group+'.constants')
|
||||||
(n_spec,n_features,n_records) = pickle.load(open(const_file,'rb'))
|
(n_spec,n_features,n_records) = pickle.load(open(const_file,'rb'))
|
||||||
print('reading tfrecords({}-train)...'.format(audio_group))
|
print('reading tfrecords({}-train)...'.format(audio_group))
|
||||||
|
|
||||||
|
# @threadsafe_iter
|
||||||
def record_generator():
|
def record_generator():
|
||||||
input_data = []
|
input_data = []
|
||||||
output_data = []
|
output_data = []
|
||||||
|
|
@ -226,7 +237,7 @@ if __name__ == '__main__':
|
||||||
# pickle_constants('story_words')
|
# pickle_constants('story_words')
|
||||||
# create_spectrogram_tfrecords('audio',sample_count=100)
|
# create_spectrogram_tfrecords('audio',sample_count=100)
|
||||||
# create_spectrogram_tfrecords('story_all',sample_count=25)
|
# create_spectrogram_tfrecords('story_all',sample_count=25)
|
||||||
create_spectrogram_tfrecords('story_words',sample_count=10)
|
create_spectrogram_tfrecords('story_words',sample_count=10,train_test_ratio=0.2)
|
||||||
# create_spectrogram_tfrecords('audio',sample_count=50)
|
# create_spectrogram_tfrecords('audio',sample_count=50)
|
||||||
# read_siamese_tfrecords_generator('audio')
|
# read_siamese_tfrecords_generator('audio')
|
||||||
# padd_zeros_siamese_tfrecords('audio')
|
# padd_zeros_siamese_tfrecords('audio')
|
||||||
|
|
|
||||||
|
|
@ -12,11 +12,7 @@ from keras.utils import to_categorical
|
||||||
from keras.optimizers import RMSprop
|
from keras.optimizers import RMSprop
|
||||||
from keras.callbacks import TensorBoard, ModelCheckpoint
|
from keras.callbacks import TensorBoard, ModelCheckpoint
|
||||||
from keras import backend as K
|
from keras import backend as K
|
||||||
|
from speech_utils import create_dir
|
||||||
def create_dir(direc):
|
|
||||||
import os
|
|
||||||
if not os.path.exists(direc):
|
|
||||||
os.makedirs(direc)
|
|
||||||
|
|
||||||
# def euclidean_distance(vects):
|
# def euclidean_distance(vects):
|
||||||
# x, y = vects
|
# x, y = vects
|
||||||
|
|
@ -95,7 +91,7 @@ def train_siamese(audio_group = 'audio'):
|
||||||
create_dir(model_dir)
|
create_dir(model_dir)
|
||||||
log_dir = './logs/'+audio_group
|
log_dir = './logs/'+audio_group
|
||||||
create_dir(log_dir)
|
create_dir(log_dir)
|
||||||
tr_gen_fn,te_pairs,te_y,n_step,n_features,n_records = read_siamese_tfrecords_generator(audio_group,batch_size,256)
|
tr_gen_fn,te_pairs,te_y,n_step,n_features,n_records = read_siamese_tfrecords_generator(audio_group,batch_size=batch_size)
|
||||||
tr_gen = tr_gen_fn()
|
tr_gen = tr_gen_fn()
|
||||||
# tr_y = to_categorical(tr_y_e, num_classes=2)
|
# tr_y = to_categorical(tr_y_e, num_classes=2)
|
||||||
# te_y = to_categorical(te_y_e, num_classes=2)
|
# te_y = to_categorical(te_y_e, num_classes=2)
|
||||||
|
|
@ -138,7 +134,7 @@ def train_siamese(audio_group = 'audio'):
|
||||||
,epochs=1000
|
,epochs=1000
|
||||||
,steps_per_epoch=n_records//batch_size
|
,steps_per_epoch=n_records//batch_size
|
||||||
,validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)
|
,validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)
|
||||||
,use_multiprocessing=True
|
,use_multiprocessing=True, workers=1
|
||||||
,callbacks=[tb_cb, cp_cb])
|
,callbacks=[tb_cb, cp_cb])
|
||||||
model.save(model_dir+'/siamese_speech_model-final.h5')
|
model.save(model_dir+'/siamese_speech_model-final.h5')
|
||||||
# compute final accuracy on training and test sets
|
# compute final accuracy on training and test sets
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,74 @@
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
|
||||||
|
import multiprocessing
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_df(args):
|
||||||
|
df, func, num, kwargs = args
|
||||||
|
return num, df.apply(func, **kwargs)
|
||||||
|
|
||||||
|
def apply_by_multiprocessing(df,func,**kwargs):
|
||||||
|
cores = multiprocessing.cpu_count()
|
||||||
|
workers=kwargs.pop('workers') if 'workers' in kwargs else cores
|
||||||
|
pool = multiprocessing.Pool(processes=workers)
|
||||||
|
result = pool.map(_apply_df, [(d, func, i, kwargs) for i,d in enumerate(np.array_split(df, workers))])
|
||||||
|
pool.close()
|
||||||
|
result=sorted(result,key=lambda x:x[0])
|
||||||
|
return pd.concat([i[1] for i in result])
|
||||||
|
|
||||||
|
def square(x):
|
||||||
|
return x**x
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
df = pd.DataFrame({'a':range(10), 'b':range(10)})
|
||||||
|
apply_by_multiprocessing(df, square, axis=1, workers=4)
|
||||||
|
|
||||||
|
|
||||||
|
def rm_rf(d):
|
||||||
|
for path in (os.path.join(d,f) for f in os.listdir(d)):
|
||||||
|
if os.path.isdir(path):
|
||||||
|
rm_rf(path)
|
||||||
|
else:
|
||||||
|
os.unlink(path)
|
||||||
|
os.rmdir(d)
|
||||||
|
|
||||||
|
def create_dir(direc):
|
||||||
|
if not os.path.exists(direc):
|
||||||
|
os.makedirs(direc)
|
||||||
|
else:
|
||||||
|
rm_rf(direc)
|
||||||
|
create_dir(direc)
|
||||||
|
|
||||||
|
|
||||||
|
#################### Now make the data generator threadsafe ####################
|
||||||
|
|
||||||
|
class threadsafe_iter:
|
||||||
|
"""Takes an iterator/generator and makes it thread-safe by
|
||||||
|
serializing call to the `next` method of given iterator/generator.
|
||||||
|
"""
|
||||||
|
def __init__(self, it):
|
||||||
|
self.it = it
|
||||||
|
self.lock = threading.Lock()
|
||||||
|
|
||||||
|
def __iter__(self):
|
||||||
|
return self
|
||||||
|
|
||||||
|
def __next__(self): # Py3
|
||||||
|
with self.lock:
|
||||||
|
return next(self.it)
|
||||||
|
|
||||||
|
def next(self): # Py2
|
||||||
|
with self.lock:
|
||||||
|
return self.it.next()
|
||||||
|
|
||||||
|
|
||||||
|
def threadsafe_generator(f):
|
||||||
|
"""A decorator that takes a generator function and makes it thread-safe.
|
||||||
|
"""
|
||||||
|
def g(*a, **kw):
|
||||||
|
return threadsafe_iter(f(*a, **kw))
|
||||||
|
return g
|
||||||
|
|
@ -1,13 +1,14 @@
|
||||||
from speech_siamese import siamese_model
|
from speech_siamese import siamese_model
|
||||||
from record_mic_speech import record_spectrogram
|
from record_mic_speech import record_spectrogram
|
||||||
from importlib import reload
|
# from importlib import reload
|
||||||
# import speech_data
|
# import speech_data
|
||||||
# reload(speech_data)
|
# reload(speech_data)
|
||||||
from speech_data import create_test_pair,get_word_pairs_data,speech_data
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import os
|
||||||
model = siamese_model((15, 1654))
|
import pickle
|
||||||
model.load_weights('./models/siamese_speech_model-final.h5')
|
import tensorflow as tf
|
||||||
|
import csv
|
||||||
|
from speech_data import padd_zeros
|
||||||
|
|
||||||
def predict_recording_with(m,sample_size=15):
|
def predict_recording_with(m,sample_size=15):
|
||||||
spec1 = record_spectrogram(n_sec=1.4)
|
spec1 = record_spectrogram(n_sec=1.4)
|
||||||
|
|
@ -24,7 +25,55 @@ def test_with(audio_group):
|
||||||
print(np.argmax(model.predict([X[:, 0], X[:, 1]]),axis=1))
|
print(np.argmax(model.predict([X[:, 0], X[:, 1]]),axis=1))
|
||||||
print(Y.astype(np.int8))
|
print(Y.astype(np.int8))
|
||||||
|
|
||||||
test_with('rand_edu')
|
def evaluate_siamese(audio_group='audio',model_file = 'siamese_speech_model-46-epoch-0.29-acc.h5'):
|
||||||
|
records_file = os.path.join('./outputs',audio_group+'.train.tfrecords')
|
||||||
|
const_file = os.path.join('./outputs',audio_group+'.constants')
|
||||||
|
model_weights_path =os.path.join('./models/story_words/',model_file)
|
||||||
|
(n_spec,n_features,n_records) = pickle.load(open(const_file,'rb'))
|
||||||
|
print('evaluating tfrecords({}-train)...'.format(audio_group))
|
||||||
|
|
||||||
|
model = siamese_model((n_spec, n_features))
|
||||||
|
model.load_weights(model_weights_path)
|
||||||
|
record_iterator = tf.python_io.tf_record_iterator(path=records_file)
|
||||||
|
#tqdm(enumerate(record_iterator),total=n_records)
|
||||||
|
with open('./outputs/' + audio_group + '.results.csv','w') as result_csv:
|
||||||
|
result_csv_w = csv.writer(result_csv, quoting=csv.QUOTE_MINIMAL)
|
||||||
|
for (i,string_record) in enumerate(record_iterator):
|
||||||
|
# string_record = next(record_iterator)
|
||||||
|
example = tf.train.Example()
|
||||||
|
example.ParseFromString(string_record)
|
||||||
|
spec_n1 = example.features.feature['spec_n1'].int64_list.value[0]
|
||||||
|
spec_n2 = example.features.feature['spec_n2'].int64_list.value[0]
|
||||||
|
spec_w1 = example.features.feature['spec_w1'].int64_list.value[0]
|
||||||
|
spec_w2 = example.features.feature['spec_w2'].int64_list.value[0]
|
||||||
|
spec1 = np.array(example.features.feature['spec1'].float_list.value).reshape(spec_n1,spec_w1)
|
||||||
|
spec2 = np.array(example.features.feature['spec2'].float_list.value).reshape(spec_n2,spec_w2)
|
||||||
|
p_spec1,p_spec2 = padd_zeros(spec1,n_spec),padd_zeros(spec2,n_spec)
|
||||||
|
input_arr = np.asarray([[p_spec1,p_spec2]])
|
||||||
|
output_arr = np.asarray([example.features.feature['output'].int64_list.value])
|
||||||
|
y_pred = model.predict([input_arr[:, 0], input_arr[:, 1]])
|
||||||
|
predicted = np.asarray(y_pred[0]>0.5).astype(output_arr.dtype)
|
||||||
|
expected = output_arr[0]
|
||||||
|
if np.all(predicted == expected):
|
||||||
|
continue
|
||||||
|
word = example.features.feature['word'].bytes_list.value[0].decode()
|
||||||
|
phoneme1 = example.features.feature['phoneme1'].bytes_list.value[0].decode()
|
||||||
|
phoneme2 = example.features.feature['phoneme2'].bytes_list.value[0].decode()
|
||||||
|
voice1 = example.features.feature['voice1'].bytes_list.value[0].decode()
|
||||||
|
voice2 = example.features.feature['voice2'].bytes_list.value[0].decode()
|
||||||
|
language = example.features.feature['language'].bytes_list.value[0].decode()
|
||||||
|
rate1 = example.features.feature['rate1'].int64_list.value[0]
|
||||||
|
rate2 = example.features.feature['rate2'].int64_list.value[0]
|
||||||
|
variant1 = example.features.feature['variant1'].bytes_list.value[0].decode()
|
||||||
|
variant2 = example.features.feature['variant2'].bytes_list.value[0].decode()
|
||||||
|
file1 = example.features.feature['file1'].bytes_list.value[0].decode()
|
||||||
|
file2 = example.features.feature['file2'].bytes_list.value[0].decode()
|
||||||
|
print(phoneme1,phoneme2,voice1,voice2,rate1,rate2,variant1,variant2,file1,file2)
|
||||||
|
result_csv_w.writerow([phoneme1,phoneme2,voice1,voice2,rate1,rate2,variant1,variant2,file1,file2])
|
||||||
|
|
||||||
|
|
||||||
|
evaluate_siamese('story_words',model_file='siamese_speech_model-92-epoch-0.20-acc.h5')
|
||||||
|
# test_with('rand_edu')
|
||||||
# sunflower_data,sunflower_result = get_word_pairs_data('sweater',15)
|
# sunflower_data,sunflower_result = get_word_pairs_data('sweater',15)
|
||||||
# print(np.argmax(model.predict([sunflower_data[:, 0], sunflower_data[:, 1]]),axis=1))
|
# print(np.argmax(model.predict([sunflower_data[:, 0], sunflower_data[:, 1]]),axis=1))
|
||||||
# print(sunflower_result)
|
# print(sunflower_result)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue