Merge branch 'master' of ssh://invnuc/~/Public/Repos/speech_scoring
commit 1f60183ab8
@@ -9,7 +9,7 @@ from speech_spectrum import generate_aiff_spectrogram
 from speech_pitch import compute_mfcc
 from sklearn.model_selection import train_test_split
 import itertools
-import os
+import os,shutil
 import random
 import csv
 import gc

@@ -145,15 +145,19 @@ def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,test_size
     output_class = []
     const_file = os.path.join('./outputs',audio_group+'.constants')
     (n_spec,n_features,n_records) = pickle.load(open(const_file,'rb'))
-    print('reading tfrecords({}-train)...'.format(audio_group))
+
+    def copy_read_consts(dest_dir):
+        shutil.copy2(const_file,dest_dir)
+        return (n_spec,n_features,n_records)
     # @threadsafe_iter
     def record_generator():
+        print('reading tfrecords({}-train)...'.format(audio_group))
         input_data = []
         output_data = []
         while True:
-            record_iterator = tf.python_io.tf_record_iterator(path=records_file)
-            #tqdm(enumerate(record_iterator),total=n_records)
+            record_iterator,records_count = record_generator_count(records_file)
+            #tqdm(enumerate(record_iterator),total=records_count)
             #enumerate(record_iterator)
             for (i,string_record) in enumerate(record_iterator):
                 example = tf.train.Example()
                 example.ParseFromString(string_record)

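Note: the refactor replaces eager unpacking of (n_spec,n_features,n_records) at the call site with a closure. copy_read_consts captures both the constants and the path of the .constants sidecar, so one call copies the file next to a saved model and returns the shape constants. A minimal sketch of the pattern (make_reader is a hypothetical name standing in for read_siamese_tfrecords_generator):

import pickle
import shutil

def make_reader(const_file):
    # Unpickle the shape constants once; the closure below reuses them.
    (n_spec, n_features, n_records) = pickle.load(open(const_file, 'rb'))

    def copy_read_consts(dest_dir):
        # Keep the .constants sidecar with whatever the caller writes to
        # dest_dir, then hand back the constants for building input shapes.
        shutil.copy2(const_file, dest_dir)
        return (n_spec, n_features, n_records)

    return copy_read_consts
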
@@ -175,11 +179,9 @@ def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,test_size
                 output_data = []

     # Read test in one-shot
-    te_records_file = os.path.join('./outputs',audio_group+'.test.tfrecords')
-    te_re_iterator = tf.python_io.tf_record_iterator(path=records_file)
-    te_n_records = len([i for i in te_re_iterator])
-    te_re_iterator = tf.python_io.tf_record_iterator(path=records_file)
     print('reading tfrecords({}-test)...'.format(audio_group))
+    te_records_file = os.path.join('./outputs',audio_group+'.test.tfrecords')
+    te_re_iterator,te_n_records = record_generator_count(records_file)
     test_size = min([test_size,te_n_records]) if test_size > 0 else te_n_records
     input_data = np.zeros((test_size,2,n_spec,n_features))
     output_data = np.zeros((test_size,2))

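Note: both before and after this change, the iterator is opened on records_file (the train split) even though te_records_file is computed on the line above, so the "test" arrays are filled from training records; that looks unintended and survives the refactor. A sketch of the one-shot test load as presumably intended (hypothetical helper, assuming te_records_file is the right source):

import numpy as np
import tensorflow as tf

def load_test_split(te_records_file, n_spec, n_features, test_size):
    # Mirror the preallocation in the hunk above.
    input_data = np.zeros((test_size, 2, n_spec, n_features))
    output_data = np.zeros((test_size, 2))
    it = tf.python_io.tf_record_iterator(path=te_records_file)
    for i, string_record in enumerate(it):
        if i >= test_size:
            break
        example = tf.train.Example()
        example.ParseFromString(string_record)
        # ...fill input_data[i] from the spectrogram features here, as the
        # surrounding function does for the train split...
        output_data[i] = np.asarray(example.features.feature['output'].int64_list.value)
    return input_data, output_data
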
@@ -198,7 +200,7 @@ def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,test_size
                 output = example.features.feature['output'].int64_list.value
                 output_data[i] = np.asarray(output)

-    return record_generator,input_data,output_data,n_spec,n_features,n_records
+    return record_generator,input_data,output_data,copy_read_consts

 def audio_samples_word_count(audio_group='audio'):
     audio_samples = pd.read_csv( './outputs/' + audio_group + '.csv')

@@ -206,7 +208,9 @@ def audio_samples_word_count(audio_group='audio'):

 def record_generator_count(records_file):
     record_iterator = tf.python_io.tf_record_iterator(path=records_file)
-    count = len([i for i in record_iterator])
+    count = 0
+    for i in record_iterator:
+        count+=1
     record_iterator = tf.python_io.tf_record_iterator(path=records_file)
     return record_iterator,count

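Note: the list-comprehension count is replaced by a plain loop, which avoids materializing every serialized record just to take len() of the list. Either way, tf.python_io.tf_record_iterator is single-pass, which is why the function reopens the file before returning an iterator. A quick demonstration of the pitfall (path is an example):

import tensorflow as tf

path = './outputs/story_phrases.train.tfrecords'  # example path
it = tf.python_io.tf_record_iterator(path=path)
count = sum(1 for _ in it)          # the counting pass consumes the iterator
print(next(it, 'exhausted'))        # prints 'exhausted': nothing left to yield
it = tf.python_io.tf_record_iterator(path=path)  # reopen before handing it out
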
@@ -250,7 +254,7 @@ if __name__ == '__main__':
     # create_spectrogram_tfrecords('story_all',sample_count=25)
     # fix_csv('story_words_test')
     #fix_csv('story_phrases')
-    create_spectrogram_tfrecords('story_phrases',sample_count=10,train_test_ratio=0.1)
+    create_spectrogram_tfrecords('story_phrases',sample_count=100,train_test_ratio=0.1)
    # create_spectrogram_tfrecords('audio',sample_count=50)
     # read_siamese_tfrecords_generator('audio')
     # padd_zeros_siamese_tfrecords('audio')

@@ -74,7 +74,8 @@ def train_siamese(audio_group = 'audio'):
     create_dir(model_dir)
     log_dir = './logs/'+audio_group
     create_dir(log_dir)
-    tr_gen_fn,te_pairs,te_y,n_step,n_features,n_records = read_siamese_tfrecords_generator(audio_group,batch_size=batch_size,test_size=batch_size)
+    tr_gen_fn,te_pairs,te_y,copy_read_consts = read_siamese_tfrecords_generator(audio_group,batch_size=batch_size,test_size=batch_size)
+    n_step,n_features,n_records = copy_read_consts()
     tr_gen = tr_gen_fn()
     input_dim = (n_step, n_features)

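Note: as defined in the reader, copy_read_consts(dest_dir) requires a destination directory, but the new call site invokes copy_read_consts() with no argument, which would raise a TypeError unless a default was added outside this hunk. Given that evaluate_siamese now reads the .constants file from './models/'+audio_group+'/', the intent is presumably to copy the sidecar into the model directory, roughly as below (hypothetical wiring; model_dir is defined in the surrounding train_siamese code):

tr_gen_fn, te_pairs, te_y, copy_read_consts = read_siamese_tfrecords_generator(
    audio_group, batch_size=batch_size, test_size=batch_size)
n_step, n_features, n_records = copy_read_consts(model_dir)  # copies the sidecar into model_dir
input_dim = (n_step, n_features)
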
@@ -29,14 +29,13 @@ def test_with(audio_group):
 def evaluate_siamese(records_file,audio_group='audio',weights = 'siamese_speech_model-final.h5'):
     # audio_group='audio';model_file = 'siamese_speech_model-305-epoch-0.20-acc.h5'
     # records_file = os.path.join('./outputs',eval_group+'.train.tfrecords')
-    const_file = os.path.join('./outputs',audio_group+'.constants')
+    const_file = os.path.join('./models/'+audio_group+'/',audio_group+'.constants')
     arch_file='./models/'+audio_group+'/siamese_speech_model_arch.yaml'
     weight_file='./models/'+audio_group+'/'+weights
     (n_spec,n_features,n_records) = pickle.load(open(const_file,'rb'))
     print('evaluating {}...'.format(records_file))
     model = load_model_arch(arch_file)
     # model = siamese_model((n_spec, n_features))
-    n_spec = 422
     model.load_weights(weight_file)
     record_iterator,records_count = record_generator_count(records_file)
     total,same_success,diff_success,skipped,same_failed,diff_failed = 0,0,0,0,0,0

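Note: moving const_file into the model directory is what lets the hardcoded n_spec = 422 fallback go away: the pickled constants now travel with the model. For reference, the sidecar is just a pickled 3-tuple; a minimal round-trip sketch (paths and values are examples only):

import pickle

n_spec, n_features, n_records = 422, 20, 10000  # example values only

# Writer side: persist the shape constants next to the model artifacts.
with open('./models/story_phrases/story_phrases.constants', 'wb') as f:
    pickle.dump((n_spec, n_features, n_records), f)

# Reader side (as in evaluate_siamese): recover them without hardcoding n_spec.
with open('./models/story_phrases/story_phrases.constants', 'rb') as f:
    (n_spec, n_features, n_records) = pickle.load(f)
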
@@ -103,6 +102,38 @@ def evaluate_siamese(records_file,audio_group='audio',weights = 'siamese_speech_
         "expected","predicted","success"])
     result_data.to_csv('./outputs/' + audio_group + '.results.csv')

+def inspect_tfrecord(records_file,audio_group='audio'):
+    record_iterator,records_count = record_generator_count(records_file)
+    all_results = []
+    for (i,string_record) in tqdm(enumerate(record_iterator),total=records_count):
+        # string_record = next(record_iterator)
+        example = tf.train.Example()
+        example.ParseFromString(string_record)
+        spec_n1 = example.features.feature['spec_n1'].int64_list.value[0]
+        spec_n2 = example.features.feature['spec_n2'].int64_list.value[0]
+        word = example.features.feature['word'].bytes_list.value[0].decode()
+        phoneme1 = example.features.feature['phoneme1'].bytes_list.value[0].decode()
+        phoneme2 = example.features.feature['phoneme2'].bytes_list.value[0].decode()
+        voice1 = example.features.feature['voice1'].bytes_list.value[0].decode()
+        voice2 = example.features.feature['voice2'].bytes_list.value[0].decode()
+        language = example.features.feature['language'].bytes_list.value[0].decode()
+        rate1 = example.features.feature['rate1'].int64_list.value[0]
+        rate2 = example.features.feature['rate2'].int64_list.value[0]
+        variant1 = example.features.feature['variant1'].bytes_list.value[0].decode()
+        variant2 = example.features.feature['variant2'].bytes_list.value[0].decode()
+        file1 = example.features.feature['file1'].bytes_list.value[0].decode()
+        file2 = example.features.feature['file2'].bytes_list.value[0].decode()
+        output_arr = np.asarray([example.features.feature['output'].int64_list.value])
+        expected = output_arr[0]
+        result = {"phoneme1":phoneme1,"phoneme2":phoneme2,"voice1":voice1
+            ,"voice2":voice2,"rate1":rate1,"rate2":rate2,"spec_n1":spec_n1
+            ,"spec_n2":spec_n2,"variant1":variant1,"variant2":variant2
+            ,"file1":file1,"file2":file2,"expected":expected[0]}
+        all_results.append(result)
+    result_data = pd.DataFrame(all_results,columns=["phoneme1","phoneme2"
+        ,"voice1","voice2","rate1","rate2","spec_n1","spec_n2","variant1","variant2","file1","file2",
+        "expected"])
+    result_data.to_csv('./outputs/' + audio_group + '.pairs.csv')
+
 def play_results(audio_group='audio'):
     result_data = pd.read_csv('./outputs/' + audio_group + '.results.csv')

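Note: inspect_tfrecord is the reader side of the pair schema; the writer is not part of this commit. For reference, a record with these fields would be serialized roughly as below (field names taken from the parse calls above; all values and helper names are illustrative, and the actual records must also carry the spectrogram payload that the training reader consumes, omitted here):

import tensorflow as tf

def _bytes(v):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[v.encode()]))

def _int64(v):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[v]))

example = tf.train.Example(features=tf.train.Features(feature={
    'spec_n1': _int64(422), 'spec_n2': _int64(410),          # frame counts (example values)
    'word': _bytes('sweater'),
    'phoneme1': _bytes('S W EH1 T ER0'), 'phoneme2': _bytes('S W EH1 T ER0'),
    'voice1': _bytes('Alex'), 'voice2': _bytes('Samantha'),  # hypothetical voice names
    'language': _bytes('en'),
    'rate1': _int64(180), 'rate2': _int64(200),              # speaking rates (example values)
    'variant1': _bytes('a'), 'variant2': _bytes('b'),
    'file1': _bytes('a.aiff'), 'file2': _bytes('b.aiff'),
    'output': tf.train.Feature(int64_list=tf.train.Int64List(value=[1, 0])),  # two-way pair label
}))
with tf.python_io.TFRecordWriter('./outputs/example.pairs.tfrecords') as writer:
    writer.write(example.SerializeToString())
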
@@ -133,8 +164,10 @@ def play_results(audio_group='audio'):
 def visualize_results(audio_group='audio'):
     # %matplotlib inline
     audio_group = 'story_phrases'
+    source = pd.read_csv('./outputs/'+audio_group+'.pairs.csv',index_col=0)
+    source.groupby(['voice1','voice2']).size()
     result = pd.read_csv('./outputs/' + audio_group + '.results.csv',index_col=0)
-    result.groupby('success').size().plot(kind='bar')
+    # result.groupby('success').size().plot(kind='bar')
     result.describe(include=['object'])
     failed = result[result['success'] == False]
     same_failed = failed[failed['variant1'] == failed['variant2']]

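Note: the two added pandas lines compute the voice-pair group sizes but discard the result, so they only show anything in an interactive session. A sketch of how the same pairing information could be surfaced (pd.crosstab as a swap-in for groupby().size(); path as in the diff):

import pandas as pd

source = pd.read_csv('./outputs/story_phrases.pairs.csv', index_col=0)
# Same information as source.groupby(['voice1','voice2']).size(), shaped as a
# voice1-by-voice2 matrix that is easier to scan for missing pairings.
pair_counts = pd.crosstab(source['voice1'], source['voice2'])
print(pair_counts)
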
@@ -145,9 +178,10 @@ def visualize_results(audio_group='audio'):
 if __name__ == '__main__':
     # evaluate_siamese('./outputs/story_words_test.train.tfrecords',audio_group='story_words.gpu',weights ='siamese_speech_model-58-epoch-0.00-acc.h5')
     # evaluate_siamese('./outputs/story_words.test.tfrecords',audio_group='story_words',weights ='siamese_speech_model-675-epoch-0.00-acc.h5')
-    evaluate_siamese('./outputs/story_phrases.test.tfrecords',audio_group='story_phrases',weights ='siamese_speech_model-329-epoch-0.00-acc.h5')
+    evaluate_siamese('./outputs/story_words_test.train.tfrecords',audio_group='story_phrases',weights ='siamese_speech_model-231-epoch-0.00-acc.h5')
     # play_results('story_words')
-    visualize_results('story_words.gpu')
+    #inspect_tfrecord('./outputs/story_phrases.test.tfrecords',audio_group='story_phrases')
+    # visualize_results('story_words.gpu')
     # test_with('rand_edu')
     # sunflower_data,sunflower_result = get_word_pairs_data('sweater',15)
     # print(np.argmax(model.predict([sunflower_data[:, 0], sunflower_data[:, 1]]),axis=1))