Compare commits

...

4 Commits

Author SHA1 Message Date
Malar Kannan c8a07b3d7b Merge branch 'master' of ssh://invnuc/~/Public/Repos/speech_scoring 2017-12-07 12:00:59 +05:30
Malar Kannan 8785522196 Merge branch 'master' of /home/ilml/Public/Repos/speech_scoring 2017-12-07 12:00:44 +05:30
Malar Kannan 435c4a4aa6 added a resume parameter for training 2017-12-07 12:00:42 +05:30
Malar Kannan c1801b5aa3 implemented segment tfrecords batch data-generator 2017-12-07 11:48:19 +05:30
5 changed files with 90 additions and 22 deletions

View File

@@ -176,6 +176,69 @@ def create_segments_tfrecords(collection_name='story_test_segments',sample_count
     const_file = './outputs/segments/'+collection_name+'/constants.pkl'
     pickle.dump((n_spec,n_features,n_records),open(const_file,'wb'))
 
+def record_generator_count(records_file):
+    record_iterator = tf.python_io.tf_record_iterator(path=records_file)
+    count,spec_n = 0,0
+    for i in record_iterator:
+        count+=1
+    record_iterator = tf.python_io.tf_record_iterator(path=records_file)
+    return record_iterator,count
+
+def read_segments_tfrecords_generator(collection_name='audio',batch_size=32,test_size=0):
+    records_file = './outputs/segments/'+collection_name+'/train.tfrecords'
+    const_file = './outputs/segments/'+collection_name+'/constants.pkl'
+    (n_spec,n_features,n_records) = pickle.load(open(const_file,'rb'))
+    def copy_read_consts(dest_dir):
+        shutil.copy2(const_file,dest_dir+'/constants.pkl')
+        return (n_spec,n_features,n_records)
+    # @threadsafe_iter
+    def record_generator():
+        print('reading tfrecords({}-train)...'.format(collection_name))
+        input_data = []
+        output_data = []
+        while True:
+            record_iterator,records_count = record_generator_count(records_file)
+            for (i,string_record) in enumerate(record_iterator):
+                example = tf.train.Example()
+                example.ParseFromString(string_record)
+                spec_n = example.features.feature['spec_n'].int64_list.value[0]
+                spec_w = example.features.feature['spec_w'].int64_list.value[0]
+                spec = np.array(example.features.feature['spec'].float_list.value).reshape(spec_n,spec_w)
+                spec = np.array(example.features.feature['output'].int64_list.value)
+                p_spec = padd_zeros(spec,n_spec)
+                input_data.append(p_spec)
+                output = example.features.feature['output'].int64_list.value
+                output_data.append(np.asarray(output))
+                if len(input_data) == batch_size or i == n_records-1:
+                    input_arr = np.asarray(input_data)
+                    output_arr = np.asarray(output_data)
+                    yield (input_arr,output_arr)
+                    input_data = []
+                    output_data = []
+    # Read test in one-shot
+    # collection_name = 'story_test'
+    print('reading tfrecords({}-test)...'.format(collection_name))
+    te_records_file = './outputs/segments/'+collection_name+'/test.tfrecords'
+    te_re_iterator,te_n_records = record_generator_count(te_records_file)
+    test_size = min([test_size,te_n_records]) if test_size > 0 else te_n_records
+    input_data = np.zeros((test_size,2,n_spec,n_features))
+    output_data = np.zeros((test_size,2))
+    random_samples = enumerate(reservoir_sample(te_re_iterator,test_size))
+    for (i,string_record) in tqdm(random_samples,total=test_size):
+        example = tf.train.Example()
+        example.ParseFromString(string_record)
+        spec_n = example.features.feature['spec_n'].int64_list.value[0]
+        spec_w = example.features.feature['spec_w'].int64_list.value[0]
+        spec = np.array(example.features.feature['spec'].float_list.value).reshape(spec_n,spec_w)
+        p_spec = padd_zeros(spec,n_spec)
+        input_data[i] = p_spec
+        output = example.features.feature['output'].int64_list.value
+        output_data[i] = np.asarray(output)
+    return record_generator,input_data,output_data,copy_read_consts
+
 if __name__ == '__main__':
     # plot_random_phrases()
     # fix_csv('story_test_segments')

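Two notes on the new reader above. First, in the training generator the freshly reshaped spec is immediately overwritten with the integer output values before padding; the one-shot test loop below it has no such line, so it reads like a stray assignment. Second, reservoir_sample(te_re_iterator, test_size) is called but not defined anywhere in this compare; the itertools and random imports added to the utility module in the last file suggest it lives there. A minimal reservoir-sampling (Algorithm R) sketch of what such a helper typically does, assuming it takes any iterator and a sample size:

import random
import itertools

def reservoir_sample(iterable, k):
    # Uniformly sample k items from an iterator of unknown length
    # (illustrative sketch only; not the repository's implementation).
    it = iter(iterable)
    reservoir = list(itertools.islice(it, k))   # fill the reservoir with the first k items
    for i, item in enumerate(it, start=k):
        j = random.randint(0, i)                # keep item with probability k/(i+1)
        if j < k:
            reservoir[j] = item
    return reservoir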
View File

@@ -12,7 +12,13 @@ from keras.callbacks import TensorBoard, ModelCheckpoint
 from keras import backend as K
 from keras.utils import plot_model
 from speech_tools import create_dir,step_count
-from segment_data import segment_data_gen
+from segment_data import read_segments_tfrecords_generator
+import importlib
+import segment_data
+import speech_tools
+importlib.reload(segment_data)
+importlib.reload(speech_tools)
 
 # TODO implement ctc losses
@@ -48,22 +54,16 @@ def segment_model(input_dim):
     return Model(inp, oup)
 
 def simple_segment_model(input_dim):
-    input_dim = (100,100,1)
+    # input_dim = (100,100)
     inp = Input(shape=input_dim)
-    cnv1 = Conv2D(filters=32, kernel_size=(5,9))(inp)
-    cnv2 = Conv2D(filters=1, kernel_size=(5,9))(cnv1)
-    dr_cnv2 = Dropout(rate=0.95)(cnv2)
-    # dr_cnv2
-    cn_rnn_dim = (dr_cnv2.shape[1].value,dr_cnv2.shape[2].value)
-    r_dr_cnv2 = Reshape(target_shape=cn_rnn_dim)(dr_cnv2)
-    b_gr1 = Bidirectional(GRU(512, return_sequences=True),merge_mode='sum')(r_dr_cnv2)
+    b_gr1 = Bidirectional(GRU(256, return_sequences=True),merge_mode='sum')(inp)
     # b_gr1
-    b_gr2 = Bidirectional(GRU(512, return_sequences=True),merge_mode='sum')(b_gr1)
-    b_gr3 = Bidirectional(GRU(512, return_sequences=True),merge_mode='sum')(b_gr2)
+    b_gr2 = Bidirectional(GRU(64, return_sequences=True),merge_mode='sum')(b_gr1)
+    b_gr3 = Bidirectional(GRU(1, return_sequences=True),merge_mode='sum')(b_gr2)
     # b_gr3
-    oup = Dense(2, activation='softmax')(b_gr3)
+    # oup = Dense(2, activation='softmax')(b_gr3)
     # oup
-    return Model(inp, oup)
+    return Model(inp, b_gr3)
 
 def write_model_arch(mod,mod_file):
     model_f = open(mod_file,'w')
@@ -77,15 +77,16 @@ def load_model_arch(mod_file):
     return mod
 
 def train_segment(collection_name = 'test'):
+    collection_name = 'story_test'
     batch_size = 128
     model_dir = './models/segment/'+collection_name
     create_dir(model_dir)
     log_dir = './logs/segment/'+collection_name
     create_dir(log_dir)
-    tr_gen_fn = segment_data_gen()
+    tr_gen_fn,inp,oup,copy_read_consts = read_segments_tfrecords_generator(collection_name,batch_size,2*batch_size)
     tr_gen = tr_gen_fn()
+    n_step,n_features,n_records = copy_read_consts(model_dir)
     input_dim = (n_step, n_features)
     model = simple_segment_model(input_dim)
     plot_model(model,show_shapes=True, to_file=model_dir+'/model.png')
     # loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])

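The train_segment hunk above stops at plot_model, so the compile/fit wiring for the segment model is outside this compare. A hedged sketch of how the pieces returned by read_segments_tfrecords_generator would plausibly be used, mirroring the fit_generator pattern in the siamese trainer below; the loss, optimizer, callbacks and checkpoint filename here are assumptions, not code from the repository:

from keras.optimizers import RMSprop
from keras.callbacks import TensorBoard, ModelCheckpoint

# Assumed continuation of train_segment(); not part of this diff.
epoch_n_steps = step_count(n_records, batch_size)              # batches per epoch, from constants.pkl
model.compile(loss='binary_crossentropy', optimizer=RMSprop(), metrics=['accuracy'])
tb_cb = TensorBoard(log_dir=log_dir)
cp_cb = ModelCheckpoint(model_dir + '/segment_model-{epoch:02d}.h5')
model.fit_generator(tr_gen,
                    epochs=1000,
                    steps_per_epoch=epoch_n_steps,
                    validation_data=(inp, oup),                # one-shot test arrays from the reader
                    max_queue_size=8,
                    callbacks=[tb_cb, cp_cb])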
View File

@@ -9,7 +9,6 @@ from speech_spectrum import generate_aiff_spectrogram
 from speech_pitch import pitch_array
 from speech_pitch import compute_mfcc
 from sklearn.model_selection import train_test_split
-import itertools
 import os,shutil
 import random
 import csv
@@ -168,7 +167,7 @@ def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,test_size
     # Read test in one-shot
     print('reading tfrecords({}-test)...'.format(audio_group))
     te_records_file = os.path.join('./outputs',audio_group+'.test.tfrecords')
-    te_re_iterator,te_n_records = record_generator_count(records_file)
+    te_re_iterator,te_n_records = record_generator_count(te_records_file)
     test_size = min([test_size,te_n_records]) if test_size > 0 else te_n_records
     input_data = np.zeros((test_size,2,n_spec,n_features))
     output_data = np.zeros((test_size,2))

View File

@@ -74,7 +74,7 @@ def load_model_arch(mod_file):
     model_f.close()
     return mod
 
-def train_siamese(audio_group = 'audio'):
+def train_siamese(audio_group = 'audio',resume_weights='',initial_epoch=0):
     batch_size = 128
     model_dir = './models/'+audio_group
     create_dir(model_dir)
@@ -114,19 +114,22 @@ def train_siamese(audio_group = 'audio'):
     model.compile(loss=categorical_crossentropy, optimizer=rms, metrics=[accuracy])
     write_model_arch(model,model_dir+'/siamese_speech_model_arch.yaml')
     epoch_n_steps = step_count(n_records,batch_size)
+    if resume_weights != '':
+        model.load_weights(resume_weights)
     model.fit_generator(tr_gen
         , epochs=1000
         , steps_per_epoch=epoch_n_steps
         , validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)
         , max_queue_size=8
-        , callbacks=[tb_cb, cp_cb])
+        , callbacks=[tb_cb, cp_cb],initial_epoch=initial_epoch)
     model.save(model_dir+'/siamese_speech_model-final.h5')
-    y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])
-    te_acc = compute_accuracy(te_y, y_pred)
-    print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))
+    # y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])
+    # te_acc = compute_accuracy(te_y, y_pred)
+    # print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))
 
 if __name__ == '__main__':
     train_siamese('story_words_pitch')

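With the new resume_weights and initial_epoch parameters, an interrupted run can be continued from a saved checkpoint instead of restarting from scratch. A usage sketch; the checkpoint path and epoch number are illustrative, not taken from the repository:

# Resume siamese training from epoch 120 using previously saved weights.
train_siamese('story_words_pitch',
              resume_weights='./models/story_words_pitch/siamese_speech_model-120.h5',
              initial_epoch=120)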
View File

@@ -2,6 +2,8 @@ import os
 import math
 import string
 import threading
+import itertools
+import random
 import multiprocessing
 import pandas as pd
 import numpy as np