From e9f54c7f6ffaad0f3e55cbeff65a5ccd2359fa1e Mon Sep 17 00:00:00 2001
From: Malar Kannan
Date: Wed, 15 Nov 2017 14:14:17 +0530
Subject: [PATCH] 1. tuned batchsize 2. fixed last batch carry-over

---
 speech_data.py  |  2 +-
 speech_model.py | 18 ++++++++++--------
 speech_tools.py |  4 ++++
 3 files changed, 15 insertions(+), 9 deletions(-)

diff --git a/speech_data.py b/speech_data.py
index 4242998..b7b5067 100644
--- a/speech_data.py
+++ b/speech_data.py
@@ -161,7 +161,7 @@ def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,test_size
             input_data.append(np.asarray([p_spec1,p_spec2]))
             output = example.features.feature['output'].int64_list.value
             output_data.append(np.asarray(output))
-            if len(input_data) == batch_size:
+            if len(input_data) == batch_size or i == n_records-1:
                 input_arr = np.asarray(input_data)
                 output_arr = np.asarray(output_data)
                 yield ([input_arr[:, 0], input_arr[:, 1]],output_arr)
diff --git a/speech_model.py b/speech_model.py
index 5136398..4b2d234 100644
--- a/speech_model.py
+++ b/speech_model.py
@@ -12,7 +12,7 @@ from keras.utils import to_categorical
 from keras.optimizers import RMSprop
 from keras.callbacks import TensorBoard, ModelCheckpoint
 from keras import backend as K
-from speech_tools import create_dir
+from speech_tools import create_dir,step_count
 
 # def euclidean_distance(vects):
 #     x, y = vects
@@ -96,7 +96,7 @@ def load_model_arch(mod_file):
 def train_siamese(audio_group = 'audio'):
     # the data, shuffled and split between train and test sets
     # tr_pairs, te_pairs, tr_y_e, te_y_e = speech_model_data()
-    batch_size = 128
+    batch_size = 256
     model_dir = './models/'+audio_group
     create_dir(model_dir)
     log_dir = './logs/'+audio_group
@@ -141,12 +141,14 @@
     #           epochs=100,
     #           validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),
     #           callbacks=[tb_cb, cp_cb])
+    epoch_n_steps = step_count(n_records,batch_size)
     model.fit_generator(tr_gen
-            ,epochs=1000
-            ,steps_per_epoch=n_records//batch_size
-            ,validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)
-            ,use_multiprocessing=True, workers=1
-            ,callbacks=[tb_cb, cp_cb])
+            , epochs=1000
+            , steps_per_epoch=epoch_n_steps
+            , validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)
+            # ,use_multiprocessing=True, workers=1
+            , max_queue_size=32
+            , callbacks=[tb_cb, cp_cb])
     model.save(model_dir+'/siamese_speech_model-final.h5')
     # compute final accuracy on training and test sets
     # y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
@@ -160,5 +162,5 @@
 
 
 if __name__ == '__main__':
-    train_siamese('story_words')
+    train_siamese('story_words_test')
     # train_siamese('audio')
diff --git a/speech_tools.py b/speech_tools.py
index c252ac5..8e11986 100644
--- a/speech_tools.py
+++ b/speech_tools.py
@@ -1,4 +1,5 @@
 import os
+import math
 import threading
 import multiprocessing
 import pandas as pd
@@ -11,6 +12,9 @@ from speech_spectrum import plot_stft, generate_spec_frec
 SAMPLE_RATE = 22050
 N_CHANNELS = 2
 
+def step_count(n_records,batch_size):
+    return int(math.ceil(n_records*1.0/batch_size))
+
 def file_player():
     p_oup = pyaudio.PyAudio()
     def play_file(audiopath,plot=False):