implemented streaming tfrecords

master
Malar Kannan 2017-11-09 20:31:29 +05:30
parent 0a4d4fadeb
commit ab452494b3
2 changed files with 57 additions and 21 deletions

View File

@ -183,15 +183,15 @@ def reservoir_sample(iterable, k):
sample[j] = item # replace item with gradually decreasing probability
return sample
def read_siamese_tfrecords_oneshot(audio_group='audio'):
def read_siamese_tfrecords_oneshot(audio_group='audio',sample_size=3000):
records_file = os.path.join('./outputs',audio_group+'_padded.tfrecords')
record_iterator = tf.python_io.tf_record_iterator(path=records_file)
input_pairs = []
output_class = []
const_file = os.path.join('./outputs',audio_group+'.constants')
(n_spec,n_features,n_records) = pickle.load(open(const_file,'rb'))
print('reading tfrecords...')
samples = min([30000,n_records])
print('reading tfrecords({})...'.format(audio_group))
samples = min([sample_size,n_records])
input_data = np.zeros((samples,2,n_spec,n_features))
output_data = np.zeros((samples,2))
random_samples = enumerate(reservoir_sample(record_iterator,samples))
@ -205,10 +205,38 @@ def read_siamese_tfrecords_oneshot(audio_group='audio'):
input_data[i] = np.asarray([spec1,spec2])
output = example.features.feature['output'].int64_list.value
output_data[i] = np.asarray(output)
print('converting to nparray...')
tr_pairs,te_pairs,tr_y,te_y = train_test_split(input_data,output_data,test_size=0.1)
result = (tr_pairs,te_pairs,tr_y,te_y,n_spec,n_features)
return result
# print('converting to nparray...')
# tr_pairs,te_pairs,tr_y,te_y = train_test_split(input_data,output_data,test_size=0.1)
# result = (tr_pairs,te_pairs,tr_y,te_y,n_spec,n_features)
return input_data,output_data
def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32):
    """Build a batch generator that streams spectrogram pairs from a tfrecords file.

    Reads ``./outputs/<audio_group>.constants`` (a pickled 3-tuple unpacked as
    ``(n_spec, n_features, n_records)``) eagerly; the matching
    ``./outputs/<audio_group>_padded.tfrecords`` file is only opened once the
    returned generator function is called and iterated.

    Args:
        audio_group: base name of the record/constants files under ``./outputs``.
        batch_size: number of records collected before a batch is yielded.

    Returns:
        A tuple ``(record_generator, n_spec, n_features, n_records)``.
        ``record_generator`` is a zero-argument function returning an endless
        generator of ``([spec1_batch, spec2_batch], output_batch)`` tuples.
    """
    records_file = os.path.join('./outputs',audio_group+'_padded.tfrecords')
    const_file = os.path.join('./outputs',audio_group+'.constants')
    # Use a context manager so the constants file handle is closed promptly.
    with open(const_file,'rb') as const_fh:
        (n_spec,n_features,n_records) = pickle.load(const_fh)
    print('reading tfrecords({})...'.format(audio_group))

    def record_generator():
        """Yield batches forever, re-reading the records file after each pass."""
        input_data = []
        output_data = []
        while True:
            # A fresh iterator per pass restarts reading from the first record.
            record_iterator = tf.python_io.tf_record_iterator(path=records_file)
            for string_record in tqdm(record_iterator,total=n_records):
                example = tf.train.Example()
                example.ParseFromString(string_record)
                features = example.features.feature
                spec1 = np.array(features['spec1'].float_list.value).reshape(n_spec,n_features)
                spec2 = np.array(features['spec2'].float_list.value).reshape(n_spec,n_features)
                input_data.append(np.asarray([spec1,spec2]))
                output = features['output'].int64_list.value
                output_data.append(np.asarray(output))
                if len(input_data) == batch_size:
                    input_arr = np.asarray(input_data)
                    output_arr = np.asarray(output_data)
                    # Split the pair axis into the two model inputs.
                    yield ([input_arr[:, 0], input_arr[:, 1]],output_arr)
                    input_data = []
                    output_data = []
            # Records left over at the end of a pass (fewer than batch_size)
            # stay in the buffers and are completed by the next pass.

    return record_generator,n_spec,n_features,n_records
def read_siamese_tfrecords(audio_group='audio'):
audio_group='story_words_test'
@ -324,7 +352,8 @@ if __name__ == '__main__':
# create_spectrogram_tfrecords('story_words_test')
# read_siamese_tfrecords('story_all')
# read_siamese_tfrecords('story_words_test')
pickle_constants('story_words_test')
padd_zeros_siamese_tfrecords('story_words')
# pickle_constants('story_words')
# create_spectrogram_tfrecords('audio')
# padd_zeros_siamese_tfrecords('audio')
# create_padded_spectrogram()

View File

@ -2,7 +2,7 @@ from __future__ import absolute_import
from __future__ import print_function
import numpy as np
# from speech_data import speech_model_data
from speech_data import read_siamese_tfrecords_oneshot
from speech_data import read_siamese_tfrecords_oneshot,read_siamese_tfrecords_generator
from keras.models import Model,load_model
from keras.layers import Input, Dense, Dropout, LSTM, Lambda, Concatenate
from keras.losses import categorical_crossentropy
@ -82,7 +82,10 @@ def siamese_model(input_dim):
def train_siamese():
# the data, shuffled and split between train and test sets
# tr_pairs, te_pairs, tr_y_e, te_y_e = speech_model_data()
(tr_pairs,te_pairs,tr_y,te_y,n_step,n_features) = read_siamese_tfrecords_oneshot()
batch_size = 512
tr_gen_fn,n_step,n_features,n_records = read_siamese_tfrecords_generator('audio',batch_size)
tr_gen = tr_gen_fn()
(te_pairs,te_y) = read_siamese_tfrecords_oneshot('audio',1000)
# tr_y = to_categorical(tr_y_e, num_classes=2)
# te_y = to_categorical(te_y_e, num_classes=2)
input_dim = (n_step, n_features)
@ -113,22 +116,26 @@ def train_siamese():
# train
rms = RMSprop(lr=0.001)
model.compile(loss=categorical_crossentropy, optimizer=rms, metrics=[accuracy])
model.fit(
[tr_pairs[:, 0], tr_pairs[:, 1]],
tr_y,
batch_size=128,
epochs=100,
validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),
callbacks=[tb_cb, cp_cb])
# model.fit(
# [tr_pairs[:, 0], tr_pairs[:, 1]],
# tr_y,
# batch_size=128,
# epochs=100,
# validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),
# callbacks=[tb_cb, cp_cb])
model.fit_generator(tr_gen
,epochs=100
,steps_per_epoch=n_records//batch_size
,use_multiprocessing=True)
model.save('./models/siamese_speech_model-final.h5')
# compute final accuracy on training and test sets
y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
tr_acc = compute_accuracy(tr_y, y_pred)
# y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
# tr_acc = compute_accuracy(tr_y, y_pred)
# print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))
y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])
te_acc = compute_accuracy(te_y, y_pred)
print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))
print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))