implemented streaming tfrecords

master
Malar Kannan 2017-11-09 20:31:29 +05:30
parent 0a4d4fadeb
commit ab452494b3
2 changed files with 57 additions and 21 deletions

View File

@ -183,15 +183,15 @@ def reservoir_sample(iterable, k):
sample[j] = item # replace item with gradually decreasing probability sample[j] = item # replace item with gradually decreasing probability
return sample return sample
def read_siamese_tfrecords_oneshot(audio_group='audio'): def read_siamese_tfrecords_oneshot(audio_group='audio',sample_size=3000):
records_file = os.path.join('./outputs',audio_group+'_padded.tfrecords') records_file = os.path.join('./outputs',audio_group+'_padded.tfrecords')
record_iterator = tf.python_io.tf_record_iterator(path=records_file) record_iterator = tf.python_io.tf_record_iterator(path=records_file)
input_pairs = [] input_pairs = []
output_class = [] output_class = []
const_file = os.path.join('./outputs',audio_group+'.constants') const_file = os.path.join('./outputs',audio_group+'.constants')
(n_spec,n_features,n_records) = pickle.load(open(const_file,'rb')) (n_spec,n_features,n_records) = pickle.load(open(const_file,'rb'))
print('reading tfrecords...') print('reading tfrecords({})...'.format(audio_group))
samples = min([30000,n_records]) samples = min([sample_size,n_records])
input_data = np.zeros((samples,2,n_spec,n_features)) input_data = np.zeros((samples,2,n_spec,n_features))
output_data = np.zeros((samples,2)) output_data = np.zeros((samples,2))
random_samples = enumerate(reservoir_sample(record_iterator,samples)) random_samples = enumerate(reservoir_sample(record_iterator,samples))
@ -205,10 +205,38 @@ def read_siamese_tfrecords_oneshot(audio_group='audio'):
input_data[i] = np.asarray([spec1,spec2]) input_data[i] = np.asarray([spec1,spec2])
output = example.features.feature['output'].int64_list.value output = example.features.feature['output'].int64_list.value
output_data[i] = np.asarray(output) output_data[i] = np.asarray(output)
print('converting to nparray...') # print('converting to nparray...')
tr_pairs,te_pairs,tr_y,te_y = train_test_split(input_data,output_data,test_size=0.1) # tr_pairs,te_pairs,tr_y,te_y = train_test_split(input_data,output_data,test_size=0.1)
result = (tr_pairs,te_pairs,tr_y,te_y,n_spec,n_features) # result = (tr_pairs,te_pairs,tr_y,te_y,n_spec,n_features)
return result return input_data,output_data
def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32):
    """Prepare a generator factory that streams batches from a padded tfrecords file.

    The records are read from ``./outputs/<audio_group>_padded.tfrecords`` and
    the dataset constants from ``./outputs/<audio_group>.constants`` (a pickled
    ``(n_spec, n_features, n_records)`` tuple).

    Args:
        audio_group: Base name of the tfrecords/constants files under ``./outputs``.
        batch_size: Number of record pairs per yielded batch; must be >= 1.

    Returns:
        A tuple ``(record_generator, n_spec, n_features, n_records)``.
        Calling ``record_generator()`` gives an endless generator yielding
        ``([first_specs, second_specs], outputs)`` where the two spec arrays
        are the ``[:, 0]`` and ``[:, 1]`` slices of the batched pairs.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (the generator would
            otherwise buffer records forever without yielding).
    """
    if batch_size < 1:
        raise ValueError('batch_size must be a positive integer')
    records_file = os.path.join('./outputs',audio_group+'_padded.tfrecords')
    const_file = os.path.join('./outputs',audio_group+'.constants')
    # NOTE(review): pickle must only be used on trusted files; the constants
    # file is presumably produced by this project — confirm.
    with open(const_file,'rb') as const_fh:
        (n_spec,n_features,n_records) = pickle.load(const_fh)
    print('reading tfrecords({})...'.format(audio_group))

    def record_generator():
        """Yield batches indefinitely, re-reading the records file on each pass."""
        # The buffers live outside the `while` loop, so a partial batch left
        # at the end of one pass is completed by the start of the next pass.
        input_data = []
        output_data = []
        while True:
            record_iterator = tf.python_io.tf_record_iterator(path=records_file)
            for string_record in tqdm(record_iterator,total=n_records):
                example = tf.train.Example()
                example.ParseFromString(string_record)
                features = example.features.feature
                spec1 = np.array(features['spec1'].float_list.value).reshape(n_spec,n_features)
                spec2 = np.array(features['spec2'].float_list.value).reshape(n_spec,n_features)
                input_data.append(np.asarray([spec1,spec2]))
                output = features['output'].int64_list.value
                output_data.append(np.asarray(output))
                if len(input_data) == batch_size:
                    input_arr = np.asarray(input_data)
                    output_arr = np.asarray(output_data)
                    # Split the pair axis into the two model inputs.
                    yield ([input_arr[:, 0], input_arr[:, 1]],output_arr)
                    input_data = []
                    output_data = []

    return record_generator,n_spec,n_features,n_records
def read_siamese_tfrecords(audio_group='audio'): def read_siamese_tfrecords(audio_group='audio'):
audio_group='story_words_test' audio_group='story_words_test'
@ -324,7 +352,8 @@ if __name__ == '__main__':
# create_spectrogram_tfrecords('story_words_test') # create_spectrogram_tfrecords('story_words_test')
# read_siamese_tfrecords('story_all') # read_siamese_tfrecords('story_all')
# read_siamese_tfrecords('story_words_test') # read_siamese_tfrecords('story_words_test')
pickle_constants('story_words_test') padd_zeros_siamese_tfrecords('story_words')
# pickle_constants('story_words')
# create_spectrogram_tfrecords('audio') # create_spectrogram_tfrecords('audio')
# padd_zeros_siamese_tfrecords('audio') # padd_zeros_siamese_tfrecords('audio')
# create_padded_spectrogram() # create_padded_spectrogram()

View File

@ -2,7 +2,7 @@ from __future__ import absolute_import
from __future__ import print_function from __future__ import print_function
import numpy as np import numpy as np
# from speech_data import speech_model_data # from speech_data import speech_model_data
from speech_data import read_siamese_tfrecords_oneshot from speech_data import read_siamese_tfrecords_oneshot,read_siamese_tfrecords_generator
from keras.models import Model,load_model from keras.models import Model,load_model
from keras.layers import Input, Dense, Dropout, LSTM, Lambda, Concatenate from keras.layers import Input, Dense, Dropout, LSTM, Lambda, Concatenate
from keras.losses import categorical_crossentropy from keras.losses import categorical_crossentropy
@ -82,7 +82,10 @@ def siamese_model(input_dim):
def train_siamese(): def train_siamese():
# the data, shuffled and split between train and test sets # the data, shuffled and split between train and test sets
# tr_pairs, te_pairs, tr_y_e, te_y_e = speech_model_data() # tr_pairs, te_pairs, tr_y_e, te_y_e = speech_model_data()
(tr_pairs,te_pairs,tr_y,te_y,n_step,n_features) = read_siamese_tfrecords_oneshot() batch_size = 512
tr_gen_fn,n_step,n_features,n_records = read_siamese_tfrecords_generator('audio',batch_size)
tr_gen = tr_gen_fn()
(te_pairs,te_y) = read_siamese_tfrecords_oneshot('audio',1000)
# tr_y = to_categorical(tr_y_e, num_classes=2) # tr_y = to_categorical(tr_y_e, num_classes=2)
# te_y = to_categorical(te_y_e, num_classes=2) # te_y = to_categorical(te_y_e, num_classes=2)
input_dim = (n_step, n_features) input_dim = (n_step, n_features)
@ -113,22 +116,26 @@ def train_siamese():
# train # train
rms = RMSprop(lr=0.001) rms = RMSprop(lr=0.001)
model.compile(loss=categorical_crossentropy, optimizer=rms, metrics=[accuracy]) model.compile(loss=categorical_crossentropy, optimizer=rms, metrics=[accuracy])
model.fit( # model.fit(
[tr_pairs[:, 0], tr_pairs[:, 1]], # [tr_pairs[:, 0], tr_pairs[:, 1]],
tr_y, # tr_y,
batch_size=128, # batch_size=128,
epochs=100, # epochs=100,
validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y), # validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),
callbacks=[tb_cb, cp_cb]) # callbacks=[tb_cb, cp_cb])
model.fit_generator(tr_gen
,epochs=100
,steps_per_epoch=n_records//batch_size
,use_multiprocessing=True)
model.save('./models/siamese_speech_model-final.h5') model.save('./models/siamese_speech_model-final.h5')
# compute final accuracy on training and test sets # compute final accuracy on training and test sets
y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]]) # y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
tr_acc = compute_accuracy(tr_y, y_pred) # tr_acc = compute_accuracy(tr_y, y_pred)
# print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))
y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]]) y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]])
te_acc = compute_accuracy(te_y, y_pred) te_acc = compute_accuracy(te_y, y_pred)
print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))
print('* Accuracy on test set: %0.2f%%' % (100 * te_acc)) print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))