implemented padding and pipeline is complete

parent 41b3f1a9fe
commit b8a9f87031
@@ -95,17 +95,34 @@ def create_spectrogram_tfrecords(audio_group='audio'):
         writer.write(example.SerializeToString())
     writer.close()
 
+def padd_zeros(spgr, max_samples):
+    return np.lib.pad(spgr, [(0, max_samples - spgr.shape[0]), (0, 0)],
+                      'constant')
+
+def find_max_n(trf):
+    max_n = 0
+    max_n_it = tf.python_io.tf_record_iterator(path=trf)
+    for string_record in max_n_it:
+        example = tf.train.Example()
+        example.ParseFromString(string_record)
+        spec_n1 = example.features.feature['spec_n1'].int64_list.value[0]
+        spec_n2 = example.features.feature['spec_n2'].int64_list.value[0]
+        max_n = max([max_n,spec_n1,spec_n2])
+    return max_n
+
 def read_siamese_tfrecords(audio_group='audio'):
     records_file = os.path.join('./outputs',audio_group+'.tfrecords')
     record_iterator = tf.python_io.tf_record_iterator(path=records_file)
+    # input1,input2 = [],[]
     input_pairs = []
     output_class = []
-    input_words = []
+    max_n = find_max_n(records_file)
+    spec_w1 = 0
     for string_record in record_iterator:
         example = tf.train.Example()
         example.ParseFromString(string_record)
-        word = example.features.feature['word'].bytes_list.value[0]
-        input_words.append(word)
+        # word = example.features.feature['word'].bytes_list.value[0]
+        # input_words.append(word)
         example.features.feature['spec2'].float_list.value[0]
         spec_n1 = example.features.feature['spec_n1'].int64_list.value[0]
         spec_n2 = example.features.feature['spec_n2'].int64_list.value[0]
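Note: the two new helpers above are what the padding pipeline hangs on. find_max_n scans the TFRecord file once to find the largest frame count across both spectrograms of every pair, and padd_zeros zero-pads each spectrogram along the time axis to that length so the pairs can later be stacked into one dense array. A minimal sketch of the padding idea with made-up shapes (13 features, toy frame counts; not the repository's data):

import numpy as np

def padd_zeros(spgr, max_samples):
    # append (max_samples - n_frames) rows of zeros; the feature width stays the same
    return np.pad(spgr, [(0, max_samples - spgr.shape[0]), (0, 0)], 'constant')

spec1 = np.random.rand(40, 13)   # (n_frames, n_features)
spec2 = np.random.rand(55, 13)
max_n = max(spec1.shape[0], spec2.shape[0])

p1, p2 = padd_zeros(spec1, max_n), padd_zeros(spec2, max_n)
print(p1.shape, p2.shape)        # both (55, 13), so np.asarray([p1, p2]) stacks cleanly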
@@ -113,10 +130,23 @@ def read_siamese_tfrecords(audio_group='audio'):
         spec_w2 = example.features.feature['spec_w2'].int64_list.value[0]
         spec1 = np.array(example.features.feature['spec1'].float_list.value).reshape(spec_n1,spec_w1)
         spec2 = np.array(example.features.feature['spec2'].float_list.value).reshape(spec_n2,spec_w2)
-        input_pairs.append([spec1,spec2])
+        p_spec1,p_spec2 = padd_zeros(spec1,max_n),padd_zeros(spec2,max_n)
+        # input1.append(spec1)
+        # input2.append(spec2)
+        input_pairs.append(np.asarray([p_spec1,p_spec2]))
+        # input_pairs.append([spec1,spec2])
         output = example.features.feature['output'].int64_list.value
-        output_class.append(output)
-    return input_pairs,output_class
+        output_class.append(np.asarray(output))
+        n_features = spec_w1
+        # if len(input_pairs) > 50:
+        #     break
+    input_data,output_data = np.asarray(input_pairs),np.asarray(output_class)
+    # import pdb; pdb.set_trace()
+    # tr_x1,te_x1,tr_x2,te_x2,tr_y,te_y = train_test_split(input1,input2,output_class)
+    tr_pairs,te_pairs,tr_y,te_y = train_test_split(input_data,output_data)
+    # return (tr_x1,te_x1,tr_x2,te_x2,tr_y,te_y)
+    n_step,n_features = int(max_n),int(spec_w1)
+    return (tr_pairs,te_pairs,tr_y,te_y,n_step,n_features)
 
 def audio_samples_word_count(audio_group='audio'):
     audio_group = 'story_all'
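For orientation, the shapes produced by the reworked reader: every element appended to input_pairs is a (2, max_n, n_features) stack, so the full dataset becomes a 4-D array that can be shuffled and split as a unit. A sketch with invented sizes, assuming the train_test_split used here is scikit-learn's (the import is not shown in this diff):

import numpy as np
from sklearn.model_selection import train_test_split

n_examples, max_n, n_features = 200, 55, 13                      # invented sizes
input_data = np.random.rand(n_examples, 2, max_n, n_features)    # stacked [p_spec1, p_spec2] pairs
output_data = np.random.randint(0, 2, size=(n_examples, 1))      # same/different labels

tr_pairs, te_pairs, tr_y, te_y = train_test_split(input_data, output_data)
print(tr_pairs.shape)        # (150, 2, 55, 13) with the default 25% test split
print(tr_pairs[:, 0].shape)  # (150, 55, 13): one branch fed to the siamese model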
@@ -157,7 +187,8 @@ if __name__ == '__main__':
     # create_spectrogram_data()
     # create_spectrogram_data('story_words')
     # create_spectrogram_tfrecords('story_words')
-    create_spectrogram_tfrecords('story_all')
+    # create_spectrogram_tfrecords('story_all')
+    read_siamese_tfrecords('story_all')
     # create_padded_spectrogram()
     # create_speech_pairs_data()
    # print(speech_model_data())

@@ -82,10 +82,10 @@ def siamese_model(input_dim):
 def train_siamese():
     # the data, shuffled and split between train and test sets
     # tr_pairs, te_pairs, tr_y_e, te_y_e = speech_model_data()
-    pairs,y = read_siamese_tfrecords('story_words')
+    (tr_pairs,te_pairs,tr_y,te_y,n_step,n_features) = read_siamese_tfrecords('story_words')
     # tr_y = to_categorical(tr_y_e, num_classes=2)
     # te_y = to_categorical(te_y_e, num_classes=2)
-    input_dim = (None, 1654)
+    input_dim = (n_step, n_features)
 
     model = siamese_model(input_dim)
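The hard-coded input_dim = (None, 1654) is replaced by the (n_step, n_features) pair returned from the TFRecord reader, so the network's input shape now follows the padded data rather than a fixed guess. Roughly what a builder like siamese_model would do with it (a sketch only; the function body is not part of this diff, and it assumes the TF1-era Keras API used elsewhere in the file):

from keras.layers import Input

n_step, n_features = 55, 13                   # illustrative; really comes from read_siamese_tfrecords
input_a = Input(shape=(n_step, n_features))   # left branch: one padded spectrogram sequence
input_b = Input(shape=(n_step, n_features))   # right branch, same shape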
@@ -114,11 +114,11 @@ def train_siamese():
     rms = RMSprop(lr=0.001)
     model.compile(loss=categorical_crossentropy, optimizer=rms, metrics=[accuracy])
     model.fit(
-        [tr_pairs[:, 0], tr_pairs[:, 1]],
+        [tr_x1, tr_x2],
         tr_y,
         batch_size=128,
         epochs=50,
-        validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),
+        validation_data=([tr_pairs[:, 0], tr_pairs[:, 1]], te_y),
         callbacks=[tb_cb, cp_cb])
 
     model.save('./models/siamese_speech_model-final.h5')