trying to overfit the model to identify false-negative types
parent
1190312def
commit
bb72c4045e
|
|
@ -0,0 +1,77 @@
|
||||||
|
bleach==1.5.0
|
||||||
|
click==6.7
|
||||||
|
cloudpickle==0.4.1
|
||||||
|
cycler==0.10.0
|
||||||
|
dask==0.15.4
|
||||||
|
decorator==4.1.2
|
||||||
|
distributed==1.19.3
|
||||||
|
entrypoints==0.2.3
|
||||||
|
enum34==1.1.6
|
||||||
|
futures==3.1.1
|
||||||
|
h5py==2.7.1
|
||||||
|
HeapDict==1.0.0
|
||||||
|
html5lib==0.9999999
|
||||||
|
ipykernel==4.6.1
|
||||||
|
ipython==6.2.1
|
||||||
|
ipython-genutils==0.2.0
|
||||||
|
ipywidgets==7.0.3
|
||||||
|
jedi==0.11.0
|
||||||
|
Jinja2==2.9.6
|
||||||
|
jsonschema==2.6.0
|
||||||
|
jupyter==1.0.0
|
||||||
|
jupyter-client==5.1.0
|
||||||
|
jupyter-console==5.2.0
|
||||||
|
jupyter-core==4.3.0
|
||||||
|
Keras==2.0.8
|
||||||
|
locket==0.2.0
|
||||||
|
Markdown==2.6.9
|
||||||
|
MarkupSafe==1.0
|
||||||
|
matplotlib==2.1.0
|
||||||
|
mistune==0.7.4
|
||||||
|
msgpack-python==0.4.8
|
||||||
|
nbconvert==5.3.1
|
||||||
|
nbformat==4.4.0
|
||||||
|
notebook==5.2.0
|
||||||
|
numexpr==2.6.4
|
||||||
|
numpy==1.13.3
|
||||||
|
pandas==0.20.3
|
||||||
|
pandocfilters==1.4.2
|
||||||
|
parso==0.1.0
|
||||||
|
partd==0.3.8
|
||||||
|
pexpect==4.2.1
|
||||||
|
pickleshare==0.7.4
|
||||||
|
pkg-resources==0.0.0
|
||||||
|
progressbar2==3.34.3
|
||||||
|
prompt-toolkit==1.0.15
|
||||||
|
protobuf==3.4.0
|
||||||
|
psutil==5.4.0
|
||||||
|
ptyprocess==0.5.2
|
||||||
|
PyAudio==0.2.11
|
||||||
|
Pygments==2.2.0
|
||||||
|
pyparsing==2.2.0
|
||||||
|
pysndfile==1.0.0
|
||||||
|
python-dateutil==2.6.1
|
||||||
|
python-utils==2.2.0
|
||||||
|
pytz==2017.2
|
||||||
|
PyYAML==3.12
|
||||||
|
pyzmq==16.0.2
|
||||||
|
qtconsole==4.3.1
|
||||||
|
scikit-learn==0.19.0
|
||||||
|
scipy==0.19.1
|
||||||
|
simplegeneric==0.8.1
|
||||||
|
six==1.11.0
|
||||||
|
sortedcontainers==1.5.7
|
||||||
|
tables==3.4.2
|
||||||
|
tblib==1.3.2
|
||||||
|
tensorflow==1.3.0
|
||||||
|
tensorflow-tensorboard==0.4.0rc1
|
||||||
|
terminado==0.6
|
||||||
|
testpath==0.3.1
|
||||||
|
toolz==0.8.2
|
||||||
|
tornado==4.5.2
|
||||||
|
tqdm==4.19.4
|
||||||
|
traitlets==4.3.2
|
||||||
|
wcwidth==0.1.7
|
||||||
|
Werkzeug==0.12.2
|
||||||
|
widgetsnbextension==3.0.6
|
||||||
|
zict==0.1.3
|
||||||
|
|
@ -20,9 +20,10 @@ def siamese_pairs(rightGroup, wrongGroup):
|
||||||
group1 = [r for (i, r) in rightGroup.iterrows()]
|
group1 = [r for (i, r) in rightGroup.iterrows()]
|
||||||
group2 = [r for (i, r) in wrongGroup.iterrows()]
|
group2 = [r for (i, r) in wrongGroup.iterrows()]
|
||||||
rightWrongPairs = [(g1, g2) for g2 in group2 for g1 in group1]
|
rightWrongPairs = [(g1, g2) for g2 in group2 for g1 in group1]
|
||||||
rightRightPairs = [i for i in itertools.combinations(group1, 2)]
|
rightRightPairs = [i for i in itertools.combinations(group1, 2)]#+[i for i in itertools.combinations(group2, 2)]
|
||||||
random.shuffle(rightWrongPairs)
|
# random.shuffle(rightWrongPairs)
|
||||||
random.shuffle(rightRightPairs)
|
# random.shuffle(rightRightPairs)
|
||||||
|
# return rightRightPairs[:10],rightWrongPairs[:10]
|
||||||
return rightRightPairs[:32],rightWrongPairs[:32]
|
return rightRightPairs[:32],rightWrongPairs[:32]
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -45,8 +46,7 @@ def create_spectrogram_tfrecords(audio_group='audio',sample_count=0):
|
||||||
n_records,n_spec,n_features = 0,0,0
|
n_records,n_spec,n_features = 0,0,0
|
||||||
|
|
||||||
def write_samples(wg,sample_name):
|
def write_samples(wg,sample_name):
|
||||||
wg_sampled = reservoir_sample(wg,sample_count) if sample_count > 0 else wg
|
word_group_prog = tqdm(wg,desc='Computing spectrogram')
|
||||||
word_group_prog = tqdm(wg_sampled,desc='Computing spectrogram')
|
|
||||||
record_file = './outputs/{}.{}.tfrecords'.format(audio_group,sample_name)
|
record_file = './outputs/{}.{}.tfrecords'.format(audio_group,sample_name)
|
||||||
writer = tf.python_io.TFRecordWriter(record_file)
|
writer = tf.python_io.TFRecordWriter(record_file)
|
||||||
for (w, word_group) in word_group_prog:
|
for (w, word_group) in word_group_prog:
|
||||||
|
|
@ -100,7 +100,8 @@ def create_spectrogram_tfrecords(audio_group='audio',sample_count=0):
|
||||||
writer.close()
|
writer.close()
|
||||||
|
|
||||||
word_groups = [i for i in audio_samples.groupby('word')]
|
word_groups = [i for i in audio_samples.groupby('word')]
|
||||||
tr_audio_samples,te_audio_samples = train_test_split(word_groups,test_size=0.1)
|
wg_sampled = reservoir_sample(word_groups,sample_count) if sample_count > 0 else word_groups
|
||||||
|
tr_audio_samples,te_audio_samples = train_test_split(wg_sampled,test_size=0.1)
|
||||||
write_samples(tr_audio_samples,'train')
|
write_samples(tr_audio_samples,'train')
|
||||||
write_samples(te_audio_samples,'test')
|
write_samples(te_audio_samples,'test')
|
||||||
const_file = os.path.join('./outputs',audio_group+'.constants')
|
const_file = os.path.join('./outputs',audio_group+'.constants')
|
||||||
|
|
@ -124,7 +125,7 @@ def reservoir_sample(iterable, k):
|
||||||
sample[j] = item # replace item with gradually decreasing probability
|
sample[j] = item # replace item with gradually decreasing probability
|
||||||
return sample
|
return sample
|
||||||
|
|
||||||
def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,sample_size=100):
|
def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,test_size=100):
|
||||||
records_file = os.path.join('./outputs',audio_group+'.train.tfrecords')
|
records_file = os.path.join('./outputs',audio_group+'.train.tfrecords')
|
||||||
input_pairs = []
|
input_pairs = []
|
||||||
output_class = []
|
output_class = []
|
||||||
|
|
@ -160,13 +161,14 @@ def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,sample_si
|
||||||
# Read test in one-shot
|
# Read test in one-shot
|
||||||
te_records_file = os.path.join('./outputs',audio_group+'.test.tfrecords')
|
te_records_file = os.path.join('./outputs',audio_group+'.test.tfrecords')
|
||||||
te_re_iterator = tf.python_io.tf_record_iterator(path=records_file)
|
te_re_iterator = tf.python_io.tf_record_iterator(path=records_file)
|
||||||
|
te_n_records = len([i for i in te_re_iterator])
|
||||||
|
te_re_iterator = tf.python_io.tf_record_iterator(path=records_file)
|
||||||
print('reading tfrecords({}-test)...'.format(audio_group))
|
print('reading tfrecords({}-test)...'.format(audio_group))
|
||||||
samples = min([sample_size,n_records])
|
test_size = min([test_size,te_n_records]) if test_size > 0 else te_n_records
|
||||||
# samples = n_records
|
input_data = np.zeros((test_size,2,n_spec,n_features))
|
||||||
input_data = np.zeros((samples,2,n_spec,n_features))
|
output_data = np.zeros((test_size,2))
|
||||||
output_data = np.zeros((samples,2))
|
random_samples = enumerate(reservoir_sample(te_re_iterator,test_size))
|
||||||
random_samples = enumerate(reservoir_sample(te_re_iterator,samples))
|
for (i,string_record) in tqdm(random_samples,total=test_size):
|
||||||
for (i,string_record) in tqdm(random_samples,total=samples):
|
|
||||||
example = tf.train.Example()
|
example = tf.train.Example()
|
||||||
example.ParseFromString(string_record)
|
example.ParseFromString(string_record)
|
||||||
spec_n1 = example.features.feature['spec_n1'].int64_list.value[0]
|
spec_n1 = example.features.feature['spec_n1'].int64_list.value[0]
|
||||||
|
|
@ -187,7 +189,7 @@ def audio_samples_word_count(audio_group='audio'):
|
||||||
return len(audio_samples.groupby(audio_samples['word']))
|
return len(audio_samples.groupby(audio_samples['word']))
|
||||||
|
|
||||||
def fix_csv(audio_group='audio'):
|
def fix_csv(audio_group='audio'):
|
||||||
audio_csv_lines = open('./outputs/' + audio_group + '.csv','r').readlines()
|
audio_csv_lines = open('./outputs/' + audio_group + '.csv.orig','r').readlines()
|
||||||
audio_csv_data = [i.strip().split(',') for i in audio_csv_lines]
|
audio_csv_data = [i.strip().split(',') for i in audio_csv_lines]
|
||||||
proper_rows = [i for i in audio_csv_data if len(i) == 7]
|
proper_rows = [i for i in audio_csv_data if len(i) == 7]
|
||||||
with open('./outputs/' + audio_group + '.csv','w') as fixed_csv:
|
with open('./outputs/' + audio_group + '.csv','w') as fixed_csv:
|
||||||
|
|
@ -220,10 +222,13 @@ if __name__ == '__main__':
|
||||||
# read_siamese_tfrecords('story_all')
|
# read_siamese_tfrecords('story_all')
|
||||||
# read_siamese_tfrecords('story_words_test')
|
# read_siamese_tfrecords('story_words_test')
|
||||||
# padd_zeros_siamese_tfrecords('story_words')
|
# padd_zeros_siamese_tfrecords('story_words')
|
||||||
# fix_csv()
|
# fix_csv('story_words')
|
||||||
# pickle_constants('story_words')
|
# pickle_constants('story_words')
|
||||||
# create_spectrogram_tfrecords('audio',sample_count=100)
|
# create_spectrogram_tfrecords('audio',sample_count=100)
|
||||||
read_siamese_tfrecords_generator('audio')
|
# create_spectrogram_tfrecords('story_all',sample_count=25)
|
||||||
|
create_spectrogram_tfrecords('story_words',sample_count=10)
|
||||||
|
# create_spectrogram_tfrecords('audio',sample_count=50)
|
||||||
|
# read_siamese_tfrecords_generator('audio')
|
||||||
# padd_zeros_siamese_tfrecords('audio')
|
# padd_zeros_siamese_tfrecords('audio')
|
||||||
# create_padded_spectrogram()
|
# create_padded_spectrogram()
|
||||||
# create_speech_pairs_data()
|
# create_speech_pairs_data()
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ from __future__ import absolute_import
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
import numpy as np
|
import numpy as np
|
||||||
# from speech_data import speech_model_data
|
# from speech_data import speech_model_data
|
||||||
from speech_data import read_siamese_tfrecords_oneshot,read_siamese_tfrecords_generator
|
from speech_data import read_siamese_tfrecords_generator
|
||||||
from keras.models import Model,load_model
|
from keras.models import Model,load_model
|
||||||
from keras.layers import Input, Dense, Dropout, LSTM, Lambda, Concatenate
|
from keras.layers import Input, Dense, Dropout, LSTM, Lambda, Concatenate
|
||||||
from keras.losses import categorical_crossentropy
|
from keras.losses import categorical_crossentropy
|
||||||
|
|
@ -14,42 +14,46 @@ from keras.callbacks import TensorBoard, ModelCheckpoint
|
||||||
from keras import backend as K
|
from keras import backend as K
|
||||||
|
|
||||||
def create_dir(direc):
|
def create_dir(direc):
|
||||||
|
import os
|
||||||
if not os.path.exists(direc):
|
if not os.path.exists(direc):
|
||||||
os.makedirs(direc)
|
os.makedirs(direc)
|
||||||
|
|
||||||
def euclidean_distance(vects):
|
# def euclidean_distance(vects):
|
||||||
x, y = vects
|
# x, y = vects
|
||||||
return K.sqrt(
|
# return K.sqrt(
|
||||||
K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))
|
# K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon()))
|
||||||
|
#
|
||||||
|
#
|
||||||
def eucl_dist_output_shape(shapes):
|
# def eucl_dist_output_shape(shapes):
|
||||||
shape1, shape2 = shapes
|
# shape1, shape2 = shapes
|
||||||
return (shape1[0], 1)
|
# return (shape1[0], 1)
|
||||||
|
#
|
||||||
|
#
|
||||||
def contrastive_loss(y_true, y_pred):
|
# def contrastive_loss(y_true, y_pred):
|
||||||
'''Contrastive loss from Hadsell-et-al.'06
|
# '''Contrastive loss from Hadsell-et-al.'06
|
||||||
http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
|
# http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
|
||||||
'''
|
# '''
|
||||||
return K.mean(y_true * K.square(y_pred) +
|
# return K.mean(y_true * K.square(y_pred) +
|
||||||
(1 - y_true) * K.square(K.maximum(1 - y_pred, 0)))
|
# (1 - y_true) * K.square(K.maximum(1 - y_pred, 0)))
|
||||||
|
|
||||||
def create_base_rnn_network(input_dim):
|
def create_base_rnn_network(input_dim):
|
||||||
'''Base network to be shared (eq. to feature extraction).
|
'''Base network to be shared (eq. to feature extraction).
|
||||||
'''
|
'''
|
||||||
inp = Input(shape=input_dim)
|
inp = Input(shape=input_dim)
|
||||||
ls1 = LSTM(256, return_sequences=True)(inp)
|
ls0 = LSTM(512, return_sequences=True)(inp)
|
||||||
|
ls1 = LSTM(256, return_sequences=True)(ls0)
|
||||||
ls2 = LSTM(128, return_sequences=True)(ls1)
|
ls2 = LSTM(128, return_sequences=True)(ls1)
|
||||||
# ls3 = LSTM(32, return_sequences=True)(ls2)
|
# ls3 = LSTM(32, return_sequences=True)(ls2)
|
||||||
ls4 = LSTM(64)(ls2)
|
ls4 = LSTM(64)(ls2)
|
||||||
|
d1 = Dense(128, activation='relu')(ls4)
|
||||||
|
d2 = Dense(64, activation='relu')(d1)
|
||||||
return Model(inp, ls4)
|
return Model(inp, ls4)
|
||||||
|
|
||||||
|
|
||||||
def compute_accuracy(y_true, y_pred):
|
def compute_accuracy(y_true, y_pred):
|
||||||
'''Compute classification accuracy with a fixed threshold on distances.
|
'''Compute classification accuracy with a fixed threshold on distances.
|
||||||
'''
|
'''
|
||||||
pred = y_pred.ravel() < 0.5
|
pred = y_pred.ravel() > 0.5
|
||||||
return np.mean(pred == y_true)
|
return np.mean(pred == y_true)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -60,11 +64,12 @@ def accuracy(y_true, y_pred):
|
||||||
|
|
||||||
def dense_classifier(processed):
|
def dense_classifier(processed):
|
||||||
conc_proc = Concatenate()(processed)
|
conc_proc = Concatenate()(processed)
|
||||||
d1 = Dense(16, activation='relu')(conc_proc)
|
d1 = Dense(64, activation='relu')(conc_proc)
|
||||||
# dr1 = Dropout(0.1)(d1)
|
# dr1 = Dropout(0.1)(d1)
|
||||||
d2 = Dense(8, activation='relu')(d1)
|
d2 = Dense(128, activation='relu')(d1)
|
||||||
|
d3 = Dense(8, activation='relu')(d2)
|
||||||
# dr2 = Dropout(0.1)(d2)
|
# dr2 = Dropout(0.1)(d2)
|
||||||
return Dense(2, activation='softmax')(d2)
|
return Dense(2, activation='softmax')(d3)
|
||||||
|
|
||||||
def siamese_model(input_dim):
|
def siamese_model(input_dim):
|
||||||
# input_dim = (15, 1654)
|
# input_dim = (15, 1654)
|
||||||
|
|
@ -85,10 +90,10 @@ def siamese_model(input_dim):
|
||||||
def train_siamese(audio_group = 'audio'):
|
def train_siamese(audio_group = 'audio'):
|
||||||
# the data, shuffled and split between train and test sets
|
# the data, shuffled and split between train and test sets
|
||||||
# tr_pairs, te_pairs, tr_y_e, te_y_e = speech_model_data()
|
# tr_pairs, te_pairs, tr_y_e, te_y_e = speech_model_data()
|
||||||
batch_size = 512
|
batch_size = 128
|
||||||
model_dir = './models/'+audio_group
|
model_dir = './models/'+audio_group
|
||||||
create_dir(model_dir)
|
create_dir(model_dir)
|
||||||
tr_gen_fn,te_pairs,te_y,n_step,n_features,n_records = read_siamese_tfrecords_generator(audio_group,batch_size,300)
|
tr_gen_fn,te_pairs,te_y,n_step,n_features,n_records = read_siamese_tfrecords_generator(audio_group,batch_size,256)
|
||||||
tr_gen = tr_gen_fn()
|
tr_gen = tr_gen_fn()
|
||||||
# tr_y = to_categorical(tr_y_e, num_classes=2)
|
# tr_y = to_categorical(tr_y_e, num_classes=2)
|
||||||
# te_y = to_categorical(te_y_e, num_classes=2)
|
# te_y = to_categorical(te_y_e, num_classes=2)
|
||||||
|
|
@ -113,12 +118,12 @@ def train_siamese(audio_group = 'audio'):
|
||||||
cp_file_fmt,
|
cp_file_fmt,
|
||||||
monitor='val_loss',
|
monitor='val_loss',
|
||||||
verbose=0,
|
verbose=0,
|
||||||
save_best_only=False,
|
save_best_only=True,
|
||||||
save_weights_only=False,
|
save_weights_only=True,
|
||||||
mode='auto',
|
mode='auto',
|
||||||
period=1)
|
period=1)
|
||||||
# train
|
# train
|
||||||
rms = RMSprop(lr=0.001)
|
rms = RMSprop()#lr=0.001
|
||||||
model.compile(loss=categorical_crossentropy, optimizer=rms, metrics=[accuracy])
|
model.compile(loss=categorical_crossentropy, optimizer=rms, metrics=[accuracy])
|
||||||
# model.fit(
|
# model.fit(
|
||||||
# [tr_pairs[:, 0], tr_pairs[:, 1]],
|
# [tr_pairs[:, 0], tr_pairs[:, 1]],
|
||||||
|
|
@ -128,11 +133,11 @@ def train_siamese(audio_group = 'audio'):
|
||||||
# validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),
|
# validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),
|
||||||
# callbacks=[tb_cb, cp_cb])
|
# callbacks=[tb_cb, cp_cb])
|
||||||
model.fit_generator(tr_gen
|
model.fit_generator(tr_gen
|
||||||
,epochs=100
|
,epochs=1000
|
||||||
,steps_per_epoch=n_records//batch_size
|
,steps_per_epoch=n_records//batch_size
|
||||||
,validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)
|
,validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)
|
||||||
,use_multiprocessing=True)
|
,use_multiprocessing=True)
|
||||||
|
# ,callbacks=[tb_cb, cp_cb])
|
||||||
model.save(model_dir+'/siamese_speech_model-final.h5')
|
model.save(model_dir+'/siamese_speech_model-final.h5')
|
||||||
# compute final accuracy on training and test sets
|
# compute final accuracy on training and test sets
|
||||||
# y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
|
# y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
|
||||||
|
|
@ -146,4 +151,5 @@ def train_siamese(audio_group = 'audio'):
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
train_siamese()
|
train_siamese('story_words')
|
||||||
|
# train_siamese('audio')
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue