Compare commits

f44665e9b26e637afa8c01f5d623b826908124df...52bbb69c6556f6e829246b8891ab1a61e69d3d26

No commits in common. "f44665e9b26e637afa8c01f5d623b826908124df" and "52bbb69c6556f6e829246b8891ab1a61e69d3d26" have entirely different histories.

2 changed files with 21 additions and 30 deletions

View File

@@ -156,7 +156,7 @@ def create_segments_tfrecords(collection_name='story_test_segments',sample_count
 f_bounds = [spec_frame(phrase_spec,b) for b in ph_bounds]
 valid_bounds = [i for i in f_bounds if 0 < i < spec_n]
 b_frames = np.asarray(valid_bounds)
-if len(b_frames) > 0:
+# print(spec_n,b_frames)
 result[b_frames] = 1
 nonlocal n_records,n_spec,n_features
 n_spec = max([n_spec,spec_n])
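Note on this hunk: the new side appears to drop the `len(b_frames) > 0` guard before `result[b_frames] = 1`. That is only safe if `b_frames` has an integer dtype, because `np.asarray([])` defaults to float64 and float arrays raise IndexError when used as indices. A minimal guard-free sketch of the labeling step (the function name and wrapper are illustrative, not from this repo):

    import numpy as np

    def boundary_labels(f_bounds, spec_n):
        # Per-frame boundary vector: 1 at every valid boundary frame.
        result = np.zeros(spec_n)
        valid_bounds = [i for i in f_bounds if 0 < i < spec_n]
        # An explicit int dtype keeps indexing valid even when the list is
        # empty; np.asarray([]) is float64, and float indices raise IndexError.
        b_frames = np.asarray(valid_bounds, dtype=np.int64)
        result[b_frames] = 1
        return result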
@@ -178,10 +178,9 @@ def create_segments_tfrecords(collection_name='story_test_segments',sample_count
 word_groups = [i for i in audio_samples.groupby('phrase')]
 wg_sampled = reservoir_sample(word_groups,sample_count) if sample_count > 0 else word_groups
-# write_samples(word_groups,'all')
 tr_audio_samples,te_audio_samples = train_test_split(wg_sampled,test_size=train_test_ratio)
 write_samples(tr_audio_samples,'train')
-write_samples(te_audio_samples,'test')
+# write_samples(te_audio_samples,'test')
 const_file = './outputs/segments/'+collection_name+'/constants.pkl'
 pickle.dump((n_spec,n_features,n_records),open(const_file,'wb'))
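`reservoir_sample` is called above but not shown in this diff. Assuming it is the standard single-pass Algorithm R, it would look roughly like this sketch (the repo's actual helper may differ):

    import random

    def reservoir_sample(items, k):
        # Algorithm R: uniform sample of k items from an iterable of
        # unknown length, in a single pass.
        reservoir = []
        for n, item in enumerate(items):
            if n < k:
                reservoir.append(item)
            else:
                j = random.randint(0, n)  # inclusive on both ends
                if j < k:
                    reservoir[j] = item
        return reservoir

Also note that `train_test_ratio` is passed to scikit-learn's `train_test_split` as `test_size`, so it is the held-out fraction rather than a train-to-test ratio.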
@@ -256,9 +255,9 @@ if __name__ == '__main__':
 # plot_random_phrases()
 # fix_csv('story_test_segments')
 # plot_segments('story_test_segments')
-# fix_csv('story_words')
+# fix_csv('story_phrases')
 # pass
-create_segments_tfrecords('story_words.30', sample_count=36,train_test_ratio=0.1)
+create_segments_tfrecords('story_phrases', sample_count=100)
 # record_generator,input_data,output_data,copy_read_consts = read_segments_tfrecords_generator('story_test')
 # tr_gen = record_generator()
 # for i in tr_gen:

View File

@@ -4,10 +4,10 @@ import numpy as np
 from keras.models import Model,load_model,model_from_yaml
 from keras.layers import Input,Concatenate,Lambda, Reshape, Dropout
 from keras.layers import Dense,Conv2D, LSTM, Bidirectional, GRU
-from keras.layers import BatchNormalization,Activation
+from keras.layers import BatchNormalization
 from keras.losses import categorical_crossentropy
 from keras.utils import to_categorical
-from keras.optimizers import RMSprop,Adadelta,Adagrad,Adam,Nadam
+from keras.optimizers import RMSprop
 from keras.callbacks import TensorBoard, ModelCheckpoint
 from keras import backend as K
 from keras.utils import plot_model
@@ -36,38 +36,30 @@ def ctc_lambda_func(args):
 return K.ctc_batch_cost(labels, y_pred, input_length, label_length)
 
 def segment_model(input_dim):
-# input_dim = (100,100,1)
 inp = Input(shape=input_dim)
 cnv1 = Conv2D(filters=32, kernel_size=(5,9))(inp)
 cnv2 = Conv2D(filters=1, kernel_size=(5,9))(cnv1)
 dr_cnv2 = Dropout(rate=0.95)(cnv2)
-# dr_cnv2
 cn_rnn_dim = (dr_cnv2.shape[1].value,dr_cnv2.shape[2].value)
 r_dr_cnv2 = Reshape(target_shape=cn_rnn_dim)(dr_cnv2)
 b_gr1 = Bidirectional(GRU(512, return_sequences=True),merge_mode='sum')(r_dr_cnv2)
-# b_gr1
 b_gr2 = Bidirectional(GRU(512, return_sequences=True),merge_mode='sum')(b_gr1)
 b_gr3 = Bidirectional(GRU(512, return_sequences=True),merge_mode='sum')(b_gr2)
-# b_gr3
 oup = Dense(2, activation='softmax')(b_gr3)
-# oup
 return Model(inp, oup)
 
 def simple_segment_model(input_dim):
 inp = Input(shape=input_dim)
-b_gr1 = Bidirectional(LSTM(32, return_sequences=True))(inp)
-b_gr1 = Bidirectional(LSTM(16, return_sequences=True),merge_mode='sum')(b_gr1)
-b_gr1 = LSTM(1, return_sequences=True,activation='softmax')(b_gr1)
-# b_gr1 = LSTM(4, return_sequences=True)(b_gr1)
-# b_gr1 = LSTM(2, return_sequences=True)(b_gr1)
-# bn_b_gr1 = BatchNormalization(momentum=0.98)(b_gr1)
-# b_gr2 = GRU(64, return_sequences=True)(b_gr1)
-# bn_b_gr2 = BatchNormalization(momentum=0.98)(b_gr2)
-# d1 = Dense(32)(b_gr2)
-# bn_d1 = BatchNormalization(momentum=0.98)(d1)
-# bn_da1 = Activation('relu')(bn_d1)
-# d2 = Dense(8)(bn_da1)
-# bn_d2 = BatchNormalization(momentum=0.98)(d2)
-# bn_da2 = Activation('relu')(bn_d2)
-# d3 = Dense(1)(b_gr1)
-# # bn_d3 = BatchNormalization(momentum=0.98)(d3)
-# bn_da3 = Activation('softmax')(d3)
-oup = Reshape(target_shape=(input_dim[0],))(b_gr1)
+b_gr1 = Bidirectional(GRU(256, return_sequences=True),merge_mode='sum')(inp)
+# b_gr1
+b_gr2 = Bidirectional(GRU(64, return_sequences=True),merge_mode='sum')(b_gr1)
+b_gr3 = Bidirectional(GRU(1, return_sequences=True),merge_mode='sum')(b_gr2)
+oup = Reshape(target_shape=(input_dim[0],))(b_gr3)
 return Model(inp, oup)
 
 def write_model_arch(mod,mod_file):
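One portability note on `segment_model`: `dr_cnv2.shape[1].value` relies on TF1-style `Dimension` objects; on TF2 backends the shape entries are plain ints (or None) and `.value` fails. A backend-agnostic helper (hypothetical name, same result) could replace that line:

    from keras import backend as K

    def rnn_input_shape(conv_out):
        # (time, features) for the Reshape feeding the GRU stack;
        # K.int_shape returns plain Python ints/None on any backend.
        _, t, f, _ = K.int_shape(conv_out)
        return (t, f)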
@@ -83,7 +75,7 @@ def load_model_arch(mod_file):
 def train_segment(collection_name = 'test',resume_weights='',initial_epoch=0):
 # collection_name = 'story_test'
-batch_size = 128
+batch_size = 64
 # batch_size = 4
 model_dir = './models/segment/'+collection_name
 create_dir(model_dir)
@@ -119,8 +111,8 @@ def train_segment(collection_name = 'test',resume_weights='',initial_epoch=0):
 mode='auto',
 period=1)
 # train
-opt = RMSprop()
-model.compile(loss=categorical_crossentropy, optimizer=opt, metrics=[accuracy])
+rms = RMSprop()
+model.compile(loss=categorical_crossentropy, optimizer=rms, metrics=[accuracy])
 write_model_arch(model,model_dir+'/speech_segment_model_arch.yaml')
 epoch_n_steps = step_count(n_records,batch_size)
 if resume_weights != '':
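`step_count` is referenced here but not part of this diff; presumably it is the usual ceiling division of record count by batch size. A sketch under that assumption:

    import math

    def step_count(n_records, batch_size):
        # Number of batches needed to cover all records once per epoch.
        return math.ceil(n_records / batch_size)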
@@ -141,4 +133,4 @@ def train_segment(collection_name = 'test',resume_weights='',initial_epoch=0):
 if __name__ == '__main__':
 # pass
-train_segment('story_words')#,'./models/segment/story_phrases.1000/speech_segment_model-final.h5',1001)
+train_segment('story_phrases','./models/segment/story_phrases.1000/speech_segment_model-final.h5',1001)
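For reference, a minimal sketch of the resume pattern this call exercises: load the checkpoint weights into the rebuilt model, then continue fitting with `initial_epoch` so epoch numbering stays contiguous in TensorBoard and checkpoint names. The signature and `epochs` total are illustrative, not taken from `train_segment`:

    def resume_training(model, weights_path, train_gen, steps_per_epoch,
                        initial_epoch, epochs=1500, callbacks=None):
        # Restore saved weights, then keep training from the recorded epoch.
        model.load_weights(weights_path)
        model.fit_generator(train_gen, steps_per_epoch=steps_per_epoch,
                            epochs=epochs, initial_epoch=initial_epoch,
                            callbacks=callbacks or [])
        return model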