Merge branch 'master' of ssh://gpuaws/~/repos/speech_scoring
commit
ec08cc7d62
|
|
@ -8,6 +8,7 @@ distributed==1.19.3
|
|||
entrypoints==0.2.3
|
||||
enum34==1.1.6
|
||||
futures==3.1.1
|
||||
graphviz==0.8.1
|
||||
h5py==2.7.1
|
||||
HeapDict==1.0.0
|
||||
html5lib==0.9999999
|
||||
|
|
@ -44,10 +45,11 @@ pkg-resources==0.0.0
|
|||
praat-parselmouth==0.2.0
|
||||
progressbar2==3.34.3
|
||||
prompt-toolkit==1.0.15
|
||||
protobuf==3.4.0
|
||||
protobuf==3.5.0
|
||||
psutil==5.4.0
|
||||
ptyprocess==0.5.2
|
||||
PyAudio==0.2.11
|
||||
pydot==1.2.3
|
||||
Pygments==2.2.0
|
||||
pyparsing==2.2.0
|
||||
pysndfile==1.0.0
|
||||
|
|
@ -66,7 +68,7 @@ sortedcontainers==1.5.7
|
|||
tables==3.4.2
|
||||
tblib==1.3.2
|
||||
tensorflow==1.3.0
|
||||
tensorflow-tensorboard==0.4.0rc1
|
||||
tensorflow-tensorboard==0.4.0rc3
|
||||
terminado==0.6
|
||||
testpath==0.3.1
|
||||
toolz==0.8.2
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ from tqdm import tqdm
|
|||
def siamese_pairs(rightGroup, wrongGroup):
|
||||
group1 = [r for (i, r) in rightGroup.iterrows()]
|
||||
group2 = [r for (i, r) in wrongGroup.iterrows()]
|
||||
rightWrongPairs = [(g1, g2) for g2 in group2 for g1 in group1]+[(g2, g1) for g2 in group2 for g1 in group1]
|
||||
rightWrongPairs = [(g1, g2) for g2 in group2 for g1 in group1]#+[(g2, g1) for g2 in group2 for g1 in group1]
|
||||
rightRightPairs = [i for i in itertools.permutations(group1, 2)]#+[i for i in itertools.combinations(group2, 2)]
|
||||
def filter_criteria(s1,s2):
|
||||
same = s1['variant'] == s2['variant']
|
||||
|
|
@ -28,8 +28,8 @@ def siamese_pairs(rightGroup, wrongGroup):
|
|||
voice_diff = s1['voice'] != s2['voice']
|
||||
if not same and phon_same:
|
||||
return False
|
||||
if same and not voice_diff:
|
||||
return False
|
||||
# if same and not voice_diff:
|
||||
# return False
|
||||
return True
|
||||
validRWPairs = [i for i in rightWrongPairs if filter_criteria(*i)]
|
||||
validRRPairs = [i for i in rightRightPairs if filter_criteria(*i)]
|
||||
|
|
@ -64,8 +64,8 @@ def create_spectrogram_tfrecords(audio_group='audio',sample_count=0,train_test_r
|
|||
for (w, word_group) in word_group_prog:
|
||||
word_group_prog.set_postfix(word=w,sample_name=sample_name)
|
||||
g = word_group.reset_index()
|
||||
# g['spectrogram'] = apply_by_multiprocessing(g['file_path'],generate_aiff_spectrogram)
|
||||
g['spectrogram'] = apply_by_multiprocessing(g['file_path'],compute_mfcc)
|
||||
g['spectrogram'] = apply_by_multiprocessing(g['file_path'],generate_aiff_spectrogram)
|
||||
# g['spectrogram'] = apply_by_multiprocessing(g['file_path'],compute_mfcc)
|
||||
sample_right = g.loc[g['variant'] == 'low']
|
||||
sample_wrong = g.loc[g['variant'] == 'medium']
|
||||
same, diff = siamese_pairs(sample_right, sample_wrong)
|
||||
|
|
@ -208,11 +208,17 @@ def audio_samples_word_count(audio_group='audio'):
|
|||
|
||||
def record_generator_count(records_file):
|
||||
record_iterator = tf.python_io.tf_record_iterator(path=records_file)
|
||||
count = 0
|
||||
count,spec_n = 0,0
|
||||
for i in record_iterator:
|
||||
example = tf.train.Example()
|
||||
example.ParseFromString(i)
|
||||
spec_n1 = example.features.feature['spec_n1'].int64_list.value[0]
|
||||
spec_n2 = example.features.feature['spec_n2'].int64_list.value[0]
|
||||
spec_n = max([spec_n,spec_n1,spec_n2])
|
||||
import pdb; pdb.set_trace()
|
||||
count+=1
|
||||
record_iterator = tf.python_io.tf_record_iterator(path=records_file)
|
||||
return record_iterator,count
|
||||
return record_iterator,count,spec_n
|
||||
|
||||
def fix_csv(audio_group='audio'):
|
||||
audio_csv_lines = open('./outputs/' + audio_group + '.csv','r').readlines()
|
||||
|
|
@ -253,8 +259,9 @@ if __name__ == '__main__':
|
|||
# create_spectrogram_tfrecords('audio',sample_count=100)
|
||||
# create_spectrogram_tfrecords('story_all',sample_count=25)
|
||||
# fix_csv('story_words_test')
|
||||
#fix_csv('story_phrases')
|
||||
create_spectrogram_tfrecords('story_phrases',sample_count=500,train_test_ratio=0.1)
|
||||
#fix_csv('audio')
|
||||
# create_spectrogram_tfrecords('story_words_test',sample_count=100,train_test_ratio=0.1)
|
||||
record_generator_count()
|
||||
# create_spectrogram_tfrecords('audio',sample_count=50)
|
||||
# read_siamese_tfrecords_generator('audio')
|
||||
# padd_zeros_siamese_tfrecords('audio')
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ from keras.utils import to_categorical
|
|||
from keras.optimizers import RMSprop
|
||||
from keras.callbacks import TensorBoard, ModelCheckpoint
|
||||
from keras import backend as K
|
||||
from keras.utils import plot_model
|
||||
from speech_tools import create_dir,step_count
|
||||
|
||||
|
||||
|
|
@ -17,10 +18,10 @@ def create_base_rnn_network(input_dim):
|
|||
'''
|
||||
inp = Input(shape=input_dim)
|
||||
# ls0 = LSTM(512, return_sequences=True)(inp)
|
||||
ls1 = Bidirectional(LSTM(128, return_sequences=True))(inp)
|
||||
#ls2 = LSTM(128, return_sequences=True)(ls1)
|
||||
ls1 = LSTM(128, return_sequences=True)(inp)
|
||||
ls2 = LSTM(64, return_sequences=True)(ls1)
|
||||
# ls3 = LSTM(32, return_sequences=True)(ls2)
|
||||
ls4 = LSTM(64)(ls1)
|
||||
ls4 = LSTM(32)(ls2)
|
||||
# d1 = Dense(128, activation='relu')(ls4)
|
||||
#d2 = Dense(64, activation='relu')(ls2)
|
||||
return Model(inp, ls4)
|
||||
|
|
@ -55,7 +56,7 @@ def siamese_model(input_dim):
|
|||
processed_b = base_network(input_b)
|
||||
final_output = dense_classifier([processed_a,processed_b])
|
||||
model = Model([input_a, input_b], final_output)
|
||||
return model
|
||||
return model,base_network
|
||||
|
||||
def write_model_arch(mod,mod_file):
|
||||
model_f = open(mod_file,'w')
|
||||
|
|
@ -79,8 +80,9 @@ def train_siamese(audio_group = 'audio'):
|
|||
tr_gen = tr_gen_fn()
|
||||
input_dim = (n_step, n_features)
|
||||
|
||||
model = siamese_model(input_dim)
|
||||
|
||||
model,base_model = siamese_model(input_dim)
|
||||
plot_model(model,show_shapes=True, to_file=model_dir+'/model.png')
|
||||
plot_model(base_model,show_shapes=True, to_file=model_dir+'/base_model.png')
|
||||
tb_cb = TensorBoard(
|
||||
log_dir=log_dir,
|
||||
histogram_freq=1,
|
||||
|
|
|
|||
|
|
@ -178,7 +178,7 @@ def visualize_results(audio_group='audio'):
|
|||
if __name__ == '__main__':
|
||||
# evaluate_siamese('./outputs/story_words_test.train.tfrecords',audio_group='story_words.gpu',weights ='siamese_speech_model-58-epoch-0.00-acc.h5')
|
||||
# evaluate_siamese('./outputs/story_words.test.tfrecords',audio_group='story_words',weights ='siamese_speech_model-675-epoch-0.00-acc.h5')
|
||||
evaluate_siamese('./outputs/story_words_test.train.tfrecords',audio_group='story_phrases',weights ='siamese_speech_model-231-epoch-0.00-acc.h5')
|
||||
evaluate_siamese('./outputs/story_words_test.train.tfrecords',audio_group='story_words.gpu',weights ='siamese_speech_model-58-epoch-0.00-acc.h5')
|
||||
# play_results('story_words')
|
||||
#inspect_tfrecord('./outputs/story_phrases.test.tfrecords',audio_group='story_phrases')
|
||||
# visualize_results('story_words.gpu')
|
||||
|
|
|
|||
Loading…
Reference in New Issue