diff --git a/requirements-linux.txt b/requirements-linux.txt
index dba434a..899b69f 100644
--- a/requirements-linux.txt
+++ b/requirements-linux.txt
@@ -40,6 +40,7 @@ parso==0.1.0
 partd==0.3.8
 pexpect==4.2.1
 pickleshare==0.7.4
+pkg-resources==0.0.0
 progressbar2==3.34.3
 prompt-toolkit==1.0.15
 protobuf==3.4.0
@@ -57,6 +58,7 @@ pyzmq==16.0.2
 qtconsole==4.3.1
 scikit-learn==0.19.0
 scipy==0.19.1
+seaborn==0.8.1
 simplegeneric==0.8.1
 six==1.11.0
 sortedcontainers==1.5.7
diff --git a/speech_data.py b/speech_data.py
index 9169058..48d2ad8 100644
--- a/speech_data.py
+++ b/speech_data.py
@@ -21,10 +21,21 @@ def siamese_pairs(rightGroup, wrongGroup):
     group2 = [r for (i, r) in wrongGroup.iterrows()]
     rightWrongPairs = [(g1, g2) for g2 in group2 for g1 in group1]+[(g2, g1) for g2 in group2 for g1 in group1]
     rightRightPairs = [i for i in itertools.permutations(group1, 2)]#+[i for i in itertools.combinations(group2, 2)]
-    # random.shuffle(rightWrongPairs)
-    # random.shuffle(rightRightPairs)
+    def filter_criteria(s1,s2):
+        same = s1['variant'] == s2['variant']
+        phon_same = s1['phonemes'] == s2['phonemes']
+        voice_diff = s1['voice'] != s2['voice']
+        if not same and phon_same:
+            return False
+        if same and not voice_diff:
+            return False
+        return True
+    validRWPairs = [i for i in rightWrongPairs if filter_criteria(*i)]
+    validRRPairs = [i for i in rightRightPairs if filter_criteria(*i)]
+    random.shuffle(validRWPairs)
+    random.shuffle(validRRPairs)
     # return rightRightPairs[:10],rightWrongPairs[:10]
-    return rightRightPairs[:32],rightWrongPairs[:32]
+    return validRWPairs[:32],validRRPairs[:32]
 
 
 def _float_feature(value):
@@ -60,13 +71,6 @@ def create_spectrogram_tfrecords(audio_group='audio',sample_count=0,train_test_r
     for (output,group) in groups:
         group_prog = tqdm(group,desc='Writing Spectrogram')
         for sample1,sample2 in group_prog:
-            same = sample1['variant'] == sample2['variant']
-            phon_same = sample1['phonemes'] == sample2['phonemes']
-            voice_diff = sample1['voice'] != sample2['voice']
-            if not same and phon_same:
-                continue
-            if same and not voice_diff:
-                continue
             group_prog.set_postfix(output=output
                                   ,var1=sample1['variant']
                                   ,var2=sample2['variant'])
@@ -243,8 +247,8 @@ if __name__ == '__main__':
     # create_spectrogram_tfrecords('audio',sample_count=100)
     # create_spectrogram_tfrecords('story_all',sample_count=25)
     # fix_csv('story_words_test')
-    fix_csv('story_phrases')
-    create_spectrogram_tfrecords('story_phrases',sample_count=100,train_test_ratio=0.3)
+    #fix_csv('story_phrases')
+    create_spectrogram_tfrecords('story_phrases',sample_count=10,train_test_ratio=0.1)
     # create_spectrogram_tfrecords('audio',sample_count=50)
     # read_siamese_tfrecords_generator('audio')
     # padd_zeros_siamese_tfrecords('audio')
diff --git a/speech_test.py b/speech_test.py
index 98adb58..95cc5da 100644
--- a/speech_test.py
+++ b/speech_test.py
@@ -12,6 +12,7 @@ import tensorflow as tf
 import csv
 from tqdm import tqdm
 from speech_data import padd_zeros
+import seaborn as sns
 
 def predict_recording_with(m,sample_size=15):
     spec1 = record_spectrogram(n_sec=1.4)
@@ -35,6 +36,7 @@ def evaluate_siamese(records_file,audio_group='audio',weights = 'siamese_speech_
     print('evaluating {}...'.format(records_file))
     model = load_model_arch(arch_file)
     # model = siamese_model((n_spec, n_features))
+    n_spec = 422
     model.load_weights(weight_file)
     record_iterator,records_count = record_generator_count(records_file)
     total,same_success,diff_success,skipped,same_failed,diff_failed = 0,0,0,0,0,0
@@ -130,20 +132,20 @@ def play_results(audio_group='audio'):
 
 def visualize_results(audio_group='audio'):
     # %matplotlib inline
-    audio_group = 'story_words.gpu'
+    audio_group = 'story_phrases'
     result = pd.read_csv('./outputs/' + audio_group + '.results.csv',index_col=0)
     result.groupby('success').size().plot(kind='bar')
     result.describe(include=['object'])
     failed = result[result['success'] == False]
     same_failed = failed[failed['variant1'] == failed['variant2']]
     diff_failed = failed[failed['variant1'] != failed['variant2']]
-    same_failed[same_failed['voice1'] != same_failed['voice2']]
-    diff_failed[diff_failed['voice1'] != diff_failed['voice2']]
+    result.groupby(['voice1','voice2']).size()
 
 
 if __name__ == '__main__':
     # evaluate_siamese('./outputs/story_words_test.train.tfrecords',audio_group='story_words.gpu',weights ='siamese_speech_model-58-epoch-0.00-acc.h5')
     # evaluate_siamese('./outputs/story_words.test.tfrecords',audio_group='story_words',weights ='siamese_speech_model-675-epoch-0.00-acc.h5')
+    evaluate_siamese('./outputs/story_phrases.test.tfrecords',audio_group='story_phrases',weights ='siamese_speech_model-329-epoch-0.00-acc.h5')
     # play_results('story_words')
     visualize_results('story_words.gpu')
     # test_with('rand_edu')
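
Note on the main behavioural change: the pair filtering formerly inlined in create_spectrogram_tfrecords now lives in siamese_pairs as filter_criteria. Below is a minimal, self-contained sketch of that rule applied to hypothetical sample dicts (the repository actually passes pandas rows; the field values here are invented for illustration only).

# Illustrative sketch, not part of the patch: the filter keeps a pair only if
#   * cross-variant ("wrong") pairs do not share the same phoneme string, and
#   * same-variant ("right") pairs come from two different voices.
import itertools
import random

def filter_criteria(s1, s2):
    same = s1['variant'] == s2['variant']
    phon_same = s1['phonemes'] == s2['phonemes']
    voice_diff = s1['voice'] != s2['voice']
    if not same and phon_same:
        return False  # a "wrong" pair that is phonetically identical anyway
    if same and not voice_diff:
        return False  # a "right" pair spoken by the same voice
    return True

# Hypothetical samples carrying the three fields the patch relies on.
samples = [
    {'variant': 'a', 'phonemes': 'k ae t', 'voice': 'v1'},
    {'variant': 'a', 'phonemes': 'k ae t', 'voice': 'v2'},
    {'variant': 'b', 'phonemes': 'k ae t', 'voice': 'v1'},
    {'variant': 'b', 'phonemes': 'b ae t', 'voice': 'v2'},
]

pairs = list(itertools.permutations(samples, 2))
valid = [p for p in pairs if filter_criteria(*p)]
random.shuffle(valid)
print('{} of {} pairs survive the filter'.format(len(valid), len(pairs)))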
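
On the evaluation side, visualize_results now summarises results by voice pairing instead of slicing same_failed/diff_failed by voice. A rough sketch of that breakdown on a toy results table follows; only the column names come from the patch, the values are invented, and seaborn (pinned at 0.8.1 in the requirements change) is assumed to be used purely for plot styling.

# Illustrative sketch, not from the repository: failure breakdown by voice pair.
import pandas as pd
import seaborn as sns

sns.set()  # seaborn styling, matching the new requirements entry

results = pd.DataFrame({
    'success':  [True, False, False, True],
    'variant1': ['a', 'a', 'b', 'b'],
    'variant2': ['a', 'b', 'b', 'a'],
    'voice1':   ['v1', 'v1', 'v2', 'v2'],
    'voice2':   ['v2', 'v2', 'v2', 'v1'],
})

failed = results[results['success'] == False]
print(failed.groupby(['voice1', 'voice2']).size())   # failures per voice pairing
print(results.groupby(['voice1', 'voice2']).size())  # all evaluated pairs per voice pairing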