fixed randomize pair picking

master
Malar Kannan 2017-11-17 11:57:38 +05:30
parent 7fc89c0853
commit 6ff052be9b
3 changed files with 23 additions and 15 deletions

View File

@ -40,6 +40,7 @@ parso==0.1.0
partd==0.3.8
pexpect==4.2.1
pickleshare==0.7.4
pkg-resources==0.0.0
progressbar2==3.34.3
prompt-toolkit==1.0.15
protobuf==3.4.0
@ -57,6 +58,7 @@ pyzmq==16.0.2
qtconsole==4.3.1
scikit-learn==0.19.0
scipy==0.19.1
seaborn==0.8.1
simplegeneric==0.8.1
six==1.11.0
sortedcontainers==1.5.7

View File

@ -21,10 +21,21 @@ def siamese_pairs(rightGroup, wrongGroup):
group2 = [r for (i, r) in wrongGroup.iterrows()]
rightWrongPairs = [(g1, g2) for g2 in group2 for g1 in group1]+[(g2, g1) for g2 in group2 for g1 in group1]
rightRightPairs = [i for i in itertools.permutations(group1, 2)]#+[i for i in itertools.combinations(group2, 2)]
# random.shuffle(rightWrongPairs)
# random.shuffle(rightRightPairs)
def filter_criteria(s1,s2):
    """Return True when the sample pair (s1, s2) should be kept.

    Both samples are indexed by the keys 'variant', 'phonemes' and 'voice'.

    A pair is rejected (False) in two cases:
      * the variants differ but the phonemes are equal;
      * the variants are equal and the voices are equal too.
    Every other pair is accepted (True).
    """
    same = s1['variant'] == s2['variant']
    phon_same = s1['phonemes'] == s2['phonemes']
    voice_diff = s1['voice'] != s2['voice']
    # Different variants that share the same phonemes are rejected.
    if not same and phon_same:
        return False
    # Same variant in the same voice is rejected.
    if same and not voice_diff:
        return False
    return True
validRWPairs = [i for i in rightWrongPairs if filter_criteria(*i)]
validRRPairs = [i for i in rightRightPairs if filter_criteria(*i)]
random.shuffle(validRWPairs)
random.shuffle(validRRPairs)
# return rightRightPairs[:10],rightWrongPairs[:10]
return rightRightPairs[:32],rightWrongPairs[:32]
return validRWPairs[:32],validRRPairs[:32]
def _float_feature(value):
@ -60,13 +71,6 @@ def create_spectrogram_tfrecords(audio_group='audio',sample_count=0,train_test_r
for (output,group) in groups:
group_prog = tqdm(group,desc='Writing Spectrogram')
for sample1,sample2 in group_prog:
same = sample1['variant'] == sample2['variant']
phon_same = sample1['phonemes'] == sample2['phonemes']
voice_diff = sample1['voice'] != sample2['voice']
if not same and phon_same:
continue
if same and not voice_diff:
continue
group_prog.set_postfix(output=output
,var1=sample1['variant']
,var2=sample2['variant'])
@ -243,8 +247,8 @@ if __name__ == '__main__':
# create_spectrogram_tfrecords('audio',sample_count=100)
# create_spectrogram_tfrecords('story_all',sample_count=25)
# fix_csv('story_words_test')
fix_csv('story_phrases')
create_spectrogram_tfrecords('story_phrases',sample_count=100,train_test_ratio=0.3)
#fix_csv('story_phrases')
create_spectrogram_tfrecords('story_phrases',sample_count=10,train_test_ratio=0.1)
# create_spectrogram_tfrecords('audio',sample_count=50)
# read_siamese_tfrecords_generator('audio')
# padd_zeros_siamese_tfrecords('audio')

View File

@ -12,6 +12,7 @@ import tensorflow as tf
import csv
from tqdm import tqdm
from speech_data import padd_zeros
import seaborn as sns
def predict_recording_with(m,sample_size=15):
spec1 = record_spectrogram(n_sec=1.4)
@ -35,6 +36,7 @@ def evaluate_siamese(records_file,audio_group='audio',weights = 'siamese_speech_
print('evaluating {}...'.format(records_file))
model = load_model_arch(arch_file)
# model = siamese_model((n_spec, n_features))
n_spec = 422
model.load_weights(weight_file)
record_iterator,records_count = record_generator_count(records_file)
total,same_success,diff_success,skipped,same_failed,diff_failed = 0,0,0,0,0,0
@ -130,20 +132,20 @@ def play_results(audio_group='audio'):
# Inspect the evaluation results stored in './outputs/<audio_group>.results.csv'.
# Plots a bar chart of the row count per 'success' value and builds several
# filtered views of the failed rows. There is no return statement.
def visualize_results(audio_group='audio'):
# %matplotlib inline
# NOTE(review): the audio_group argument is overwritten by the two hard-coded
# assignments below, so the caller's value is ignored; the last assignment
# ('story_phrases') is the one in effect. In this diff view the two lines are
# probably the removed and the added version of the same line - confirm.
audio_group = 'story_words.gpu'
audio_group = 'story_phrases'
# The first CSV column is used as the index.
result = pd.read_csv('./outputs/' + audio_group + '.results.csv',index_col=0)
# Bar chart of how many rows fall under each 'success' value.
result.groupby('success').size().plot(kind='bar')
# The value of this expression is discarded; presumably meant for interactive
# (notebook-style) display - confirm.
result.describe(include=['object'])
# Rows whose 'success' column equals False.
failed = result[result['success'] == False]
# Split the failures by whether both sides carry the same variant.
same_failed = failed[failed['variant1'] == failed['variant2']]
diff_failed = failed[failed['variant1'] != failed['variant2']]
# The three expressions below are not assigned, so their results are discarded
# when the function runs as a whole.
same_failed[same_failed['voice1'] != same_failed['voice2']]
diff_failed[diff_failed['voice1'] != diff_failed['voice2']]
result.groupby(['voice1','voice2']).size()
if __name__ == '__main__':
# evaluate_siamese('./outputs/story_words_test.train.tfrecords',audio_group='story_words.gpu',weights ='siamese_speech_model-58-epoch-0.00-acc.h5')
# evaluate_siamese('./outputs/story_words.test.tfrecords',audio_group='story_words',weights ='siamese_speech_model-675-epoch-0.00-acc.h5')
evaluate_siamese('./outputs/story_phrases.test.tfrecords',audio_group='story_phrases',weights ='siamese_speech_model-329-epoch-0.00-acc.h5')
# play_results('story_words')
visualize_results('story_words.gpu')
# test_with('rand_edu')