# from speech_siamese import siamese_model
from speech_tools import record_spectrogram, file_player
# from importlib import reload
# import speech_data
# reload(speech_data)
import numpy as np
import pandas as pd
import os
import pickle
import tensorflow as tf
import csv
from speech_data import padd_zeros

# Columns written to '<audio_group>.results.csv' by evaluate_siamese, in order.
_RESULT_COLUMNS = [
    "phoneme1", "phoneme2", "voice1", "voice2", "rate1", "rate2",
    "variant1", "variant2", "file1", "file2",
]

# Columns shown on screen by play_results (the file columns are only played).
_DISPLAY_COLUMNS = [
    "phoneme1", "phoneme2", "voice1", "voice2", "rate1", "rate2",
    "variant1", "variant2",
]


def predict_recording_with(m, sample_size=15):
    """Record two 1.4 second spectrograms and return ``m``'s prediction.

    Args:
        m: object exposing ``predict``; it is called with a two-element list
            holding the two halves of the pair array.
        sample_size: forwarded unchanged to ``create_test_pair``.

    Returns:
        Whatever ``m.predict`` returns for the recorded pair.
    """
    # NOTE(review): `create_test_pair` is neither defined nor imported in this
    # file, so this call raises NameError unless it is provided elsewhere.
    spec1 = record_spectrogram(n_sec=1.4)
    spec2 = record_spectrogram(n_sec=1.4)
    inp = create_test_pair(spec1, spec2, sample_size)
    return m.predict([inp[:, 0], inp[:, 1]])

# while(True):
#     print(predict_recording_with(model))


def test_with(audio_group):
    """Print the arg-max predictions and the expected labels for a group.

    Args:
        audio_group: passed to ``speech_data`` to obtain the ``(X, Y)`` pair.
    """
    # NOTE(review): `speech_data` (as a callable) and the global `model` are
    # not defined in this file; confirm where they are expected to come from.
    X, Y = speech_data(audio_group)
    print(np.argmax(model.predict([X[:, 0], X[:, 1]]), axis=1))
    print(Y.astype(np.int8))


def _int_feature(example, key):
    """Return the first int64 value stored under ``key`` in ``example``."""
    return example.features.feature[key].int64_list.value[0]


def _str_feature(example, key):
    """Return the first bytes value stored under ``key``, decoded to str."""
    return example.features.feature[key].bytes_list.value[0].decode()


def _spectrogram(example, index):
    """Rebuild spectrogram ``index`` (1 or 2) from its flattened float list.

    The stored ``spec_n<index>`` and ``spec_w<index>`` values give the two
    dimensions used to reshape the flat ``spec<index>`` list.
    """
    n_rows = _int_feature(example, 'spec_n{}'.format(index))
    n_cols = _int_feature(example, 'spec_w{}'.format(index))
    flat = example.features.feature['spec{}'.format(index)].float_list.value
    return np.array(flat).reshape(n_rows, n_cols)


def _result_row(example):
    """Collect the metadata of ``example`` in ``_RESULT_COLUMNS`` order."""
    return [
        _str_feature(example, 'phoneme1'),
        _str_feature(example, 'phoneme2'),
        _str_feature(example, 'voice1'),
        _str_feature(example, 'voice2'),
        _int_feature(example, 'rate1'),
        _int_feature(example, 'rate2'),
        _str_feature(example, 'variant1'),
        _str_feature(example, 'variant2'),
        _str_feature(example, 'file1'),
        _str_feature(example, 'file2'),
    ]


def evaluate_siamese(audio_group='audio',
                     model_file='siamese_speech_model-305-epoch-0.20-acc.h5'):
    """Run the model over the training tfrecords and log mispredicted pairs.

    Reads ``./outputs/<audio_group>.train.tfrecords`` and
    ``./outputs/<audio_group>.constants``, loads weights from
    ``./models/story_words/<model_file>``, and writes one row per record whose
    thresholded prediction differs from the stored output to
    ``./outputs/<audio_group>.results.csv``. Each such row is also printed.

    Args:
        audio_group: base name of the tfrecords/constants/results files.
        model_file: weights file name inside ``./models/story_words/``.
    """
    # audio_group='audio';model_file = 'siamese_speech_model-305-epoch-0.20-acc.h5'
    records_file = os.path.join('./outputs', audio_group + '.train.tfrecords')
    const_file = os.path.join('./outputs', audio_group + '.constants')
    model_weights_path = os.path.join('./models/story_words/', model_file)
    results_file = os.path.join('./outputs', audio_group + '.results.csv')

    # NOTE(review): pickle executes arbitrary code on load; only use constants
    # files produced by this project.
    with open(const_file, 'rb') as const_fh:
        n_spec, n_features, _n_records = pickle.load(const_fh)

    print('evaluating tfrecords({}-train)...'.format(audio_group))
    # NOTE(review): `siamese_model` is only referenced by the commented-out
    # import at the top of this file; it must be in scope for this to run.
    model = siamese_model((n_spec, n_features))
    model.load_weights(model_weights_path)
    record_iterator = tf.python_io.tf_record_iterator(path=records_file)
    # tqdm(enumerate(record_iterator),total=n_records)

    # newline='' is what the csv module requires of files it writes to; the
    # context manager guarantees the file is flushed/closed even on errors.
    with open(results_file, 'w', newline='') as result_csv:
        result_csv_w = csv.writer(result_csv, quoting=csv.QUOTE_MINIMAL)
        result_csv_w.writerow(_RESULT_COLUMNS)
        for string_record in record_iterator:
            # string_record = next(record_iterator)
            example = tf.train.Example()
            example.ParseFromString(string_record)

            # Pad both spectrograms with n_spec as the target so they match
            # the shape the model was built with.
            p_spec1 = padd_zeros(_spectrogram(example, 1), n_spec)
            p_spec2 = padd_zeros(_spectrogram(example, 2), n_spec)
            input_arr = np.asarray([[p_spec1, p_spec2]])
            output_arr = np.asarray(
                [example.features.feature['output'].int64_list.value])

            y_pred = model.predict([input_arr[:, 0], input_arr[:, 1]])
            predicted = np.asarray(y_pred[0] > 0.5).astype(output_arr.dtype)
            expected = output_arr[0]
            if np.all(predicted == expected):
                continue  # only mispredictions are reported

            row = _result_row(example)
            print(*row)
            result_csv_w.writerow(row)


def _review_pair(play_file, audio_group, row, header, row_str):
    """Play one result pair until the user moves on.

    Prints the header and row, plays ``file1`` then ``file2`` from
    ``./outputs/<audio_group>/``, and prompts. 'r' replays the pair.

    Returns:
        True if the user typed 'q' (stop reviewing), False for any other
        non-'r' answer (continue with the next pair).
    """
    while True:
        print(header)
        print(row_str)
        play_file(os.path.join('./outputs', audio_group, row['file1']), True)
        play_file(os.path.join('./outputs', audio_group, row['file2']), True)
        answer = input("press 'r/q/[Enter]' to replay/quit/continue:\t")
        if answer == 'r':
            continue
        return answer == 'q'


def play_results(audio_group='audio'):
    """Interactively play back the pairs listed in the results CSV.

    Reads ``./outputs/<audio_group>.results.csv`` and, for each row, shows the
    metadata columns and plays both audio files, letting the user replay the
    pair, quit, or continue to the next one.

    Args:
        audio_group: base name of the results CSV and of the audio directory
            under ``./outputs``.
    """
    result_data = pd.read_csv(
        os.path.join('./outputs', audio_group + '.results.csv'))
    play_file, close_player = file_player()
    header = '\t'.join(_DISPLAY_COLUMNS)
    try:
        for _, row in result_data.iterrows():
            row_str = '\t'.join(str(row[k]) for k in _DISPLAY_COLUMNS)
            if _review_pair(play_file, audio_group, row, header, row_str):
                break
    finally:
        # Release the player even if playback or input() raises (e.g. Ctrl-C).
        close_player()


# NOTE(review): this call runs at import time as well as when the file is
# executed as a script; it is kept unguarded to preserve existing behaviour.
# evaluate_siamese('story_words',model_file='siamese_speech_model-305-epoch-0.20-acc.h5')
play_results('story_words')
# test_with('rand_edu')
# sunflower_data,sunflower_result = get_word_pairs_data('sweater',15)
# print(np.argmax(model.predict([sunflower_data[:, 0], sunflower_data[:, 1]]),axis=1))
# print(sunflower_result)