visualizing and playing sound files where prediction fails

master
Malar Kannan 2017-11-13 19:22:30 +05:30
parent 988f66c2c2
commit e4b8b4e0a7
4 changed files with 95 additions and 43 deletions

View File

@ -6,7 +6,7 @@ from speech_utils import threadsafe_iter
import tensorflow as tf import tensorflow as tf
from tensorflow.python.ops import data_flow_ops from tensorflow.python.ops import data_flow_ops
import numpy as np import numpy as np
from spectro_gen import generate_aiff_spectrogram from speech_spectrum import generate_aiff_spectrogram
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
import itertools import itertools
import os import os

View File

@ -1,15 +1,36 @@
import pyaudio import pyaudio
from pysndfile import sndio as snd
import numpy as np import numpy as np
# from matplotlib import pyplot as plt # from matplotlib import pyplot as plt
from spectro_gen import plot_stft, generate_spec_frec from speech_spectrum import plot_stft, generate_spec_frec
def record_spectrogram(n_sec, plot=False, playback=False):
SAMPLE_RATE = 22050 SAMPLE_RATE = 22050
N_CHANNELS = 2 N_CHANNELS = 2
def file_player():
    """Create an audio-file player backed by a single PyAudio instance.

    Returns:
        A ``(play_file, close_player)`` tuple of callables.
        ``play_file(audiopath, plot=False)`` plays one file (blocking) and
        optionally plots it; ``close_player()`` terminates the shared
        PyAudio instance and should be called once playback is finished.
    """
    p_oup = pyaudio.PyAudio()

    def play_file(audiopath, plot=False):
        """Play ``audiopath`` on a 2-channel output; plot it if ``plot``."""
        print('playing', audiopath)
        samples, samplerate, _ = snd.read(audiopath)
        stream = p_oup.open(
            format=pyaudio.paFloat32,
            channels=2,
            rate=samplerate,
            output=True)
        try:
            # Duplicate every sample so the same signal feeds both output
            # channels (interleaved L/R frames).
            # NOTE(review): assumes `samples` is 1-D (mono) - confirm.
            interleaved = np.asarray([samples, samples]).T.reshape(-1)
            audio_data = interleaved.astype(np.float32).tobytes()
            stream.write(audio_data)
        finally:
            # Release the output stream even when playback fails.
            stream.close()
        if plot:
            # Use the rate read from the file rather than the module-level
            # SAMPLE_RATE so the plot matches the audio actually played.
            plot_stft(samples, samplerate)

    def close_player():
        """Terminate the PyAudio instance shared by ``play_file``."""
        p_oup.terminate()

    return play_file, close_player
def record_spectrogram(n_sec, plot=False, playback=False):
# show_record_prompt()
N_SEC = n_sec N_SEC = n_sec
CHUNKSIZE = int(SAMPLE_RATE * N_SEC / N_CHANNELS) # fixed chunk size CHUNKSIZE = int(SAMPLE_RATE * N_SEC / N_CHANNELS) # fixed chunk size
# show_record_prompt()
input('Press [Enter] to start recording sample... ') input('Press [Enter] to start recording sample... ')
p_inp = pyaudio.PyAudio() p_inp = pyaudio.PyAudio()
stream = p_inp.open( stream = p_inp.open(

View File

@ -1,9 +1,10 @@
from speech_siamese import siamese_model # from speech_siamese import siamese_model
from record_mic_speech import record_spectrogram from speech_tools import record_spectrogram, file_player
# from importlib import reload # from importlib import reload
# import speech_data # import speech_data
# reload(speech_data) # reload(speech_data)
import numpy as np import numpy as np
import pandas as pd
import os import os
import pickle import pickle
import tensorflow as tf import tensorflow as tf
@ -25,7 +26,8 @@ def test_with(audio_group):
print(np.argmax(model.predict([X[:, 0], X[:, 1]]),axis=1)) print(np.argmax(model.predict([X[:, 0], X[:, 1]]),axis=1))
print(Y.astype(np.int8)) print(Y.astype(np.int8))
def evaluate_siamese(audio_group='audio',model_file = 'siamese_speech_model-46-epoch-0.29-acc.h5'): def evaluate_siamese(audio_group='audio',model_file = 'siamese_speech_model-305-epoch-0.20-acc.h5'):
# audio_group='audio';model_file = 'siamese_speech_model-305-epoch-0.20-acc.h5'
records_file = os.path.join('./outputs',audio_group+'.train.tfrecords') records_file = os.path.join('./outputs',audio_group+'.train.tfrecords')
const_file = os.path.join('./outputs',audio_group+'.constants') const_file = os.path.join('./outputs',audio_group+'.constants')
model_weights_path =os.path.join('./models/story_words/',model_file) model_weights_path =os.path.join('./models/story_words/',model_file)
@ -36,8 +38,9 @@ def evaluate_siamese(audio_group='audio',model_file = 'siamese_speech_model-46-e
model.load_weights(model_weights_path) model.load_weights(model_weights_path)
record_iterator = tf.python_io.tf_record_iterator(path=records_file) record_iterator = tf.python_io.tf_record_iterator(path=records_file)
#tqdm(enumerate(record_iterator),total=n_records) #tqdm(enumerate(record_iterator),total=n_records)
with open('./outputs/' + audio_group + '.results.csv','w') as result_csv: result_csv = open('./outputs/' + audio_group + '.results.csv','w')
result_csv_w = csv.writer(result_csv, quoting=csv.QUOTE_MINIMAL) result_csv_w = csv.writer(result_csv, quoting=csv.QUOTE_MINIMAL)
result_csv_w.writerow(["phoneme1","phoneme2","voice1","voice2","rate1","rate2","variant1","variant2","file1","file2"])
for (i,string_record) in enumerate(record_iterator): for (i,string_record) in enumerate(record_iterator):
# string_record = next(record_iterator) # string_record = next(record_iterator)
example = tf.train.Example() example = tf.train.Example()
@ -70,9 +73,37 @@ def evaluate_siamese(audio_group='audio',model_file = 'siamese_speech_model-46-e
file2 = example.features.feature['file2'].bytes_list.value[0].decode() file2 = example.features.feature['file2'].bytes_list.value[0].decode()
print(phoneme1,phoneme2,voice1,voice2,rate1,rate2,variant1,variant2,file1,file2) print(phoneme1,phoneme2,voice1,voice2,rate1,rate2,variant1,variant2,file1,file2)
result_csv_w.writerow([phoneme1,phoneme2,voice1,voice2,rate1,rate2,variant1,variant2,file1,file2]) result_csv_w.writerow([phoneme1,phoneme2,voice1,voice2,rate1,rate2,variant1,variant2,file1,file2])
result_csv.close()
def play_results(audio_group='audio'):
    """Interactively replay the audio pairs listed in a results CSV.

    Reads ``./outputs/<audio_group>.results.csv`` (the file written by
    ``evaluate_siamese``) and, for each row, prints the pair's metadata and
    plays ``file1`` then ``file2`` from ``./outputs/<audio_group>/`` with
    plotting enabled. After each pair the user is prompted:
    ``r`` replays the pair, ``q`` stops, anything else moves on.

    Args:
        audio_group: Name of the audio group whose results are replayed.
    """
    result_data = pd.read_csv('./outputs/' + audio_group + '.results.csv')
    play_file, close_player = file_player()
    # Column names are the same for every row, so build the header once.
    keys = ["phoneme1", "phoneme2", "voice1", "voice2",
            "rate1", "rate2", "variant1", "variant2"]
    h_str = '\t'.join(keys)
    audio_dir = './outputs/' + audio_group + '/'
    try:
        for _, r in result_data.iterrows():
            row_str = '\t'.join(str(r[k]) for k in keys)
            while True:
                print(h_str)
                print(row_str)
                play_file(audio_dir + r['file1'], True)
                play_file(audio_dir + r['file2'], True)
                a = input("press 'r/q/[Enter]' to replay/quit/continue:\t")
                if a == 'q':
                    return
                if a != 'r':
                    break
    finally:
        # Always release the audio backend, including on errors or Ctrl-C.
        close_player()
# Script entry: the evaluation run below is disabled; the script currently
# replays the result pairs stored for the 'story_words' group.
# evaluate_siamese('story_words',model_file='siamese_speech_model-305-epoch-0.20-acc.h5')
play_results('story_words')
# test_with('rand_edu') # test_with('rand_edu')
# sunflower_data,sunflower_result = get_word_pairs_data('sweater',15) # sunflower_data,sunflower_result = get_word_pairs_data('sweater',15)
# print(np.argmax(model.predict([sunflower_data[:, 0], sunflower_data[:, 1]]),axis=1)) # print(np.argmax(model.predict([sunflower_data[:, 0], sunflower_data[:, 1]]),axis=1))