visualizing and playing sound files where prediction fails
parent 988f66c2c2
commit e4b8b4e0a7
@@ -6,7 +6,7 @@ from speech_utils import threadsafe_iter
 import tensorflow as tf
 from tensorflow.python.ops import data_flow_ops
 import numpy as np
-from spectro_gen import generate_aiff_spectrogram
+from speech_spectrum import generate_aiff_spectrogram
 from sklearn.model_selection import train_test_split
 import itertools
 import os
@@ -1,15 +1,36 @@
 import pyaudio
+from pysndfile import sndio as snd
 import numpy as np
 # from matplotlib import pyplot as plt
-from spectro_gen import plot_stft, generate_spec_frec
+from speech_spectrum import plot_stft, generate_spec_frec
 
+SAMPLE_RATE = 22050
+N_CHANNELS = 2
+
+def file_player():
+    p_oup = pyaudio.PyAudio()
+    def play_file(audiopath,plot=False):
+        print('playing',audiopath)
+        samples, samplerate, form = snd.read(audiopath)
+        stream = p_oup.open(
+            format=pyaudio.paFloat32,
+            channels=2,
+            rate=samplerate,
+            output=True)
+        one_channel = np.asarray([samples, samples]).T.reshape(-1)
+        audio_data = one_channel.astype(np.float32).tobytes()
+        stream.write(audio_data)
+        stream.close()
+        if plot:
+            plot_stft(samples, SAMPLE_RATE)
+    def close_player():
+        p_oup.terminate()
+    return play_file,close_player
+
 def record_spectrogram(n_sec, plot=False, playback=False):
-    SAMPLE_RATE = 22050
-    N_CHANNELS = 2
+    # show_record_prompt()
+
     N_SEC = n_sec
     CHUNKSIZE = int(SAMPLE_RATE * N_SEC / N_CHANNELS) # fixed chunk size
-    # show_record_prompt()
+
     input('Press [Enter] to start recording sample... ')
     p_inp = pyaudio.PyAudio()
     stream = p_inp.open(
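Review note: the new file_player() keeps a single pyaudio.PyAudio instance alive across calls and returns a (play_file, close_player) closure pair; play_file duplicates the mono signal into two interleaved channels before writing it to a stereo float32 output stream. A minimal usage sketch (the audio path is hypothetical):

    # Usage sketch for the closure-based player; the path is made up.
    play_file, close_player = file_player()
    play_file('./outputs/story_words/example.aiff', plot=True)  # play, then show the STFT
    close_player()  # terminate the shared PyAudio instance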
@@ -1,9 +1,10 @@
-from speech_siamese import siamese_model
-from record_mic_speech import record_spectrogram
+# from speech_siamese import siamese_model
+from speech_tools import record_spectrogram, file_player
 # from importlib import reload
 # import speech_data
 # reload(speech_data)
 import numpy as np
+import pandas as pd
 import os
 import pickle
 import tensorflow as tf
@@ -25,7 +26,8 @@ def test_with(audio_group):
     print(np.argmax(model.predict([X[:, 0], X[:, 1]]),axis=1))
     print(Y.astype(np.int8))
 
-def evaluate_siamese(audio_group='audio',model_file = 'siamese_speech_model-46-epoch-0.29-acc.h5'):
+def evaluate_siamese(audio_group='audio',model_file = 'siamese_speech_model-305-epoch-0.20-acc.h5'):
+    # audio_group='audio';model_file = 'siamese_speech_model-305-epoch-0.20-acc.h5'
     records_file = os.path.join('./outputs',audio_group+'.train.tfrecords')
     const_file = os.path.join('./outputs',audio_group+'.constants')
     model_weights_path =os.path.join('./models/story_words/',model_file)
@@ -36,8 +38,9 @@ def evaluate_siamese(audio_group='audio',model_file = 'siamese_speech_model-46-e
     model.load_weights(model_weights_path)
     record_iterator = tf.python_io.tf_record_iterator(path=records_file)
     #tqdm(enumerate(record_iterator),total=n_records)
-    with open('./outputs/' + audio_group + '.results.csv','w') as result_csv:
+    result_csv = open('./outputs/' + audio_group + '.results.csv','w')
     result_csv_w = csv.writer(result_csv, quoting=csv.QUOTE_MINIMAL)
+    result_csv_w.writerow(["phoneme1","phoneme2","voice1","voice2","rate1","rate2","variant1","variant2","file1","file2"])
     for (i,string_record) in enumerate(record_iterator):
         # string_record = next(record_iterator)
         example = tf.train.Example()
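Review note: dropping the with-open context manager keeps result_csv open across the whole record loop without re-indenting it, at the cost of the explicit result_csv.close() added in the next hunk. An equivalent context-manager form, which also closes the file if the loop raises, would nest the writer and the loop (a sketch, not part of the commit):

    # Sketch: same behavior, but the file is closed on any exit path.
    with open('./outputs/' + audio_group + '.results.csv', 'w') as result_csv:
        result_csv_w = csv.writer(result_csv, quoting=csv.QUOTE_MINIMAL)
        result_csv_w.writerow(["phoneme1","phoneme2","voice1","voice2",
                               "rate1","rate2","variant1","variant2","file1","file2"])
        for (i, string_record) in enumerate(record_iterator):
            ...  # parse the tf.train.Example and write one row per pair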
@@ -70,9 +73,37 @@ def evaluate_siamese(audio_group='audio',model_file = 'siamese_speech_model-46-e
         file2 = example.features.feature['file2'].bytes_list.value[0].decode()
         print(phoneme1,phoneme2,voice1,voice2,rate1,rate2,variant1,variant2,file1,file2)
         result_csv_w.writerow([phoneme1,phoneme2,voice1,voice2,rate1,rate2,variant1,variant2,file1,file2])
+    result_csv.close()
 
 
-evaluate_siamese('story_words',model_file='siamese_speech_model-92-epoch-0.20-acc.h5')
+def play_results(audio_group='audio'):
+    result_data = pd.read_csv('./outputs/' + audio_group + '.results.csv')
+    play_file,close_player = file_player()
+    quit = False
+    for (i,r) in result_data.iterrows():
+        if quit:
+            break
+        keys = ["phoneme1","phoneme2","voice1","voice2","rate1","rate2","variant1","variant2"]
+        row_vals = [str(r[k]) for k in keys]
+        h_str = '\t'.join(keys)
+        row_str = '\t'.join(row_vals)
+        while True:
+            print(h_str)
+            print(row_str)
+            play_file('./outputs/'+audio_group+'/'+r['file1'],True)
+            play_file('./outputs/'+audio_group+'/'+r['file2'],True)
+            a = input("press 'r/q/[Enter]' to replay/quit/continue:\t")
+            if a == 'r':
+                continue
+            if a == 'q':
+                quit = True
+                break
+            else:
+                break
+    close_player()
+
+# evaluate_siamese('story_words',model_file='siamese_speech_model-305-epoch-0.20-acc.h5')
+play_results('story_words')
 # test_with('rand_edu')
 # sunflower_data,sunflower_result = get_word_pairs_data('sweater',15)
 # print(np.argmax(model.predict([sunflower_data[:, 0], sunflower_data[:, 1]]),axis=1))
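Review note: play_results re-reads the CSV that evaluate_siamese writes and replays each recorded pair (per the commit title, the pairs where prediction fails); in the inner while loop, 'r' replays the current pair, 'q' stops the review, and Enter advances to the next row. A typical session, using the model file named in this commit:

    # Sketch: regenerate the results CSV, then audit the pairs by ear.
    evaluate_siamese('story_words', model_file='siamese_speech_model-305-epoch-0.20-acc.h5')
    play_results('story_words')  # 'r' = replay, 'q' = quit, Enter = next pair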