Added code to record audio and generate spectrograms; WIP test script for the trained model.

master
Malar Kannan 2017-10-25 15:38:03 +05:30
parent a8f17ef764
commit f1e82a2539
5 changed files with 135 additions and 115 deletions

View File

@ -1,36 +1,42 @@
import pyaudio
import numpy as np
# from matplotlib import pyplot as plt
from spectro_gen import plot_stft
from spectro_gen import plot_stft, generate_spectrogram
def record_spectrogram(n_sec, plot=False, playback=False):
    """Record ``n_sec`` seconds of microphone audio and return its spectrogram.

    Reconstructed from a diff-mangled hunk (pre-/post-commit lines were
    interleaved); this is the post-commit version of the function.

    Parameters
    ----------
    n_sec : float
        Recording duration in seconds.
    plot : bool, optional
        If True, display an STFT plot of the recorded signal via ``plot_stft``.
    playback : bool, optional
        If True, play the processed recording back through an output stream.

    Returns
    -------
    The first value returned by ``generate_spectrogram`` (its second return
    value is discarded).
    """
    SAMPLE_RATE = 22050
    N_CHANNELS = 2
    N_SEC = n_sec
    # NOTE(review): dividing by the channel count makes the total interleaved
    # sample count come out to roughly n_sec * SAMPLE_RATE — confirm intent.
    CHUNKSIZE = int(SAMPLE_RATE * N_SEC / N_CHANNELS)  # fixed chunk size
    input('Press [Enter] to start recording sample... ')
    p_inp = pyaudio.PyAudio()
    stream = p_inp.open(
        format=pyaudio.paFloat32,
        channels=N_CHANNELS,
        rate=SAMPLE_RATE,
        input=True,
        frames_per_buffer=CHUNKSIZE)
    data = stream.read(CHUNKSIZE)
    numpydata = np.frombuffer(data, dtype=np.float32)
    # Collapse the interleaved stereo frames to one channel by averaging the
    # absolute values of the two channels per frame.
    multi_channel = np.abs(np.reshape(numpydata, (-1, 2))).mean(axis=1)
    # Re-interleave as (m, -m) pairs, producing a stereo-shaped buffer whose
    # two channels are mirror images of the averaged signal.
    one_channel = np.asarray([multi_channel, -1 * multi_channel]).T.reshape(-1)
    mean_channel_data = one_channel.tobytes()
    stream.stop_stream()
    stream.close()
    p_inp.terminate()
    if plot:
        plot_stft(one_channel, SAMPLE_RATE)
    if playback:
        p_oup = pyaudio.PyAudio()
        stream = p_oup.open(
            format=pyaudio.paFloat32,
            channels=2,
            rate=SAMPLE_RATE,
            output=True)
        stream.write(mean_channel_data)
        stream.close()
        p_oup.terminate()
    ims, _ = generate_spectrogram(one_channel, SAMPLE_RATE)
    return ims

View File

@ -128,7 +128,7 @@ def play_sunflower():
sample_r = snd.get_info('./outputs/sunflowers-Alex-150-normal-589.aiff')[0]
snd_data_f64 = snd.read('./outputs/sunflowers-Alex-150-normal-589.aiff')[0]
snd_data_f32 = snd_data_f64.astype(np.float32)
snd_data_f32.shape
print(snd_data_f32.shape)
snd_data = snd_data_f32.tobytes()
p_oup = pyaudio.PyAudio()
stream = p_oup.open(

View File

@ -15,6 +15,16 @@ def get_siamese_pairs(groupF1, groupF2):
return (t, f)
def create_X(sp, max_samples):
    """Stack a pair of spectrograms into one fixed-length array.

    Parameters
    ----------
    sp : sequence of two mappings
        Each element has a ``'spectrogram'`` entry holding a 2-D array of
        shape ``(n_frames, n_bins)``.
    max_samples : int
        Target frame count; each spectrogram is padded up to this length.

    Returns
    -------
    numpy.ndarray
        Shape ``(2, max_samples, n_bins)`` — the two padded spectrograms
        stacked along the first axis.
    """
    def _pad_to_max(spgr):
        # Pad trailing frames with each column's median so both members of
        # the pair share a fixed length. np.pad is the public API; the
        # original np.lib.pad is a legacy alias removed in NumPy 2.0.
        return np.pad(spgr, [(0, max_samples - spgr.shape[0]), (0, 0)],
                      'median')

    l_sample = _pad_to_max(sp[0]['spectrogram'])
    r_sample = _pad_to_max(sp[1]['spectrogram'])
    return np.asarray([l_sample, r_sample])
def sunflower_pairs_data():
audio_samples = pd.read_csv(
'./outputs/audio.csv',
@ -35,19 +45,7 @@ def sunflower_pairs_data():
Y = np.hstack([np.ones(len(same_data)), np.zeros(len(diff_data))])
X_sample_pairs = same_data + diff_data
def append_zeros(spgr):
sample = np.lib.pad(spgr, [(0, max_samples - spgr.shape[0]), (0, 0)],
'median')
return np.expand_dims(sample, axis=0)
def create_X(sp):
# sample_count = sp[0]['file'].shape[0]
l_sample = append_zeros(sp[0]['spectrogram'])
r_sample = append_zeros(
sp[1]['spectrogram'])
return np.expand_dims(np.vstack([l_sample, r_sample]), axis=0)
X_list = (create_X(sp) for sp in X_sample_pairs)
X_list = (create_X(sp, max_samples) for sp in X_sample_pairs)
X = np.vstack(X_list)
tr_pairs, te_pairs, tr_y, te_y = train_test_split(X, Y, test_size=0.1)
return train_test_split(X, Y, test_size=0.1)
@ -69,27 +67,16 @@ def create_speech_pairs_data(audio_group='audio'):
audio_samples = pd.read_pickle('outputs/spectrogram.pkl')
max_samples = audio_samples['spectrogram'].apply(
lambda x: x.shape[0]).max()
# sample_size = audio_samples['spectrogram'][0].shape[1]
def append_zeros(spgr):
sample = np.lib.pad(spgr, [(0, max_samples - spgr.shape[0]), (0, 0)],
'median')
return sample
def create_X(sp):
l_sample = append_zeros(sp[0]['spectrogram'])
r_sample = append_zeros(sp[1]['spectrogram'])
return np.asarray([l_sample, r_sample])
print('generating siamese speech pairs')
same_data, diff_data = [], []
for (w, g) in audio_samples.groupby(audio_samples['word']):
sample_norm = g.loc[audio_samples['variant'] == 'normal']
sample_phon = g.loc[audio_samples['variant'] == 'phoneme']
same, diff = get_siamese_pairs(sample_norm, sample_phon)
same_data.extend([create_X(s) for s in same[:10]])
diff_data.extend([create_X(d) for d in diff[:10]])
same_data.extend([create_X(s, max_samples) for s in same[:10]])
diff_data.extend([create_X(d, max_samples) for d in diff[:10]])
print('creating all speech pairs')
Y = np.hstack([np.ones(len(same_data)), np.zeros(len(diff_data))])
print('casting as array speech pairs')

View File

@ -2,9 +2,9 @@ from __future__ import absolute_import
from __future__ import print_function
import numpy as np
from speech_data import speech_model_data
from keras.models import Model
from keras.models import Model,load_model
from keras.layers import Input, Dense, Dropout, LSTM, Lambda
from keras.optimizers import RMSprop, SGD
from keras.optimizers import RMSprop
from keras.callbacks import TensorBoard, ModelCheckpoint
from keras import backend as K
@ -63,6 +63,7 @@ def accuracy(y_true, y_pred):
return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))
def train_siamese():
# the data, shuffled and split between train and test sets
tr_pairs, te_pairs, tr_y, te_y = speech_model_data()
input_dim = (tr_pairs.shape[2], tr_pairs.shape[3])
@ -107,7 +108,6 @@ cp_cb = ModelCheckpoint(
period=1)
# train
rms = RMSprop(lr=0.001)
sgd = SGD(lr=0.001)
model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy])
model.fit(
[tr_pairs[:, 0], tr_pairs[:, 1]],
@ -126,3 +126,23 @@ te_acc = compute_accuracy(te_y, y_pred)
print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc))
print('* Accuracy on test set: %0.2f%%' % (100 * te_acc))
def trained_siamese_model():
    """Load and return the final trained siamese speech model from disk.

    The network does not need to be rebuilt layer-by-layer here:
    ``load_model`` restores both the architecture and the trained weights
    from the saved HDF5 file. (The previous hand-built graph was dead,
    commented-out code and has been removed.)
    """
    # NOTE(review): models compiled with a custom loss (contrastive_loss)
    # may need `custom_objects` when loading in some Keras versions —
    # confirm this file deserializes cleanly.
    model = load_model('./models/siamese_speech_model-final.h5')
    return model


if __name__ == '__main__':
    train_siamese()

7
test_siamese.py Normal file
View File

@ -0,0 +1,7 @@
# WIP smoke test: load the trained siamese model and record two microphone
# samples. No comparison/prediction step exists yet — TODO: feed spec1/spec2
# through the model and assert on the distance.
# from speech_siamese import trained_siamese_model
from keras.models import load_model
from record_mic_speech import record_spectrogram
# Restore the trained network (architecture + weights) from the HDF5 file.
model = load_model('./models/siamese_speech_model-final.h5')
# Record two 1.2-second microphone samples; each call returns a spectrogram.
spec1 = record_spectrogram(n_sec=1.2)
spec2 = record_spectrogram(n_sec=1.2)