Compare commits

...

5 Commits

Author SHA1 Message Date
Malar Kannan 6ab84b4dc2 Merge branch 'master' of ssh://invmac/~/Public/repos/speech-scoring 2017-11-02 13:16:04 +05:30
Malar Kannan d4454b6434 looping record test code 2017-11-02 13:14:59 +05:30
Malar Kannan 45977a819d generating randome samples 2017-11-02 13:14:08 +05:30
Malar Kannan 4188585488 updated test code 2017-10-31 17:41:02 +05:30
Malar Kannan 2d9b12af95 fixed out of range exception 2017-10-31 10:29:24 +05:30
3 changed files with 47 additions and 27 deletions

View File

@@ -4,8 +4,9 @@ import numpy as np
 from speech_data import speech_model_data
 from keras.models import Model,load_model
 from keras.layers import Input, Dense, Dropout, LSTM, Lambda, Concatenate
-# from keras.losses import categorical_crossentropy
-from keras.losses import binary_crossentropy
+from keras.losses import categorical_crossentropy
+# from keras.losses import binary_crossentropy
+from keras.utils import to_categorical
 # from keras.utils.np_utils import to_categorical
 from keras.optimizers import RMSprop
 from keras.callbacks import TensorBoard, ModelCheckpoint
@@ -30,15 +31,14 @@ def contrastive_loss(y_true, y_pred):
     return K.mean(y_true * K.square(y_pred) +
                   (1 - y_true) * K.square(K.maximum(1 - y_pred, 0)))

 def create_base_rnn_network(input_dim):
     '''Base network to be shared (eq. to feature extraction).
     '''
     inp = Input(shape=input_dim)
-    ls1 = LSTM(1024, return_sequences=True)(inp)
-    ls2 = LSTM(512, return_sequences=True)(ls1)
+    ls1 = LSTM(256, return_sequences=True)(inp)
+    ls2 = LSTM(128, return_sequences=True)(ls1)
     # ls3 = LSTM(32, return_sequences=True)(ls2)
-    ls4 = LSTM(32)(ls2)
+    ls4 = LSTM(64)(ls2)
     return Model(inp, ls4)
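Reviewer note: `siamese_model` itself is not touched by this hunk, so the snippet below is only a sketch of the usual shared-weights wiring implied by the "Base network to be shared" docstring; the name `siamese_model_sketch` and the exact hookup are assumptions, not the repository's code.

    # Hypothetical sketch (not from this diff): how create_base_rnn_network is
    # typically reused for both inputs of a siamese model.
    from keras.models import Model
    from keras.layers import Input

    def siamese_model_sketch(input_dim):
        base = create_base_rnn_network(input_dim)  # one set of LSTM weights
        input_a = Input(shape=input_dim)
        input_b = Input(shape=input_dim)
        processed_a = base(input_a)                # same weights applied to both inputs
        processed_b = base(input_b)
        out = dense_classifier([processed_a, processed_b])
        return Model([input_a, input_b], out)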
@@ -52,15 +52,15 @@ def compute_accuracy(y_true, y_pred):
 def accuracy(y_true, y_pred):
     '''Compute classification accuracy with a fixed threshold on distances.
     '''
-    return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))
+    return K.mean(K.equal(y_true, K.cast(y_pred > 0.5, y_true.dtype)))

 def dense_classifier(processed):
     conc_proc = Concatenate()(processed)
-    d1 = Dense(8, activation='relu')(conc_proc)
-    dr1 = Dropout(0.1)(d1)
-    # d2 = Dense(8, activation='relu')(dr1)
+    d1 = Dense(16, activation='relu')(conc_proc)
+    # dr1 = Dropout(0.1)(d1)
+    d2 = Dense(8, activation='relu')(d1)
     # dr2 = Dropout(0.1)(d2)
-    return Dense(1, activation='sigmoid')(dr1)
+    return Dense(2, activation='softmax')(d2)

 def siamese_model(input_dim):
     # input_dim = (15, 1654)
@@ -80,7 +80,9 @@ def siamese_model(input_dim):
 def train_siamese():
     # the data, shuffled and split between train and test sets
-    tr_pairs, te_pairs, tr_y, te_y = speech_model_data()
+    tr_pairs, te_pairs, tr_y_e, te_y_e = speech_model_data()
+    tr_y = to_categorical(tr_y_e, num_classes=2)
+    te_y = to_categorical(te_y_e, num_classes=2)
     input_dim = (tr_pairs.shape[2], tr_pairs.shape[3])
     model = siamese_model(input_dim)
@@ -96,7 +98,7 @@ def train_siamese():
         embeddings_layer_names=None,
         embeddings_metadata=None)
     cp_file_fmt = './models/siamese_speech_model-{epoch:02d}-epoch-{val_loss:0.2f}\
 -acc.h5'
     cp_cb = ModelCheckpoint(
         cp_file_fmt,
@@ -108,7 +110,7 @@ def train_siamese():
         period=1)
     # train
     rms = RMSprop(lr=0.001)
-    model.compile(loss=binary_crossentropy, optimizer=rms, metrics=[accuracy])
+    model.compile(loss=categorical_crossentropy, optimizer=rms, metrics=[accuracy])
     model.fit(
         [tr_pairs[:, 0], tr_pairs[:, 1]],
         tr_y,
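Reviewer note: taken together, these hunks move the head from a single sigmoid unit trained with `binary_crossentropy` to a 2-unit softmax trained with `categorical_crossentropy`, which is why the integer pair labels are now one-hot encoded with `to_categorical` and the accuracy check flips from `y_pred < 0.5` to `y_pred > 0.5`. A minimal illustration of the label conversion (the label values are made up):

    import numpy as np
    from keras.utils import to_categorical

    # Illustrative pair labels only; the real ones come from speech_model_data().
    y = np.array([1, 0, 0, 1])
    y_cat = to_categorical(y, num_classes=2)
    # y_cat == [[0., 1.],
    #           [1., 0.],
    #           [1., 0.],
    #           [0., 1.]]
    # The Dense(2, activation='softmax') head is trained against y_cat with
    # categorical_crossentropy instead of a lone sigmoid with binary_crossentropy.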

View File

@@ -5,6 +5,7 @@ import speech_data
 reload(speech_data)
 from speech_data import create_test_pair,get_word_pairs_data
 import numpy as np
+from keras.utils import to_categorical

 model = siamese_model((15, 1654))
 model.load_weights('./models/siamese_speech_model-final.h5')
@@ -15,8 +16,9 @@ def predict_recording_with(m,sample_size=15):
     inp = create_test_pair(spec1,spec2,sample_size)
     return m.predict([inp[:, 0], inp[:, 1]])

-predict_recording_with(model)
-sunflower_data,sunflower_result = get_word_pairs_data('sunflowers',15)
-sunflower_result
-model.predict([sunflower_data[:, 0], sunflower_data[:, 1]]) < 0.5
+while(True):
+    print(predict_recording_with(model))
+# sunflower_data,sunflower_result = get_word_pairs_data('sweater',15)
+# print(np.argmax(model.predict([sunflower_data[:, 0], sunflower_data[:, 1]]),axis=1))
+# print(sunflower_result)
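Reviewer note: with the softmax head, a class decision comes from an `argmax` over the two output columns rather than thresholding a single score at 0.5, as the commented-out `np.argmax` line hints. A small sketch of interpreting the output of `predict_recording_with(model)` (the probability values are invented):

    import numpy as np

    # preds stands in for predict_recording_with(model): one softmax row per pair,
    # shape (n_pairs, 2). These numbers are illustrative only.
    preds = np.array([[0.2, 0.8],
                      [0.9, 0.1]])
    classes = np.argmax(preds, axis=1)                  # -> array([1, 0])
    confidence = preds[np.arange(len(preds)), classes]  # probability of the chosen class
    print(classes, confidence)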

View File

@@ -11,7 +11,7 @@ import progressbar
 from generate_similar import similar_phoneme,similar_word

-OUTPUT_NAME = 'story_words'
+OUTPUT_NAME = 'rand_edu'
 dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
 dest_file = './outputs/' + OUTPUT_NAME + '.csv'
@@ -95,8 +95,7 @@ class SynthVariant(object):
         self.name = voice
         self.lang = lang
         self.phoneme_capable = self.is_phoneme_capable()
-        if self.phoneme_capable:
-            create_dir(dest_dir + self.name + '/' + str(self.rate))

     def __repr__(self):
         return 'Synthesizer[{} - {}]'.format(self.name, self.rate)
@@ -128,6 +127,10 @@ class SynthVariant(object):
             cli_gen_audio(phon_cmd, self.rate, self.name, d_path)
         return SynthFile(word, phoneme, r_path, self.name, self.lang, self.rate, variant)

+    def create_synth_dirs(self):
+        if self.phoneme_capable:
+            create_dir(dest_dir + self.name + '/' + str(self.rate))
+
     @staticmethod
     def voices_for_lang(lang):
         voices_installed = NSSpeechSynthesizer.availableVoices()
@@ -168,9 +171,10 @@ def synth_generator():
     def synth_for_words(words, writer):
         prog_title = "Synthesizing {} words : ".format(len(words))
-        (update, prog) = prog_bar(prog_title)
         for s in voice_synths:
+            s.create_synth_dirs()
             for v in ['low', 'medium', 'high']:
+                (update, prog) = prog_bar(prog_title)
                 for w in prog(words):
                     update('"{}" with {} variant ({})'.format(w, s, v))
                     synthed = s.generate_audio(w, v)
@@ -209,13 +213,24 @@ def synth_logger(fname, csv=False):
     else:
         return json_writer, close_file

+def generate_audio_for_text_list(text_list):
+    (writer, closer) = synth_logger(dest_file, csv=True)
+    synth_for_words = synth_generator()
+    try:
+        synth_for_words(text_list, writer)
+    except:
+        import traceback
+        import sys
+        traceback.print_exc(file=sys.stdout)
+        pass
+    closer()
+
 def generate_audio_for_stories():
-    story_file = './inputs/all_stories_hs.json'
-    # story_file = './inputs/all_stories.json'
+    # story_file = './inputs/all_stories_hs.json'
+    story_file = './inputs/all_stories.json'
     stories_data = json.load(open(story_file))
-    word_list = [t[0] for i in stories_data.values() for t in i]
-    # word_list = [i for g in stories_data.values() for i in g]
+    # word_list = [t[0] for i in stories_data.values() for t in i]
+    word_list = [i for g in stories_data.values() for i in g]
     (writer, closer) = synth_logger(dest_file, csv=True)
     synth_for_words = synth_generator()
     try:
@@ -228,4 +243,5 @@ def generate_audio_for_stories():
     closer()

 if __name__ == '__main__':
-    generate_audio_for_stories()
+    generate_audio_for_text_list(['random','education'])
+    # generate_audio_for_stories()
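Reviewer note: the switch between the two `word_list` comprehensions implies two different input shapes: `all_stories_hs.json` appears to map each story to a list of `(word, ...)` tuples, while `all_stories.json` maps each story to a flat list of words. A toy example of the shapes those comprehensions expect (the story contents are invented, and the meaning of the tuple's second field is not shown in the diff):

    # Shapes implied by the two comprehensions in generate_audio_for_stories();
    # the actual story files are not part of this diff.
    stories_hs = {'story1': [('cat', 'x1'), ('dog', 'x2')]}        # (word, extra) tuples
    stories = {'story1': ['cat', 'dog'], 'story2': ['sunflower']}  # flat word lists

    word_list_hs = [t[0] for i in stories_hs.values() for t in i]  # -> ['cat', 'dog']
    word_list = [i for g in stories.values() for i in g]           # e.g. ['cat', 'dog', 'sunflower']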