Compare commits
5 Commits
6fbf06814c
...
6ab84b4dc2
| Author | SHA1 | Date |
|---|---|---|
|
|
6ab84b4dc2 | |
|
|
d4454b6434 | |
|
|
45977a819d | |
|
|
4188585488 | |
|
|
2d9b12af95 |
|
|
@ -4,8 +4,9 @@ import numpy as np
|
||||||
from speech_data import speech_model_data
|
from speech_data import speech_model_data
|
||||||
from keras.models import Model,load_model
|
from keras.models import Model,load_model
|
||||||
from keras.layers import Input, Dense, Dropout, LSTM, Lambda, Concatenate
|
from keras.layers import Input, Dense, Dropout, LSTM, Lambda, Concatenate
|
||||||
# from keras.losses import categorical_crossentropy
|
from keras.losses import categorical_crossentropy
|
||||||
from keras.losses import binary_crossentropy
|
# from keras.losses import binary_crossentropy
|
||||||
|
from keras.utils import to_categorical
|
||||||
# from keras.utils.np_utils import to_categorical
|
# from keras.utils.np_utils import to_categorical
|
||||||
from keras.optimizers import RMSprop
|
from keras.optimizers import RMSprop
|
||||||
from keras.callbacks import TensorBoard, ModelCheckpoint
|
from keras.callbacks import TensorBoard, ModelCheckpoint
|
||||||
|
|
@ -30,15 +31,14 @@ def contrastive_loss(y_true, y_pred):
|
||||||
return K.mean(y_true * K.square(y_pred) +
|
return K.mean(y_true * K.square(y_pred) +
|
||||||
(1 - y_true) * K.square(K.maximum(1 - y_pred, 0)))
|
(1 - y_true) * K.square(K.maximum(1 - y_pred, 0)))
|
||||||
|
|
||||||
|
|
||||||
def create_base_rnn_network(input_dim):
|
def create_base_rnn_network(input_dim):
|
||||||
'''Base network to be shared (eq. to feature extraction).
|
'''Base network to be shared (eq. to feature extraction).
|
||||||
'''
|
'''
|
||||||
inp = Input(shape=input_dim)
|
inp = Input(shape=input_dim)
|
||||||
ls1 = LSTM(1024, return_sequences=True)(inp)
|
ls1 = LSTM(256, return_sequences=True)(inp)
|
||||||
ls2 = LSTM(512, return_sequences=True)(ls1)
|
ls2 = LSTM(128, return_sequences=True)(ls1)
|
||||||
# ls3 = LSTM(32, return_sequences=True)(ls2)
|
# ls3 = LSTM(32, return_sequences=True)(ls2)
|
||||||
ls4 = LSTM(32)(ls2)
|
ls4 = LSTM(64)(ls2)
|
||||||
return Model(inp, ls4)
|
return Model(inp, ls4)
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -52,15 +52,15 @@ def compute_accuracy(y_true, y_pred):
|
||||||
def accuracy(y_true, y_pred):
|
def accuracy(y_true, y_pred):
|
||||||
'''Compute classification accuracy with a fixed threshold on distances.
|
'''Compute classification accuracy with a fixed threshold on distances.
|
||||||
'''
|
'''
|
||||||
return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype)))
|
return K.mean(K.equal(y_true, K.cast(y_pred > 0.5, y_true.dtype)))
|
||||||
|
|
||||||
def dense_classifier(processed):
|
def dense_classifier(processed):
|
||||||
conc_proc = Concatenate()(processed)
|
conc_proc = Concatenate()(processed)
|
||||||
d1 = Dense(8, activation='relu')(conc_proc)
|
d1 = Dense(16, activation='relu')(conc_proc)
|
||||||
dr1 = Dropout(0.1)(d1)
|
# dr1 = Dropout(0.1)(d1)
|
||||||
# d2 = Dense(8, activation='relu')(dr1)
|
d2 = Dense(8, activation='relu')(d1)
|
||||||
# dr2 = Dropout(0.1)(d2)
|
# dr2 = Dropout(0.1)(d2)
|
||||||
return Dense(1, activation='sigmoid')(dr1)
|
return Dense(2, activation='softmax')(d2)
|
||||||
|
|
||||||
def siamese_model(input_dim):
|
def siamese_model(input_dim):
|
||||||
# input_dim = (15, 1654)
|
# input_dim = (15, 1654)
|
||||||
|
|
@ -80,7 +80,9 @@ def siamese_model(input_dim):
|
||||||
|
|
||||||
def train_siamese():
|
def train_siamese():
|
||||||
# the data, shuffled and split between train and test sets
|
# the data, shuffled and split between train and test sets
|
||||||
tr_pairs, te_pairs, tr_y, te_y = speech_model_data()
|
tr_pairs, te_pairs, tr_y_e, te_y_e = speech_model_data()
|
||||||
|
tr_y = to_categorical(tr_y_e, num_classes=2)
|
||||||
|
te_y = to_categorical(te_y_e, num_classes=2)
|
||||||
input_dim = (tr_pairs.shape[2], tr_pairs.shape[3])
|
input_dim = (tr_pairs.shape[2], tr_pairs.shape[3])
|
||||||
|
|
||||||
model = siamese_model(input_dim)
|
model = siamese_model(input_dim)
|
||||||
|
|
@ -96,7 +98,7 @@ def train_siamese():
|
||||||
embeddings_layer_names=None,
|
embeddings_layer_names=None,
|
||||||
embeddings_metadata=None)
|
embeddings_metadata=None)
|
||||||
cp_file_fmt = './models/siamese_speech_model-{epoch:02d}-epoch-{val_loss:0.2f}\
|
cp_file_fmt = './models/siamese_speech_model-{epoch:02d}-epoch-{val_loss:0.2f}\
|
||||||
-acc.h5'
|
-acc.h5'
|
||||||
|
|
||||||
cp_cb = ModelCheckpoint(
|
cp_cb = ModelCheckpoint(
|
||||||
cp_file_fmt,
|
cp_file_fmt,
|
||||||
|
|
@ -108,7 +110,7 @@ def train_siamese():
|
||||||
period=1)
|
period=1)
|
||||||
# train
|
# train
|
||||||
rms = RMSprop(lr=0.001)
|
rms = RMSprop(lr=0.001)
|
||||||
model.compile(loss=binary_crossentropy, optimizer=rms, metrics=[accuracy])
|
model.compile(loss=categorical_crossentropy, optimizer=rms, metrics=[accuracy])
|
||||||
model.fit(
|
model.fit(
|
||||||
[tr_pairs[:, 0], tr_pairs[:, 1]],
|
[tr_pairs[:, 0], tr_pairs[:, 1]],
|
||||||
tr_y,
|
tr_y,
|
||||||
|
|
|
||||||
|
|
@ -5,6 +5,7 @@ import speech_data
|
||||||
reload(speech_data)
|
reload(speech_data)
|
||||||
from speech_data import create_test_pair,get_word_pairs_data
|
from speech_data import create_test_pair,get_word_pairs_data
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
from keras.utils import to_categorical
|
||||||
|
|
||||||
model = siamese_model((15, 1654))
|
model = siamese_model((15, 1654))
|
||||||
model.load_weights('./models/siamese_speech_model-final.h5')
|
model.load_weights('./models/siamese_speech_model-final.h5')
|
||||||
|
|
@ -15,8 +16,9 @@ def predict_recording_with(m,sample_size=15):
|
||||||
inp = create_test_pair(spec1,spec2,sample_size)
|
inp = create_test_pair(spec1,spec2,sample_size)
|
||||||
return m.predict([inp[:, 0], inp[:, 1]])
|
return m.predict([inp[:, 0], inp[:, 1]])
|
||||||
|
|
||||||
predict_recording_with(model)
|
while(True):
|
||||||
|
print(predict_recording_with(model))
|
||||||
|
|
||||||
sunflower_data,sunflower_result = get_word_pairs_data('sunflowers',15)
|
# sunflower_data,sunflower_result = get_word_pairs_data('sweater',15)
|
||||||
sunflower_result
|
# print(np.argmax(model.predict([sunflower_data[:, 0], sunflower_data[:, 1]]),axis=1))
|
||||||
model.predict([sunflower_data[:, 0], sunflower_data[:, 1]]) < 0.5
|
# print(sunflower_result)
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ import progressbar
|
||||||
|
|
||||||
from generate_similar import similar_phoneme,similar_word
|
from generate_similar import similar_phoneme,similar_word
|
||||||
|
|
||||||
OUTPUT_NAME = 'story_words'
|
OUTPUT_NAME = 'rand_edu'
|
||||||
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
|
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
|
||||||
dest_file = './outputs/' + OUTPUT_NAME + '.csv'
|
dest_file = './outputs/' + OUTPUT_NAME + '.csv'
|
||||||
|
|
||||||
|
|
@ -95,8 +95,7 @@ class SynthVariant(object):
|
||||||
self.name = voice
|
self.name = voice
|
||||||
self.lang = lang
|
self.lang = lang
|
||||||
self.phoneme_capable = self.is_phoneme_capable()
|
self.phoneme_capable = self.is_phoneme_capable()
|
||||||
if self.phoneme_capable:
|
|
||||||
create_dir(dest_dir + self.name + '/' + str(self.rate))
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return 'Synthesizer[{} - {}]'.format(self.name, self.rate)
|
return 'Synthesizer[{} - {}]'.format(self.name, self.rate)
|
||||||
|
|
@ -128,6 +127,10 @@ class SynthVariant(object):
|
||||||
cli_gen_audio(phon_cmd, self.rate, self.name, d_path)
|
cli_gen_audio(phon_cmd, self.rate, self.name, d_path)
|
||||||
return SynthFile(word, phoneme, r_path, self.name, self.lang, self.rate, variant)
|
return SynthFile(word, phoneme, r_path, self.name, self.lang, self.rate, variant)
|
||||||
|
|
||||||
|
def create_synth_dirs(self):
|
||||||
|
if self.phoneme_capable:
|
||||||
|
create_dir(dest_dir + self.name + '/' + str(self.rate))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def voices_for_lang(lang):
|
def voices_for_lang(lang):
|
||||||
voices_installed = NSSpeechSynthesizer.availableVoices()
|
voices_installed = NSSpeechSynthesizer.availableVoices()
|
||||||
|
|
@ -168,9 +171,10 @@ def synth_generator():
|
||||||
|
|
||||||
def synth_for_words(words, writer):
|
def synth_for_words(words, writer):
|
||||||
prog_title = "Synthesizing {} words : ".format(len(words))
|
prog_title = "Synthesizing {} words : ".format(len(words))
|
||||||
(update, prog) = prog_bar(prog_title)
|
|
||||||
for s in voice_synths:
|
for s in voice_synths:
|
||||||
|
s.create_synth_dirs()
|
||||||
for v in ['low', 'medium', 'high']:
|
for v in ['low', 'medium', 'high']:
|
||||||
|
(update, prog) = prog_bar(prog_title)
|
||||||
for w in prog(words):
|
for w in prog(words):
|
||||||
update('"{}" with {} variant ({})'.format(w, s, v))
|
update('"{}" with {} variant ({})'.format(w, s, v))
|
||||||
synthed = s.generate_audio(w, v)
|
synthed = s.generate_audio(w, v)
|
||||||
|
|
@ -209,13 +213,24 @@ def synth_logger(fname, csv=False):
|
||||||
else:
|
else:
|
||||||
return json_writer, close_file
|
return json_writer, close_file
|
||||||
|
|
||||||
|
def generate_audio_for_text_list(text_list):
|
||||||
|
(writer, closer) = synth_logger(dest_file, csv=True)
|
||||||
|
synth_for_words = synth_generator()
|
||||||
|
try:
|
||||||
|
synth_for_words(text_list, writer)
|
||||||
|
except:
|
||||||
|
import traceback
|
||||||
|
import sys
|
||||||
|
traceback.print_exc(file=sys.stdout)
|
||||||
|
pass
|
||||||
|
closer()
|
||||||
|
|
||||||
def generate_audio_for_stories():
|
def generate_audio_for_stories():
|
||||||
story_file = './inputs/all_stories_hs.json'
|
# story_file = './inputs/all_stories_hs.json'
|
||||||
# story_file = './inputs/all_stories.json'
|
story_file = './inputs/all_stories.json'
|
||||||
stories_data = json.load(open(story_file))
|
stories_data = json.load(open(story_file))
|
||||||
word_list = [t[0] for i in stories_data.values() for t in i]
|
# word_list = [t[0] for i in stories_data.values() for t in i]
|
||||||
# word_list = [i for g in stories_data.values() for i in g]
|
word_list = [i for g in stories_data.values() for i in g]
|
||||||
(writer, closer) = synth_logger(dest_file, csv=True)
|
(writer, closer) = synth_logger(dest_file, csv=True)
|
||||||
synth_for_words = synth_generator()
|
synth_for_words = synth_generator()
|
||||||
try:
|
try:
|
||||||
|
|
@ -228,4 +243,5 @@ def generate_audio_for_stories():
|
||||||
closer()
|
closer()
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
generate_audio_for_stories()
|
generate_audio_for_text_list(['random','education'])
|
||||||
|
# generate_audio_for_stories()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue