Compare commits

...

3 Commits

Author SHA1 Message Date
Malar Kannan 1b0ba26a6e Merge branch 'master' of ssh://invnuc/~/Public/Repos/speech_scoring 2017-11-15 14:17:15 +05:30
Malar Kannan e9f54c7f6f 1. tuned batchsize
2. fixed last batch carry-over
2017-11-15 14:14:17 +05:30
Malar Kannan 7684ab3a74 ported to tqdm 2017-11-14 22:59:51 +05:30
4 changed files with 46 additions and 53 deletions

View File

@ -161,7 +161,7 @@ def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,test_size
input_data.append(np.asarray([p_spec1,p_spec2])) input_data.append(np.asarray([p_spec1,p_spec2]))
output = example.features.feature['output'].int64_list.value output = example.features.feature['output'].int64_list.value
output_data.append(np.asarray(output)) output_data.append(np.asarray(output))
if len(input_data) == batch_size: if len(input_data) == batch_size or i == n_records-1:
input_arr = np.asarray(input_data) input_arr = np.asarray(input_data)
output_arr = np.asarray(output_data) output_arr = np.asarray(output_data)
yield ([input_arr[:, 0], input_arr[:, 1]],output_arr) yield ([input_arr[:, 0], input_arr[:, 1]],output_arr)

View File

@ -12,7 +12,7 @@ from keras.utils import to_categorical
from keras.optimizers import RMSprop from keras.optimizers import RMSprop
from keras.callbacks import TensorBoard, ModelCheckpoint from keras.callbacks import TensorBoard, ModelCheckpoint
from keras import backend as K from keras import backend as K
from speech_tools import create_dir from speech_tools import create_dir,step_count
# def euclidean_distance(vects): # def euclidean_distance(vects):
# x, y = vects # x, y = vects
@ -96,7 +96,7 @@ def load_model_arch(mod_file):
def train_siamese(audio_group = 'audio'): def train_siamese(audio_group = 'audio'):
# the data, shuffled and split between train and test sets # the data, shuffled and split between train and test sets
# tr_pairs, te_pairs, tr_y_e, te_y_e = speech_model_data() # tr_pairs, te_pairs, tr_y_e, te_y_e = speech_model_data()
batch_size = 128 batch_size = 256
model_dir = './models/'+audio_group model_dir = './models/'+audio_group
create_dir(model_dir) create_dir(model_dir)
log_dir = './logs/'+audio_group log_dir = './logs/'+audio_group
@ -141,12 +141,14 @@ def train_siamese(audio_group = 'audio'):
# epochs=100, # epochs=100,
# validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y), # validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y),
# callbacks=[tb_cb, cp_cb]) # callbacks=[tb_cb, cp_cb])
epoch_n_steps = step_count(n_records,batch_size)
model.fit_generator(tr_gen model.fit_generator(tr_gen
,epochs=1000 , epochs=1000
,steps_per_epoch=n_records//batch_size , steps_per_epoch=epoch_n_steps
,validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y) , validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)
,use_multiprocessing=True, workers=1 # ,use_multiprocessing=True, workers=1
,callbacks=[tb_cb, cp_cb]) , max_queue_size=32
, callbacks=[tb_cb, cp_cb])
model.save(model_dir+'/siamese_speech_model-final.h5') model.save(model_dir+'/siamese_speech_model-final.h5')
# compute final accuracy on training and test sets # compute final accuracy on training and test sets
# y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]]) # y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]])
@ -160,5 +162,5 @@ def train_siamese(audio_group = 'audio'):
if __name__ == '__main__': if __name__ == '__main__':
train_siamese('story_words') train_siamese('story_words_test')
# train_siamese('audio') # train_siamese('audio')

View File

@ -5,16 +5,16 @@ from Foundation import NSURL
import json import json
import csv import csv
import random import random
import string
import os import os
import re import re
import subprocess import subprocess
import time import time
import progressbar from tqdm import tqdm
from generate_similar import similar_phoneme_phrase,similar_phrase from generate_similar import similar_phoneme_phrase,similar_phrase
from speech_tools import format_filename
OUTPUT_NAME = 'story_words_test' OUTPUT_NAME = 'story_phrases'
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/' dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
dest_file = './outputs/' + OUTPUT_NAME + '.csv' dest_file = './outputs/' + OUTPUT_NAME + '.csv'
@ -24,21 +24,25 @@ def hms_string(sec_elapsed):
s = sec_elapsed % 60. s = sec_elapsed % 60.
return "{}:{:>02}:{:>05.2f}".format(h, m, s) return "{}:{:>02}:{:>05.2f}".format(h, m, s)
def prog_bar(title):
    """Create a titled progress bar together with a label-updating callback.

    The bar is assembled from: ``title``, an entry counter, the literal
    ``'th entry - '``, a free-form label (initially empty), a bar enclosed
    in square brackets, and an ETA readout.

    Returns a ``(update_prog, prog)`` tuple, where ``update_prog(current)``
    swaps the label widget for one showing ``current`` and then calls
    ``update()`` on the bar, and ``prog`` is the ``ProgressBar`` instance.
    """
    # Position of the FormatLabel widget inside the widget list below.
    label_slot = 3
    bar_widgets = [
        title,
        progressbar.Counter(),
        'th entry - ',
        progressbar.FormatLabel(''),
        ' [',
        progressbar.Bar(),
        '] - ',
        progressbar.ETA(),
    ]
    bar = progressbar.ProgressBar(widgets=bar_widgets)

    def update_prog(current):
        # Replace the label in the same list object that was handed to the bar.
        bar_widgets[label_slot] = progressbar.FormatLabel(current)
        bar.update()

    return update_prog, bar
def create_dir(direc): def create_dir(direc):
if not os.path.exists(direc): if not os.path.exists(direc):
os.makedirs(direc) os.makedirs(direc)
def format_filename(s):
    """Build a filename-friendly string from ``s``.

    Works as a whitelist: only ASCII letters, digits and the characters
    ``-_.()`` plus the space are kept; everything else is dropped. Any
    spaces that survive are then turned into underscores.

    Note: the result is not guaranteed to be a usable filename on its
    own -- inputs can reduce to an empty string, ``.`` or ``..``. Callers
    that need a safe name should add their own prefix (e.g. a timestamp)
    and a file extension.
    """
    allowed = "-_.() " + string.ascii_letters + string.digits
    kept = [ch for ch in s if ch in allowed]
    # Spaces are permitted by the whitelist but unwanted in the final name.
    return ''.join(kept).replace(' ', '_')
def dest_filename(w, v, r, t): def dest_filename(w, v, r, t):
rand_no = str(random.randint(0, 10000)) rand_no = str(random.randint(0, 10000))
@ -55,7 +59,7 @@ def dest_path(v, r, n):
def cli_gen_audio(speech_cmd, rate, voice, out_path): def cli_gen_audio(speech_cmd, rate, voice, out_path):
subprocess.call( subprocess.call(
['say', '-v', voice, '-r', ['say', '-v', voice, '-r',
str(rate), '-o', out_path, speech_cmd]) str(rate), '-o', out_path, "'"+speech_cmd+"'"])
class SynthFile(object): class SynthFile(object):
@ -173,7 +177,7 @@ class SynthVariant(object):
def synth_generator(): def synth_generator():
us_voices_ids = SynthVariant.voices_for_lang('en-US') us_voices_ids = SynthVariant.voices_for_lang('en-US')
voice_rates = [150, 180, 210, 250] voice_rates = [150, 180, 210]#, 250]
voice_synths = [] voice_synths = []
create_dir(dest_dir) create_dir(dest_dir)
for vp in us_voices_ids: for vp in us_voices_ids:
@ -191,9 +195,10 @@ def synth_generator():
for s in voice_synths: for s in voice_synths:
s.create_synth_dirs() s.create_synth_dirs()
for v in ['low', 'medium', 'high']: for v in ['low', 'medium', 'high']:
(update, prog) = prog_bar(prog_title) prog = tqdm(words)
for w in prog(words): prog.set_postfix(variant=v,voice=s.name,rate=s.rate)
update('"{}" with {} variant ({})'.format(w, s, v)) for w in tqdm(words):
prog.set_postfix(word=w)
synthed = s.generate_audio(w, v) synthed = s.generate_audio(w, v)
writer(synthed) writer(synthed)
end_time = time.time() end_time = time.time()
@ -201,7 +206,7 @@ def synth_generator():
print("It took {} to synthsize all variants.".format(time_str)) print("It took {} to synthsize all variants.".format(time_str))
return synth_for_words return synth_for_words
def synth_logger(fname, csv=False): def synth_logger(fname, csv_mode=False):
f = open(fname, 'w') f = open(fname, 'w')
s_csv_w = csv.writer(f, quoting=csv.QUOTE_MINIMAL) s_csv_w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
def csv_writer(s): def csv_writer(s):
@ -212,18 +217,18 @@ def synth_logger(fname, csv=False):
synth_list.append(s) synth_list.append(s)
def close_file(): def close_file():
if csv: if csv_mode:
f.close() f.close()
else: else:
json.dump([s.get_json() for s in synth_list], f) json.dump([s.get_json() for s in synth_list], f)
f.close() f.close()
if csv: if csv_mode:
return csv_writer, close_file return csv_writer, close_file
else: else:
return json_writer, close_file return json_writer, close_file
def generate_audio_for_text_list(text_list): def generate_audio_for_text_list(text_list):
(writer, closer) = synth_logger(dest_file, csv=True) (writer, closer) = synth_logger(dest_file, csv_mode=True)
synth_for_texts = synth_generator() synth_for_texts = synth_generator()
try: try:
synth_for_texts(text_list, writer) synth_for_texts(text_list, writer)
@ -239,7 +244,7 @@ def generate_audio_for_stories():
story_file = './inputs/all_stories.json' story_file = './inputs/all_stories.json'
stories_data = json.load(open(story_file)) stories_data = json.load(open(story_file))
# text_list = [t[0] for i in stories_data.values() for t in i] # text_list = [t[0] for i in stories_data.values() for t in i]
text_list = [i.replace('-','') for g in stories_data.values() for i in g] text_list = [i for g in stories_data.values() for i in g]
generate_audio_for_text_list(text_list) generate_audio_for_text_list(text_list)
def generate_test_audio_for_stories(): def generate_test_audio_for_stories():
@ -257,6 +262,6 @@ def generate_test_audio_for_stories():
if __name__ == '__main__': if __name__ == '__main__':
generate_test_audio_for_stories() # generate_test_audio_for_stories()
# generate_audio_for_text_list(['I want to go home','education']) # generate_audio_for_text_list(['I want to go home','education'])
# generate_audio_for_stories() generate_audio_for_stories()

View File

@ -1,4 +1,5 @@
import os import os
import math
import threading import threading
import multiprocessing import multiprocessing
import pandas as pd import pandas as pd
@ -11,6 +12,9 @@ from speech_spectrum import plot_stft, generate_spec_frec
SAMPLE_RATE = 22050 SAMPLE_RATE = 22050
N_CHANNELS = 2 N_CHANNELS = 2
def step_count(n_records, batch_size):
    """Return the number of batches needed to cover ``n_records`` records.

    This is ``ceil(n_records / batch_size)``: a final, partially filled
    batch counts as one extra step.

    The quotient is computed with ``divmod`` rather than float division,
    so integer inputs give an exact result even when they are too large
    to be represented exactly as a float.

    Raises:
        ZeroDivisionError: if ``batch_size`` is zero.
    """
    full_batches, leftover = divmod(n_records, batch_size)
    # Any remainder means one more (short) batch is required.
    return int(full_batches) + (1 if leftover else 0)
def file_player(): def file_player():
p_oup = pyaudio.PyAudio() p_oup = pyaudio.PyAudio()
def play_file(audiopath,plot=False): def play_file(audiopath,plot=False):
@ -132,21 +136,3 @@ def threadsafe_generator(f):
def g(*a, **kw): def g(*a, **kw):
return threadsafe_iter(f(*a, **kw)) return threadsafe_iter(f(*a, **kw))
return g return g
def format_filename(s):
    """Build a filename-friendly string from ``s``.

    Works as a whitelist: only ASCII letters, digits and the characters
    ``-_.()`` plus the space are kept; everything else is dropped. Any
    spaces that survive are then turned into underscores.

    Note: the result is not guaranteed to be a usable filename on its
    own -- inputs can reduce to an empty string, ``.`` or ``..``. Callers
    that need a safe name should add their own prefix (e.g. a timestamp)
    and a file extension.
    """
    allowed = "-_.() " + string.ascii_letters + string.digits
    kept = [ch for ch in s if ch in allowed]
    # Spaces are permitted by the whitelist but unwanted in the final name.
    return ''.join(kept).replace(' ', '_')