Use the csv writer instead, as commas in phrases were misaligning columns

master
Malar Kannan 2017-11-07 11:56:09 +05:30
parent 15f29895d4
commit 55e2de2f04
2 changed files with 36 additions and 119 deletions

View File

@ -11,17 +11,11 @@ import os
import random import random
import csv import csv
import gc import gc
import progressbar
def get_siamese_pairs(groupF1, groupF2): def prog_bar(title):
group1 = [r for (i, r) in groupF1.iterrows()] widgets = [title, progressbar.Counter(), ' [', progressbar.Bar(), '] - ', progressbar.ETA()]
group2 = [r for (i, r) in groupF2.iterrows()] return progressbar.ProgressBar(widgets=widgets)
diff = [(g1, g2) for g2 in group2 for g1 in group1]
same = [i for i in itertools.combinations(group1, 2)
] + [i for i in itertools.combinations(group2, 2)]
random.shuffle(same)
random.shuffle(diff)
# return (random.sample(same,10), random.sample(diff,10))
return same[:10],diff[:10]
def siamese_pairs(rightGroup, wrongGroup): def siamese_pairs(rightGroup, wrongGroup):
group1 = [r for (i, r) in rightGroup.iterrows()] group1 = [r for (i, r) in rightGroup.iterrows()]
@ -32,73 +26,8 @@ def siamese_pairs(rightGroup, wrongGroup):
random.shuffle(rightRightPairs) random.shuffle(rightRightPairs)
# return (random.sample(same,10), random.sample(diff,10)) # return (random.sample(same,10), random.sample(diff,10))
# return rightRightPairs[:10],rightWrongPairs[:10] # return rightRightPairs[:10],rightWrongPairs[:10]
return rightRightPairs,rightWrongPairs return rightRightPairs[:32],rightWrongPairs[:32]
# return rightRightPairs,rightWrongPairs
def append_zeros(spgr, max_samples):
    """Pad a 2-D spectrogram along axis 0 up to ``max_samples`` rows.

    Despite the name, the added rows are not zeros: ``'median'`` mode fills
    each column with the median of that column's existing values.

    Args:
        spgr: 2-D array; only the first axis is padded (at the end).
        max_samples: target length of the first axis; must be at least
            ``spgr.shape[0]``.

    Returns:
        A new array whose first axis has length ``max_samples``.

    Raises:
        ValueError: raised by ``np.pad`` when ``max_samples`` is smaller
            than ``spgr.shape[0]`` (negative pad width).
    """
    # np.pad is the public entry point; the np.lib.pad alias is not part of
    # the np.lib namespace in NumPy 2.x.
    missing_rows = max_samples - spgr.shape[0]
    return np.pad(spgr, [(0, missing_rows), (0, 0)], 'median')
def padd_zeros(spgr, max_samples):
    """Zero-pad a 2-D spectrogram along axis 0 up to ``max_samples`` rows.

    Args:
        spgr: 2-D array; only the first axis is padded (at the end).
        max_samples: target length of the first axis; must be at least
            ``spgr.shape[0]``.

    Returns:
        A new array whose first axis has length ``max_samples``; the added
        rows hold ``np.pad``'s default constant value, 0.

    Raises:
        ValueError: raised by ``np.pad`` when ``max_samples`` is smaller
            than ``spgr.shape[0]`` (negative pad width).
    """
    # np.pad is the public entry point; the np.lib.pad alias is not part of
    # the np.lib namespace in NumPy 2.x.
    missing_rows = max_samples - spgr.shape[0]
    return np.pad(spgr, [(0, missing_rows), (0, 0)], 'constant')
def to_onehot(a, class_count=2):
    """One-hot encode a 1-D collection of integer class labels.

    Args:
        a: integer labels, each in ``range(class_count)``. Any array-like is
            accepted (ndarray, list, tuple); it is coerced with
            ``np.asarray`` so callers are not required to pass an ndarray.
        class_count: number of columns in the encoding.

    Returns:
        A float array of shape ``(len(a), class_count)`` with a single 1 in
        each row, at the column given by that row's label.

    Raises:
        IndexError: if a label falls outside ``range(class_count)`` or the
            labels are not integers.
    """
    labels = np.asarray(a)
    row_count = labels.shape[0]
    onehot = np.zeros((row_count, class_count))
    # Fancy indexing: row i gets its 1 in column labels[i].
    onehot[np.arange(row_count), labels] = 1
    return onehot
def create_pair(l, r, max_samples):
    """Build one sample pair from two spectrograms.

    Both inputs are passed through ``padd_zeros`` with the same
    ``max_samples`` and the two results are combined into one array,
    left first.

    Args:
        l: left spectrogram.
        r: right spectrogram.
        max_samples: target first-axis length handed to ``padd_zeros``.

    Returns:
        ``np.asarray`` of the two padded spectrograms.
    """
    padded = [padd_zeros(side, max_samples) for side in (l, r)]
    return np.asarray(padded)
def create_test_pair(l, r, max_samples):
    """Build a single-element batch holding one pair of spectrograms.

    Both inputs are passed through ``append_zeros`` with the same
    ``max_samples``; the resulting pair is wrapped in one extra outer list
    before conversion, so the returned array has a leading axis of length 1.

    Args:
        l: left spectrogram.
        r: right spectrogram.
        max_samples: target first-axis length handed to ``append_zeros``.

    Returns:
        ``np.asarray`` of ``[[left, right]]``.
    """
    padded = [append_zeros(side, max_samples) for side in (l, r)]
    return np.asarray([padded])
def create_X(sp, max_samples):
    """Turn a pair of sample records into a padded spectrogram pair.

    Args:
        sp: two-element sequence; each element is indexed with the key
            ``'spectrogram'``.
        max_samples: forwarded unchanged to ``create_pair``.

    Returns:
        Whatever ``create_pair`` returns for the two spectrograms.
    """
    left_spgr = sp[0]['spectrogram']
    right_spgr = sp[1]['spectrogram']
    return create_pair(left_spgr, right_spgr, max_samples)
# def get_word_pairs_data(word, max_samples):
# audio_samples = pd.read_csv(
# './outputs/audio.csv',
# names=['word', 'voice', 'rate', 'variant', 'file'])
# audio_samples = audio_samples.loc[audio_samples['word'] ==
# word].reset_index(drop=True)
# audio_samples.loc[:, 'spectrogram'] = audio_samples.loc[:, 'file'].apply(
# lambda x: 'outputs/audio/' + x).apply(generate_aiff_spectrogram)
# max_samples = audio_samples['spectrogram'].apply(
# lambda x: x.shape[0]).max()
# same_data, diff_data = [], []
# for (w, g) in audio_samples.groupby(audio_samples['word']):
# sample_norm = g.loc[audio_samples['variant'] == 'normal']
# sample_phon = g.loc[audio_samples['variant'] == 'phoneme']
# same, diff = get_siamese_pairs(sample_norm, sample_phon)
# same_data.extend([create_X(s, max_samples) for s in same])
# diff_data.extend([create_X(d, max_samples) for d in diff])
# Y = np.hstack([np.ones(len(same_data)), np.zeros(len(diff_data))])
# X = np.asarray(same_data + diff_data)
# # tr_pairs, te_pairs, tr_y, te_y = train_test_split(X, Y, test_size=0.1)
# return (X, Y)
def create_spectrogram_data(audio_group='audio'):
    """Compute spectrograms for an audio group and pickle the result.

    Reads ``./outputs/<audio_group>.csv`` (header-less, seven columns), keeps
    only the rows whose audio file exists under ``outputs/<audio_group>/``,
    attaches a ``spectrogram`` column and a ``window_count`` column (the
    first dimension of each spectrogram), and writes the frame to
    ``outputs/<audio_group>-spectrogram.pkl``.

    Args:
        audio_group: base name shared by the CSV file, the audio directory
            and the output pickle.

    Returns:
        None; the result is written to disk.
    """
    columns = ['word', 'phonemes', 'voice', 'language', 'rate', 'variant', 'file']
    csv_path = './outputs/' + audio_group + '.csv'
    audio_samples = pd.read_csv(csv_path, names=columns, quoting=csv.QUOTE_NONE)

    def to_path(file_name):
        # Audio files live in a directory named after the group.
        return 'outputs/' + audio_group + '/' + file_name

    audio_samples['file_paths'] = audio_samples.loc[:, 'file'].apply(to_path)
    exists = apply_by_multiprocessing(audio_samples['file_paths'], os.path.exists)
    audio_samples['file_exists'] = exists
    # Drop rows that point at missing files before the spectrogram pass.
    audio_samples = audio_samples[audio_samples['file_exists'] == True].reset_index()
    spectrograms = apply_by_multiprocessing(audio_samples['file_paths'], generate_aiff_spectrogram)
    audio_samples['spectrogram'] = spectrograms
    audio_samples['window_count'] = audio_samples.loc[:, 'spectrogram'].apply(lambda s: s.shape[0])
    audio_samples.to_pickle('outputs/{}-spectrogram.pkl'.format(audio_group))
def create_spectrogram_tfrecords(audio_group='audio'): def create_spectrogram_tfrecords(audio_group='audio'):
''' '''
@ -113,7 +42,9 @@ def create_spectrogram_tfrecords(audio_group='audio'):
audio_samples['file_path'] = audio_samples.loc[:, 'file'].apply(lambda x: 'outputs/' + audio_group + '/' + x) audio_samples['file_path'] = audio_samples.loc[:, 'file'].apply(lambda x: 'outputs/' + audio_group + '/' + x)
audio_samples['file_exists'] = apply_by_multiprocessing(audio_samples['file_path'], os.path.exists) audio_samples['file_exists'] = apply_by_multiprocessing(audio_samples['file_path'], os.path.exists)
audio_samples = audio_samples[audio_samples['file_exists'] == True].reset_index() audio_samples = audio_samples[audio_samples['file_exists'] == True].reset_index()
audio_samples['rate_int'] = apply_by_multiprocessing(audio_samples['rate'], str.isdigit)
audio_samples = audio_samples[audio_samples['rate_int'] == True].reset_index().drop(['level_0'],axis=1)
audio_samples['rate'] = audio_samples['rate'].astype(int)
def _float_feature(value): def _float_feature(value):
return tf.train.Feature(float_list=tf.train.FloatList(value=value)) return tf.train.Feature(float_list=tf.train.FloatList(value=value))
@ -124,7 +55,8 @@ def create_spectrogram_tfrecords(audio_group='audio'):
return tf.train.Feature(bytes_list=tf.train.BytesList(value=value)) return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
writer = tf.python_io.TFRecordWriter('./outputs/' + audio_group + '.tfrecords') writer = tf.python_io.TFRecordWriter('./outputs/' + audio_group + '.tfrecords')
for (w, word_group) in audio_samples.groupby(audio_samples['word']): prog = prog_bar('Generating siamese pairs : ')
for (w, word_group) in prog(audio_samples.groupby(audio_samples['word'])):
g = word_group.reset_index() g = word_group.reset_index()
g['spectrogram'] = apply_by_multiprocessing(g['file_path'],generate_aiff_spectrogram) g['spectrogram'] = apply_by_multiprocessing(g['file_path'],generate_aiff_spectrogram)
sample_right = g.loc[audio_samples['variant'] == 'low'] sample_right = g.loc[audio_samples['variant'] == 'low']
@ -186,24 +118,6 @@ def read_siamese_tfrecords(audio_group='audio'):
output_class.append(output) output_class.append(output)
return input_pairs,output_class return input_pairs,output_class
def create_speech_pairs_data(audio_group='audio'):
    """Split the pickled spectrogram frame and save train/test pair arrays.

    Loads ``outputs/<audio_group>-spectrogram.pkl``, splits it 90/10 with
    ``train_test_split``, and for each split builds ``X``/``Y`` with
    ``create_tagged_data``, shuffles them in unison and saves them as
    ``outputs/<audio_group>-<split>-X.npy`` and ``...-Y.npy``.

    Args:
        audio_group: base name of the input pickle and the output arrays.

    Returns:
        None; the arrays are written to disk.
    """
    # NOTE: read_pickle executes pickle data; only load files produced by
    # this pipeline.
    audio_samples = pd.read_pickle('outputs/{}-spectrogram.pkl'.format(audio_group))
    tr_audio_samples, te_audio_samples = train_test_split(audio_samples, test_size=0.1)

    def save_samples_for(sample_name, samples):
        print('generating {} siamese speech pairs'.format(sample_name))
        X, Y = create_tagged_data(samples)
        print('shuffling array speech pairs')
        # Replaying the same RNG state gives X and Y the same permutation,
        # so each pair keeps its label.
        rng_state = np.random.get_state()
        np.random.shuffle(X)
        np.random.set_state(rng_state)
        np.random.shuffle(Y)
        print('pickling X/Y')
        # The split name is part of the filename so that the 'test' call does
        # not overwrite the arrays written by the 'train' call.
        np.save('outputs/{}-{}-X.npy'.format(audio_group, sample_name), X)
        np.save('outputs/{}-{}-Y.npy'.format(audio_group, sample_name), Y)

    save_samples_for('train', tr_audio_samples)
    save_samples_for('test', te_audio_samples)
def audio_samples_word_count(audio_group='audio'): def audio_samples_word_count(audio_group='audio'):
audio_group = 'story_all' audio_group = 'story_all'
audio_samples = pd.read_csv( './outputs/' + audio_group + '.csv' audio_samples = pd.read_csv( './outputs/' + audio_group + '.csv'
@ -216,15 +130,23 @@ def audio_samples_word_count(audio_group='audio'):
audio_samples = audio_samples[audio_samples['file_exists'] == True].reset_index() audio_samples = audio_samples[audio_samples['file_exists'] == True].reset_index()
return len(audio_samples.groupby(audio_samples['word'])) return len(audio_samples.groupby(audio_samples['word']))
def speech_model_data(): def fix_csv(audio_group='audio'):
tr_pairs = np.load('outputs/tr_pairs.npy') / 255.0 audio_group = 'story_all'
te_pairs = np.load('outputs/te_pairs.npy') / 255.0 audio_samples = pd.read_csv( './outputs/story_words.csv'
tr_pairs[tr_pairs < 0] = 0 , names=['word','phonemes', 'voice', 'language', 'rate', 'variant', 'file']
te_pairs[te_pairs < 0] = 0 , quoting=csv.QUOTE_NONE)
tr_y = np.load('outputs/tr_y.npy') voice_set = set(audio_samples['voice'].unique().tolist())
te_y = np.load('outputs/te_y.npy') audio_csv_lines = open('./outputs/' + audio_group + '.csv','r').readlines()
return tr_pairs, te_pairs, tr_y, te_y audio_csv_data = [i.strip().split(',') for i in audio_csv_lines]
to_be_fixed = [i for i in audio_csv_data if len(i) > 7]
def unite_words(entries):
entries = to_be_fixed[0]
word_entries = next(((entries[:i],entries[i:]) for (i,e) in enumerate(entries) if e in voice_set),'')
word_entries[1]
return
to_be_fixed[0]
entries = [unite_words for e in to_be_fixed]
[i for i in entries if len(i) % 2 != 0]
if __name__ == '__main__': if __name__ == '__main__':
# sunflower_pairs_data() # sunflower_pairs_data()

View File

@ -3,6 +3,7 @@ from AppKit import NSSpeechSynthesizer, NSSpeechInputModeProperty
from AppKit import NSSpeechModePhoneme from AppKit import NSSpeechModePhoneme
from Foundation import NSURL from Foundation import NSURL
import json import json
import csv
import random import random
import os import os
import re import re
@ -81,6 +82,11 @@ class SynthFile(object):
return ','.join([str(c) for c in cols])+'\n' return ','.join([str(c) for c in cols])+'\n'
def get_values(self):
    """Return this entry's column values as a list of strings.

    The order is: word, phoneme, voice, voice_lang, rate, variant, filename.
    Each attribute is converted with ``str``.
    """
    fields = (self.word, self.phoneme, self.voice, self.voice_lang,
              self.rate, self.variant, self.filename)
    return list(map(str, fields))
class SynthVariant(object): class SynthVariant(object):
"""docstring for SynthVariant.""" """docstring for SynthVariant."""
@ -191,22 +197,11 @@ def synth_generator():
print("It took {} to synthsize all variants.".format(time_str)) print("It took {} to synthsize all variants.".format(time_str))
return synth_for_words return synth_for_words
def write_synths(synth_list, fname, csv=False):
    """Write every synth entry to ``fname`` as CSV lines or one JSON array.

    Args:
        synth_list: iterable of objects providing ``get_csv()`` (a complete
            line, newline included) and ``get_json()``.
        fname: path of the file to create or overwrite.
        csv: when true, write the ``get_csv()`` line of each entry;
            otherwise dump the list of ``get_json()`` values as JSON.
            (The name shadows the ``csv`` module inside this function; it is
            kept for caller compatibility.)

    Returns:
        None.
    """
    # The context manager closes the file even if an entry's serializer or
    # json.dump raises part-way through.
    with open(fname, 'w') as f:
        if csv:
            f.writelines(s.get_csv() for s in synth_list)
        else:
            json.dump([s.get_json() for s in synth_list], f)
def synth_logger(fname, csv=False): def synth_logger(fname, csv=False):
f = open(fname, 'w') f = open(fname, 'w')
s_csv_w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
def csv_writer(s): def csv_writer(s):
f.write(s.get_csv()) s_csv_w.writerow(s.get_values())
synth_list = [] synth_list = []
def json_writer(s): def json_writer(s):