discarding phoneme incapable synthesizers

master
Malar Kannan 2017-10-26 16:51:32 +05:30
parent a953fa3355
commit e57576d6fa
2 changed files with 16 additions and 4 deletions

View File

@ -2,3 +2,4 @@
1. create spectrograms of 150ms windows with 50ms overlap for each word.
2. train an RNN to output a vector using the spectrograms
3. train an NN to output True/False based on the acceptability of the RNN output -> Siamese network (implementation detail)
4. validate with real world samples

View File

@ -89,11 +89,16 @@ class SynthVariant(object):
self.rate = rate
self.name = voice
self.lang = lang
self.phoneme_capable = self.is_phoneme_capable()
create_dir(dest_dir + self.name + '/' + str(self.rate))
def __repr__(self):
    """Return a short label made of this variant's name and rate."""
    label = 'Synthesizer[{} - {}]'
    return label.format(self.name, self.rate)
def is_phoneme_capable(self):
    """Report whether the wrapped synthesizer yields phonemes for text.

    Probes ``self.synth`` by asking it for the phonemes of the fixed
    word 'water' and checks that something usable came back.

    Returns:
        bool: True when the probe produced a non-empty result, False
        when it produced an empty string or no result at all.
    """
    probe_phonemes = self.synth.phonemesFromText_('water')
    # Truthiness instead of `!= ''`: a missing result (None) must count
    # as incapable too, otherwise such a voice would slip through.
    # NOTE(review): whether the bridge can return None here is an
    # assumption -- confirm against the speech API in use.
    return bool(probe_phonemes)
def generate_audio(self, word, variant):
orig_phon, phoneme, phon_cmd = self.synth.phonemesFromText_(
word), '', word
@ -105,7 +110,7 @@ class SynthVariant(object):
phon_cmd = '[[inpt PHON]] ' + phoneme
elif variant == 'high':
phoneme = orig_phon
phon_cmd = word
phon_cmd = '[[inpt PHON]] ' + phoneme
# elif variant == 'long':
# if phon != '':
# self.phone_synth.startSpeakingString_toURL_(phon,d_url)
@ -152,8 +157,11 @@ def synth_generator():
for (i, v, l) in us_voices_ids:
for r in voice_rates:
s = SynthVariant(i, v, l, r)
print('Created ', s)
voice_synths.append(s)
if s.phoneme_capable:
print('Adding ', s)
voice_synths.append(s)
else:
print('Discarding phoneme incapable ', s)
def synth_for_words(words, writer):
# all_synths = []
@ -233,6 +241,9 @@ def generate_audio_for_stories():
# synths = synth_generator()([OUTPUT_NAME])
generate_audio_for_stories()
# write_synths(synths, dest_file, True)
# write_synths(synths,'./outputs/synths.json')
if __name__ == '__main__':
generate_audio_for_stories()