discarding phoneme incapable synthesizers
parent
a953fa3355
commit
e57576d6fa
1
TODO.md
1
TODO.md
|
|
@ -2,3 +2,4 @@
|
||||||
1. create spectrograms of 150ms windows with 50ms overlap for each word.
|
1. create spectrograms of 150ms windows with 50ms overlap for each word.
|
||||||
2. train an RNN to output a vector using the spectrograms
|
2. train an RNN to output a vector using the spectrograms
|
||||||
3. train an NN to output True/False based on the acceptability of the RNN output. -> Siamese network (implementation detail)
|
3. train an NN to output True/False based on the acceptability of the RNN output. -> Siamese network (implementation detail)
|
||||||
|
4. validate with real-world samples
|
||||||
|
|
|
||||||
|
|
@ -89,11 +89,16 @@ class SynthVariant(object):
|
||||||
self.rate = rate
|
self.rate = rate
|
||||||
self.name = voice
|
self.name = voice
|
||||||
self.lang = lang
|
self.lang = lang
|
||||||
|
self.phoneme_capable = self.is_phoneme_capable()
|
||||||
create_dir(dest_dir + self.name + '/' + str(self.rate))
|
create_dir(dest_dir + self.name + '/' + str(self.rate))
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return 'Synthesizer[{} - {}]'.format(self.name, self.rate)
|
return 'Synthesizer[{} - {}]'.format(self.name, self.rate)
|
||||||
|
|
||||||
|
def is_phoneme_capable(self):
|
||||||
|
orig_phon = self.synth.phonemesFromText_('water')
|
||||||
|
return orig_phon != ''
|
||||||
|
|
||||||
def generate_audio(self, word, variant):
|
def generate_audio(self, word, variant):
|
||||||
orig_phon, phoneme, phon_cmd = self.synth.phonemesFromText_(
|
orig_phon, phoneme, phon_cmd = self.synth.phonemesFromText_(
|
||||||
word), '', word
|
word), '', word
|
||||||
|
|
@ -105,7 +110,7 @@ class SynthVariant(object):
|
||||||
phon_cmd = '[[inpt PHON]] ' + phoneme
|
phon_cmd = '[[inpt PHON]] ' + phoneme
|
||||||
elif variant == 'high':
|
elif variant == 'high':
|
||||||
phoneme = orig_phon
|
phoneme = orig_phon
|
||||||
phon_cmd = word
|
phon_cmd = '[[inpt PHON]] ' + phoneme
|
||||||
# elif variant == 'long':
|
# elif variant == 'long':
|
||||||
# if phon != '':
|
# if phon != '':
|
||||||
# self.phone_synth.startSpeakingString_toURL_(phon,d_url)
|
# self.phone_synth.startSpeakingString_toURL_(phon,d_url)
|
||||||
|
|
@ -152,8 +157,11 @@ def synth_generator():
|
||||||
for (i, v, l) in us_voices_ids:
|
for (i, v, l) in us_voices_ids:
|
||||||
for r in voice_rates:
|
for r in voice_rates:
|
||||||
s = SynthVariant(i, v, l, r)
|
s = SynthVariant(i, v, l, r)
|
||||||
print('Created ', s)
|
if s.phoneme_capable:
|
||||||
|
print('Adding ', s)
|
||||||
voice_synths.append(s)
|
voice_synths.append(s)
|
||||||
|
else:
|
||||||
|
print('Discarding phoneme incapable ', s)
|
||||||
|
|
||||||
def synth_for_words(words, writer):
|
def synth_for_words(words, writer):
|
||||||
# all_synths = []
|
# all_synths = []
|
||||||
|
|
@ -233,6 +241,9 @@ def generate_audio_for_stories():
|
||||||
|
|
||||||
|
|
||||||
# synths = synth_generator()([OUTPUT_NAME])
|
# synths = synth_generator()([OUTPUT_NAME])
|
||||||
generate_audio_for_stories()
|
|
||||||
# write_synths(synths, dest_file, True)
|
# write_synths(synths, dest_file, True)
|
||||||
# write_synths(synths,'./outputs/synths.json')
|
# write_synths(synths,'./outputs/synths.json')
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
generate_audio_for_stories()
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue