fixed duplicate words

master
Malar Kannan 2017-11-15 18:27:49 +05:30
parent 1b0ba26a6e
commit a67ce148d6
1 changed file with 8 additions and 6 deletions

View File

@@ -198,9 +198,10 @@ def synth_generator():
prog = tqdm(words) prog = tqdm(words)
prog.set_postfix(variant=v,voice=s.name,rate=s.rate) prog.set_postfix(variant=v,voice=s.name,rate=s.rate)
for w in tqdm(words): for w in tqdm(words):
prog.set_postfix(word=w) prog.set_description('Synthesizing text:"{}"'.format(w))
synthed = s.generate_audio(w, v) synthed = s.generate_audio(w, v)
writer(synthed) writer(synthed)
prog.close()
end_time = time.time() end_time = time.time()
time_str = hms_string(end_time - start_time) time_str = hms_string(end_time - start_time)
print("It took {} to synthsize all variants.".format(time_str)) print("It took {} to synthsize all variants.".format(time_str))
@@ -243,20 +244,21 @@ def generate_audio_for_stories():
# story_file = './inputs/all_stories_hs.json' # story_file = './inputs/all_stories_hs.json'
story_file = './inputs/all_stories.json' story_file = './inputs/all_stories.json'
stories_data = json.load(open(story_file)) stories_data = json.load(open(story_file))
# text_list = [t[0] for i in stories_data.values() for t in i] # text_list_dup = [t[0] for i in stories_data.values() for t in i]
text_list = [i for g in stories_data.values() for i in g] text_list_dup = [t for i in stories_data.values() for t in i]
generate_audio_for_text_list(text_list) text_list = sorted(list(set(text_list_dup)))
generate_audio_for_text_list(text_list[:5])
def generate_test_audio_for_stories(): def generate_test_audio_for_stories():
story_file = './inputs/all_stories_hs.json' story_file = './inputs/all_stories_hs.json'
# story_file = './inputs/all_stories.json' # story_file = './inputs/all_stories.json'
stories_data = json.load(open(story_file)) stories_data = json.load(open(story_file))
text_list = [t[0] for i in stories_data.values() for t in i] text_list_dup = [t[0] for i in stories_data.values() for t in i]
text_list = sorted(list(set(text_list_dup)))
# text_list = [i.replace('-','') for g in stories_data.values() for i in g] # text_list = [i.replace('-','') for g in stories_data.values() for i in g]
word_list = [i.strip('\n_') for i in open('./inputs/wordlist.txt','r').readlines()] word_list = [i.strip('\n_') for i in open('./inputs/wordlist.txt','r').readlines()]
text_set = set(text_list) text_set = set(text_list)
new_word_list = [i for i in word_list if i not in text_set and len(i) > 4] new_word_list = [i for i in word_list if i not in text_set and len(i) > 4]
# len(new_word_list)
test_words = new_word_list[:int(len(text_list)/5+1)] test_words = new_word_list[:int(len(text_list)/5+1)]
generate_audio_for_text_list(test_words) generate_audio_for_text_list(test_words)