diff --git a/speech_samplegen.py b/speech_samplegen.py index 2ff5849..14d74cd 100644 --- a/speech_samplegen.py +++ b/speech_samplegen.py @@ -12,9 +12,9 @@ import time from tqdm import tqdm from generate_similar import similar_phoneme_phrase,similar_phrase -from speech_tools import hms_string,create_dir,format_filename +from speech_tools import hms_string,create_dir,format_filename,reservoir_sample -OUTPUT_NAME = 'story_phrases' +OUTPUT_NAME = 'test_5_words' dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/' dest_file = './outputs/' + OUTPUT_NAME + '.csv' @@ -227,7 +227,7 @@ def generate_audio_for_stories(): text_list = sorted(list(set(text_list_dup))) generate_audio_for_text_list(text_list) -def generate_test_audio_for_stories(): +def generate_test_audio_for_stories(sample_count=0): ''' Picks a list of words from the wordlist that are not in story words and generates the variants @@ -241,11 +241,12 @@ def generate_test_audio_for_stories(): word_list = [i.strip('\n_') for i in open('./inputs/wordlist.txt','r').readlines()] text_set = set(text_list) new_word_list = [i for i in word_list if i not in text_set and len(i) > 4] - test_words = new_word_list[:int(len(text_list)/5+1)] + # test_words = new_word_list[:int(len(text_list)/5+1)] + test_words = reservoir_sample(new_word_list,sample_count) if sample_count > 0 else new_word_list generate_audio_for_text_list(test_words) if __name__ == '__main__': - # generate_test_audio_for_stories() + generate_test_audio_for_stories(5) # generate_audio_for_text_list(['I want to go home','education']) - generate_audio_for_stories() + # generate_audio_for_stories() diff --git a/speech_segmentgen.py b/speech_segmentgen.py index 4b3010d..b03995a 100644 --- a/speech_segmentgen.py +++ b/speech_segmentgen.py @@ -57,7 +57,8 @@ class Delegate (NSObject): '''Called automatically when the application has launched''' print("App Launched!") # phrases = story_texts()#random.sample(story_texts(), 100) # - phrases = test_texts(30) + # phrases = test_texts(30) + phrases = story_words() # print(phrases) generate_audio(phrases) @@ -174,14 +175,19 @@ class SynthesizerQueue(object): def story_texts(): - # story_file = './inputs/all_stories_hs.json' story_file = './inputs/all_stories.json' stories_data = json.load(open(story_file)) - # text_list_dup = [t[0] for i in stories_data.values() for t in i] text_list_dup = [t for i in stories_data.values() for t in i] text_list = sorted(list(set(text_list_dup))) return text_list +def story_words(): + story_file = './inputs/all_stories_hs.json' + stories_data = json.load(open(story_file)) + text_list_dup = [t[0] for i in stories_data.values() for t in i] + text_list = sorted(list(set(text_list_dup))) + return text_list + def test_texts(count=10): word_list = [i.strip('\n_') for i in open('./inputs/wordlist.txt','r').readlines()] text_list = sorted(random.sample(list(set(word_list)),count))