diff --git a/speech_samplegen.py b/speech_samplegen.py index f71e317..2b6faa4 100644 --- a/speech_samplegen.py +++ b/speech_samplegen.py @@ -5,7 +5,6 @@ from Foundation import NSURL import json import csv import random -import string import os import re import subprocess @@ -13,36 +12,12 @@ import time from tqdm import tqdm from generate_similar import similar_phoneme_phrase,similar_phrase +from speech_tools import hms_string,create_dir,format_filename OUTPUT_NAME = 'story_phrases' dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/' dest_file = './outputs/' + OUTPUT_NAME + '.csv' -def hms_string(sec_elapsed): - h = int(sec_elapsed / (60 * 60)) - m = int((sec_elapsed % (60 * 60)) / 60) - s = sec_elapsed % 60. - return "{}:{:>02}:{:>05.2f}".format(h, m, s) - -def create_dir(direc): - if not os.path.exists(direc): - os.makedirs(direc) - -def format_filename(s): - """ - Take a string and return a valid filename constructed from the string. - Uses a whitelist approach: any characters not present in valid_chars are - removed. Also spaces are replaced with underscores. - - Note: this method may produce invalid filenames such as ``, `.` or `..` - When I use this method I prepend a date string like '2009_01_15_19_46_32_' - and append a file extension like '.txt', so I avoid the potential of using - an invalid filename. - """ - valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) - filename = ''.join(c for c in s if c in valid_chars) - filename = filename.replace(' ','_') # I don't like spaces in filenames. - return filename def dest_filename(w, v, r, t): rand_no = str(random.randint(0, 10000)) diff --git a/speech_segmentgen.py b/speech_segmentgen.py index a09b949..4b3010d 100644 --- a/speech_segmentgen.py +++ b/speech_segmentgen.py @@ -10,9 +10,7 @@ import json import csv import subprocess from tqdm import tqdm - -from speech_samplegen import SynthVariant, format_filename -from speech_tools import create_dir +from speech_tools import create_dir,format_filename apple_phonemes = [ '%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW', @@ -20,7 +18,7 @@ apple_phonemes = [ 'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z' ] -OUTPUT_NAME = 'story_phrases_segments' +OUTPUT_NAME = 'story_test_segments' dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/' csv_dest_file = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '.csv' @@ -58,7 +56,10 @@ class Delegate (NSObject): def applicationDidFinishLaunching_(self, aNotification): '''Called automatically when the application has launched''' print("App Launched!") - generate_audio() + # phrases = story_texts()#random.sample(story_texts(), 100) # + phrases = test_texts(30) + # print(phrases) + generate_audio(phrases) class PhonemeTiming(object): @@ -181,10 +182,13 @@ def story_texts(): text_list = sorted(list(set(text_list_dup))) return text_list +def test_texts(count=10): + word_list = [i.strip('\n_') for i in open('./inputs/wordlist.txt','r').readlines()] + text_list = sorted(random.sample(list(set(word_list)),count)) + return text_list -def generate_audio(): +def generate_audio(phrases): synthQ = SynthesizerQueue() - phrases = story_texts()#random.sample(story_texts(), 100) # f = open(csv_dest_file, 'w') s_csv_w = csv.writer(f, quoting=csv.QUOTE_MINIMAL) i = 0 diff --git a/speech_tools.py b/speech_tools.py index 1418c09..4771952 100644 --- a/speech_tools.py +++ b/speech_tools.py @@ -1,5 +1,6 @@ import os import math +import string import threading import multiprocessing import pandas as pd @@ -87,10 +88,15 @@ def apply_by_multiprocessing(df,func,**kwargs): def square(x): return x**x -if __name__ == '__main__': - df = pd.DataFrame({'a':range(10), 'b':range(10)}) - apply_by_multiprocessing(df, square, axis=1, workers=4) +# if __name__ == '__main__': +# df = pd.DataFrame({'a':range(10), 'b':range(10)}) +# apply_by_multiprocessing(df, square, axis=1, workers=4) +def hms_string(sec_elapsed): + h = int(sec_elapsed / (60 * 60)) + m = int((sec_elapsed % (60 * 60)) / 60) + s = sec_elapsed % 60. + return "{}:{:>02}:{:>05.2f}".format(h, m, s) def rm_rf(d): for path in (os.path.join(d,f) for f in os.listdir(d)): @@ -108,6 +114,22 @@ def create_dir(direc): create_dir(direc) +def format_filename(s): + """ + Take a string and return a valid filename constructed from the string. + Uses a whitelist approach: any characters not present in valid_chars are + removed. Also spaces are replaced with underscores. + + Note: this method may produce invalid filenames such as ``, `.` or `..` + When I use this method I prepend a date string like '2009_01_15_19_46_32_' + and append a file extension like '.txt', so I avoid the potential of using + an invalid filename. + """ + valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits) + filename = ''.join(c for c in s if c in valid_chars) + filename = filename.replace(' ','_') # I don't like spaces in filenames. + return filename + #################### Now make the data generator threadsafe #################### class threadsafe_iter: