implemented segment-generation for random words for testing
parent
6ef4e86f41
commit
b50edb980d
|
|
@ -5,7 +5,6 @@ from Foundation import NSURL
|
|||
import json
|
||||
import csv
|
||||
import random
|
||||
import string
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
|
|
@ -13,36 +12,12 @@ import time
|
|||
from tqdm import tqdm
|
||||
|
||||
from generate_similar import similar_phoneme_phrase,similar_phrase
|
||||
from speech_tools import hms_string,create_dir,format_filename
|
||||
|
||||
OUTPUT_NAME = 'story_phrases'
|
||||
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
|
||||
dest_file = './outputs/' + OUTPUT_NAME + '.csv'
|
||||
|
||||
def hms_string(sec_elapsed):
    """Format an elapsed time in seconds as ``H:MM:SS.ss``.

    The value is rounded to whole hundredths of a second *before* it is
    split into hours, minutes and seconds. Rounding afterwards (as the
    formatter alone would do) lets a value such as 59.999 come out as the
    out-of-range ``0:00:60.00``; here it is rendered as ``0:01:00.00``.

    Args:
        sec_elapsed: Elapsed time in seconds (int or float); expected to be
            non-negative.

    Returns:
        A string with unpadded hours, zero-padded two-digit minutes, and
        seconds zero-padded to two integer digits with two decimals.
    """
    total_centis = int(round(sec_elapsed * 100))
    h, rem = divmod(total_centis, 60 * 60 * 100)
    m, rem = divmod(rem, 60 * 100)
    s = rem / 100.
    return "{}:{:>02}:{:>05.2f}".format(h, m, s)
|
||||
|
||||
def create_dir(direc):
    """Create directory ``direc`` (including parents) if it is missing.

    The directory is created first and the "already exists" case is handled
    afterwards, so a concurrent creation between a check and the ``makedirs``
    call cannot make this function fail.

    Args:
        direc: Path of the directory to create.

    Raises:
        OSError: If creation failed and the path still does not exist
            (e.g. permission denied).
    """
    try:
        os.makedirs(direc)
    except OSError:
        # An existing path is fine (same as the original exists() check);
        # anything else is a real failure and must propagate.
        if not os.path.exists(direc):
            raise
|
||||
|
||||
def format_filename(s):
    """Return a filename-safe version of the string ``s``.

    Uses a whitelist approach: every character that is not an ASCII letter,
    a digit, or one of ``-_.() `` is removed. Remaining spaces are then
    replaced with underscores.

    Note: the result may still be an invalid filename such as ``''``, ``'.'``
    or ``'..'``. Callers should prepend a prefix (e.g. a date string like
    '2009_01_15_19_46_32_') and/or append an extension like '.txt' to avoid
    that.

    Args:
        s: The string to sanitise.

    Returns:
        The sanitised string.
    """
    # A frozenset gives constant-time membership tests instead of scanning
    # the whitelist string once per input character.
    valid_chars = frozenset(
        "-_.() %s%s" % (string.ascii_letters, string.digits))
    filename = ''.join(c for c in s if c in valid_chars)
    return filename.replace(' ', '_')
|
||||
|
||||
def dest_filename(w, v, r, t):
|
||||
rand_no = str(random.randint(0, 10000))
|
||||
|
|
|
|||
|
|
@ -10,9 +10,7 @@ import json
|
|||
import csv
|
||||
import subprocess
|
||||
from tqdm import tqdm
|
||||
|
||||
from speech_samplegen import SynthVariant, format_filename
|
||||
from speech_tools import create_dir
|
||||
from speech_tools import create_dir,format_filename
|
||||
|
||||
apple_phonemes = [
|
||||
'%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW',
|
||||
|
|
@ -20,7 +18,7 @@ apple_phonemes = [
|
|||
'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z'
|
||||
]
|
||||
|
||||
OUTPUT_NAME = 'story_phrases_segments'
|
||||
OUTPUT_NAME = 'story_test_segments'
|
||||
|
||||
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
|
||||
csv_dest_file = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '.csv'
|
||||
|
|
@ -58,7 +56,10 @@ class Delegate (NSObject):
|
|||
def applicationDidFinishLaunching_(self, aNotification):
|
||||
'''Called automatically when the application has launched'''
|
||||
print("App Launched!")
|
||||
generate_audio()
|
||||
# phrases = story_texts()#random.sample(story_texts(), 100) #
|
||||
phrases = test_texts(30)
|
||||
# print(phrases)
|
||||
generate_audio(phrases)
|
||||
|
||||
|
||||
class PhonemeTiming(object):
|
||||
|
|
@ -181,10 +182,13 @@ def story_texts():
|
|||
text_list = sorted(list(set(text_list_dup)))
|
||||
return text_list
|
||||
|
||||
def test_texts(count=10):
    """Return ``count`` distinct random words from the word list, sorted.

    Reads ``./inputs/wordlist.txt`` (one word per line), strips trailing
    newlines and underscores from each line, drops duplicates and blank
    entries, and draws ``count`` words at random.

    Args:
        count: Number of words to draw.

    Returns:
        A sorted list of ``count`` unique words.

    Raises:
        IOError/OSError: If the word list file cannot be opened.
        ValueError: If ``count`` exceeds the number of unique words.
    """
    # Imported locally so this function does not rely on a module-level
    # import that may not be present in this file.
    import random

    # The context manager guarantees the file handle is closed.
    with open('./inputs/wordlist.txt', 'r') as word_file:
        words = {line.strip('\n_') for line in word_file}
    # Blank lines would otherwise yield an empty "word" to synthesise.
    words.discard('')
    # Sample from a sorted pool so a seeded RNG gives reproducible picks
    # (set iteration order is not stable across runs).
    return sorted(random.sample(sorted(words), count))
|
||||
|
||||
def generate_audio():
|
||||
def generate_audio(phrases):
|
||||
synthQ = SynthesizerQueue()
|
||||
phrases = story_texts()#random.sample(story_texts(), 100) #
|
||||
f = open(csv_dest_file, 'w')
|
||||
s_csv_w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
|
||||
i = 0
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
import os
|
||||
import math
|
||||
import string
|
||||
import threading
|
||||
import multiprocessing
|
||||
import pandas as pd
|
||||
|
|
@ -87,10 +88,15 @@ def apply_by_multiprocessing(df,func,**kwargs):
|
|||
def square(x):
    """Return ``x`` squared.

    The previous body computed ``x ** x`` (x to the power of itself), which
    is not a square.

    Args:
        x: Any value supporting the ``**`` operator.

    Returns:
        ``x ** 2``.
    """
    return x ** 2
|
||||
|
||||
if __name__ == '__main__':
|
||||
df = pd.DataFrame({'a':range(10), 'b':range(10)})
|
||||
apply_by_multiprocessing(df, square, axis=1, workers=4)
|
||||
# if __name__ == '__main__':
|
||||
# df = pd.DataFrame({'a':range(10), 'b':range(10)})
|
||||
# apply_by_multiprocessing(df, square, axis=1, workers=4)
|
||||
|
||||
def hms_string(sec_elapsed):
    """Format an elapsed time in seconds as ``H:MM:SS.ss``.

    The value is rounded to whole hundredths of a second *before* it is
    split into hours, minutes and seconds. Rounding afterwards (as the
    formatter alone would do) lets a value such as 59.999 come out as the
    out-of-range ``0:00:60.00``; here it is rendered as ``0:01:00.00``.

    Args:
        sec_elapsed: Elapsed time in seconds (int or float); expected to be
            non-negative.

    Returns:
        A string with unpadded hours, zero-padded two-digit minutes, and
        seconds zero-padded to two integer digits with two decimals.
    """
    total_centis = int(round(sec_elapsed * 100))
    h, rem = divmod(total_centis, 60 * 60 * 100)
    m, rem = divmod(rem, 60 * 100)
    s = rem / 100.
    return "{}:{:>02}:{:>05.2f}".format(h, m, s)
|
||||
|
||||
def rm_rf(d):
|
||||
for path in (os.path.join(d,f) for f in os.listdir(d)):
|
||||
|
|
@ -108,6 +114,22 @@ def create_dir(direc):
|
|||
create_dir(direc)
|
||||
|
||||
|
||||
def format_filename(s):
    """Return a filename-safe version of the string ``s``.

    Uses a whitelist approach: every character that is not an ASCII letter,
    a digit, or one of ``-_.() `` is removed. Remaining spaces are then
    replaced with underscores.

    Note: the result may still be an invalid filename such as ``''``, ``'.'``
    or ``'..'``. Callers should prepend a prefix (e.g. a date string like
    '2009_01_15_19_46_32_') and/or append an extension like '.txt' to avoid
    that.

    Args:
        s: The string to sanitise.

    Returns:
        The sanitised string.
    """
    # A frozenset gives constant-time membership tests instead of scanning
    # the whitelist string once per input character.
    valid_chars = frozenset(
        "-_.() %s%s" % (string.ascii_letters, string.digits))
    filename = ''.join(c for c in s if c in valid_chars)
    return filename.replace(' ', '_')
|
||||
|
||||
#################### Now make the data generator threadsafe ####################
|
||||
|
||||
class threadsafe_iter:
|
||||
|
|
|
|||
Loading…
Reference in New Issue