Merge branch 'master' of ssh://invmac/~/Public/repos/speech-scoring
commit
8e14db2437
|
|
@ -5,7 +5,6 @@ from Foundation import NSURL
|
||||||
import json
|
import json
|
||||||
import csv
|
import csv
|
||||||
import random
|
import random
|
||||||
import string
|
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import subprocess
|
import subprocess
|
||||||
|
|
@ -13,36 +12,12 @@ import time
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
from generate_similar import similar_phoneme_phrase,similar_phrase
|
from generate_similar import similar_phoneme_phrase,similar_phrase
|
||||||
|
from speech_tools import hms_string,create_dir,format_filename
|
||||||
|
|
||||||
OUTPUT_NAME = 'story_phrases'
|
OUTPUT_NAME = 'story_phrases'
|
||||||
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
|
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
|
||||||
dest_file = './outputs/' + OUTPUT_NAME + '.csv'
|
dest_file = './outputs/' + OUTPUT_NAME + '.csv'
|
||||||
|
|
||||||
def hms_string(sec_elapsed):
|
|
||||||
h = int(sec_elapsed / (60 * 60))
|
|
||||||
m = int((sec_elapsed % (60 * 60)) / 60)
|
|
||||||
s = sec_elapsed % 60.
|
|
||||||
return "{}:{:>02}:{:>05.2f}".format(h, m, s)
|
|
||||||
|
|
||||||
def create_dir(direc):
|
|
||||||
if not os.path.exists(direc):
|
|
||||||
os.makedirs(direc)
|
|
||||||
|
|
||||||
def format_filename(s):
|
|
||||||
"""
|
|
||||||
Take a string and return a valid filename constructed from the string.
|
|
||||||
Uses a whitelist approach: any characters not present in valid_chars are
|
|
||||||
removed. Also spaces are replaced with underscores.
|
|
||||||
|
|
||||||
Note: this method may produce invalid filenames such as ``, `.` or `..`
|
|
||||||
When I use this method I prepend a date string like '2009_01_15_19_46_32_'
|
|
||||||
and append a file extension like '.txt', so I avoid the potential of using
|
|
||||||
an invalid filename.
|
|
||||||
"""
|
|
||||||
valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
|
|
||||||
filename = ''.join(c for c in s if c in valid_chars)
|
|
||||||
filename = filename.replace(' ','_') # I don't like spaces in filenames.
|
|
||||||
return filename
|
|
||||||
|
|
||||||
def dest_filename(w, v, r, t):
|
def dest_filename(w, v, r, t):
|
||||||
rand_no = str(random.randint(0, 10000))
|
rand_no = str(random.randint(0, 10000))
|
||||||
|
|
|
||||||
|
|
@ -10,9 +10,7 @@ import json
|
||||||
import csv
|
import csv
|
||||||
import subprocess
|
import subprocess
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
from speech_tools import create_dir,format_filename
|
||||||
from speech_samplegen import SynthVariant, format_filename
|
|
||||||
from speech_tools import create_dir
|
|
||||||
|
|
||||||
apple_phonemes = [
|
apple_phonemes = [
|
||||||
'%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW',
|
'%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW',
|
||||||
|
|
@ -20,7 +18,7 @@ apple_phonemes = [
|
||||||
'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z'
|
'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z'
|
||||||
]
|
]
|
||||||
|
|
||||||
OUTPUT_NAME = 'story_phrases_segments'
|
OUTPUT_NAME = 'story_test_segments'
|
||||||
|
|
||||||
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
|
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
|
||||||
csv_dest_file = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '.csv'
|
csv_dest_file = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '.csv'
|
||||||
|
|
@ -58,7 +56,10 @@ class Delegate (NSObject):
|
||||||
def applicationDidFinishLaunching_(self, aNotification):
|
def applicationDidFinishLaunching_(self, aNotification):
|
||||||
'''Called automatically when the application has launched'''
|
'''Called automatically when the application has launched'''
|
||||||
print("App Launched!")
|
print("App Launched!")
|
||||||
generate_audio()
|
# phrases = story_texts()#random.sample(story_texts(), 100) #
|
||||||
|
phrases = test_texts(30)
|
||||||
|
# print(phrases)
|
||||||
|
generate_audio(phrases)
|
||||||
|
|
||||||
|
|
||||||
class PhonemeTiming(object):
|
class PhonemeTiming(object):
|
||||||
|
|
@ -181,10 +182,13 @@ def story_texts():
|
||||||
text_list = sorted(list(set(text_list_dup)))
|
text_list = sorted(list(set(text_list_dup)))
|
||||||
return text_list
|
return text_list
|
||||||
|
|
||||||
|
def test_texts(count=10):
|
||||||
|
word_list = [i.strip('\n_') for i in open('./inputs/wordlist.txt','r').readlines()]
|
||||||
|
text_list = sorted(random.sample(list(set(word_list)),count))
|
||||||
|
return text_list
|
||||||
|
|
||||||
def generate_audio():
|
def generate_audio(phrases):
|
||||||
synthQ = SynthesizerQueue()
|
synthQ = SynthesizerQueue()
|
||||||
phrases = story_texts()#random.sample(story_texts(), 100) #
|
|
||||||
f = open(csv_dest_file, 'w')
|
f = open(csv_dest_file, 'w')
|
||||||
s_csv_w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
|
s_csv_w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
|
||||||
i = 0
|
i = 0
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
import os
|
import os
|
||||||
import math
|
import math
|
||||||
|
import string
|
||||||
import threading
|
import threading
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
@ -101,10 +102,15 @@ def apply_by_multiprocessing(df,func,**kwargs):
|
||||||
def square(x):
|
def square(x):
|
||||||
return x**x
|
return x**x
|
||||||
|
|
||||||
if __name__ == '__main__':
|
# if __name__ == '__main__':
|
||||||
df = pd.DataFrame({'a':range(10), 'b':range(10)})
|
# df = pd.DataFrame({'a':range(10), 'b':range(10)})
|
||||||
apply_by_multiprocessing(df, square, axis=1, workers=4)
|
# apply_by_multiprocessing(df, square, axis=1, workers=4)
|
||||||
|
|
||||||
|
def hms_string(sec_elapsed):
|
||||||
|
h = int(sec_elapsed / (60 * 60))
|
||||||
|
m = int((sec_elapsed % (60 * 60)) / 60)
|
||||||
|
s = sec_elapsed % 60.
|
||||||
|
return "{}:{:>02}:{:>05.2f}".format(h, m, s)
|
||||||
|
|
||||||
def rm_rf(d):
|
def rm_rf(d):
|
||||||
for path in (os.path.join(d,f) for f in os.listdir(d)):
|
for path in (os.path.join(d,f) for f in os.listdir(d)):
|
||||||
|
|
@ -122,6 +128,22 @@ def create_dir(direc):
|
||||||
create_dir(direc)
|
create_dir(direc)
|
||||||
|
|
||||||
|
|
||||||
|
def format_filename(s):
|
||||||
|
"""
|
||||||
|
Take a string and return a valid filename constructed from the string.
|
||||||
|
Uses a whitelist approach: any characters not present in valid_chars are
|
||||||
|
removed. Also spaces are replaced with underscores.
|
||||||
|
|
||||||
|
Note: this method may produce invalid filenames such as ``, `.` or `..`
|
||||||
|
When I use this method I prepend a date string like '2009_01_15_19_46_32_'
|
||||||
|
and append a file extension like '.txt', so I avoid the potential of using
|
||||||
|
an invalid filename.
|
||||||
|
"""
|
||||||
|
valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
|
||||||
|
filename = ''.join(c for c in s if c in valid_chars)
|
||||||
|
filename = filename.replace(' ','_') # I don't like spaces in filenames.
|
||||||
|
return filename
|
||||||
|
|
||||||
#################### Now make the data generator threadsafe ####################
|
#################### Now make the data generator threadsafe ####################
|
||||||
|
|
||||||
class threadsafe_iter:
|
class threadsafe_iter:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue