Merge branch 'master' of ssh://invmac/~/Public/repos/speech-scoring

2017-12-06 17:32:46 +05:30
parent bcf1041bde b50edb980d
commit 8e14db2437
3 changed files with 37 additions and 36 deletions
--- a/speech_samplegen.py
+++ b/speech_samplegen.py
@@ -5,7 +5,6 @@ from Foundation import NSURL
 import json
 import csv
 import random
 import string
 import os
 import re
 import subprocess
@@ -13,36 +12,12 @@ import time
 from tqdm import tqdm
 from generate_similar import similar_phoneme_phrase,similar_phrase
 from speech_tools import hms_string,create_dir,format_filename
 OUTPUT_NAME = 'story_phrases'
 dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
 dest_file = './outputs/' + OUTPUT_NAME + '.csv'
 def hms_string(sec_elapsed):
    """Format a duration given in seconds as ``H:MM:SS.ss``.

    Hours are printed unpadded, minutes are zero-padded to two digits and
    seconds are zero-padded to width five with two decimals.
    """
    seconds_per_hour = 60 * 60
    hours = int(sec_elapsed / seconds_per_hour)
    minutes = int((sec_elapsed % seconds_per_hour) / 60)
    seconds = sec_elapsed % 60.
    template = "{}:{:>02}:{:>05.2f}"
    return template.format(hours, minutes, seconds)
 def create_dir(direc):
    """Create ``direc`` (and any missing parents) unless the path already exists."""
    if os.path.exists(direc):
        # Anything already present at this path is left untouched.
        return
    os.makedirs(direc)
 def format_filename(s):
    """
    Build a filename-friendly string from ``s``.

    Whitelist filter: only ASCII letters, digits, spaces and the characters
    ``-_.()`` are kept; every other character is dropped. Spaces that
    survive the filter are then turned into underscores.

    Caveat: the result can still be an unusable name such as ``, `.` or `..`,
    so callers should add a safe prefix (e.g. a date string like
    '2009_01_15_19_46_32_') and/or a file extension like '.txt'.
    """
    allowed = "-_.() %s%s" % (string.ascii_letters, string.digits)
    kept = ''.join(filter(lambda ch: ch in allowed, s))
    # Spaces are whitelisted above only so they can become underscores here.
    return kept.replace(' ', '_')
 def dest_filename(w, v, r, t):
    rand_no = str(random.randint(0, 10000))
--- a/speech_segmentgen.py
+++ b/speech_segmentgen.py
@@ -10,9 +10,7 @@ import json
 import csv
 import subprocess
 from tqdm import tqdm
-
+from speech_tools import create_dir,format_filename
 from speech_samplegen import SynthVariant, format_filename
 from speech_tools import create_dir
 apple_phonemes = [
    '%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW',
@@ -20,7 +18,7 @@ apple_phonemes = [
    'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z'
 ]
-OUTPUT_NAME = 'story_phrases_segments'
+OUTPUT_NAME = 'story_test_segments'
 dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
 csv_dest_file = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '.csv'
@@ -58,7 +56,10 @@ class Delegate (NSObject):
    def applicationDidFinishLaunching_(self, aNotification):
        '''Called automatically when the application has launched'''
        print("App Launched!")
-        generate_audio()
+        # phrases = story_texts()#random.sample(story_texts(), 100)  #
        phrases = test_texts(30)
        # print(phrases)
        generate_audio(phrases)
 class PhonemeTiming(object):
@@ -181,10 +182,13 @@ def story_texts():
    text_list = sorted(list(set(text_list_dup)))
    return text_list
 def test_texts(count=10):
    """Return ``count`` distinct entries sampled at random from the word list.

    Reads ``./inputs/wordlist.txt`` line by line, strips newline and
    underscore characters from both ends of each line, de-duplicates the
    entries and returns a sorted random sample of them.

    Args:
        count: number of entries to sample (default 10).

    Returns:
        A sorted list of ``count`` unique strings.

    Raises:
        ValueError: raised by ``random.sample`` when ``count`` is larger than
            the number of unique entries.
    """
    # The context manager closes the file; the previous version left the
    # handle returned by open() unclosed.
    with open('./inputs/wordlist.txt', 'r') as word_file:
        unique_words = {line.strip('\n_') for line in word_file}
    # Sample from a sorted list so a seeded ``random`` gives the same result
    # on every run (set iteration order of strings varies between processes).
    return sorted(random.sample(sorted(unique_words), count))
-def generate_audio():
+def generate_audio(phrases):
    synthQ = SynthesizerQueue()
    phrases = story_texts()#random.sample(story_texts(), 100)  # 
    f = open(csv_dest_file, 'w')
    s_csv_w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
    i = 0
--- a/speech_tools.py
+++ b/speech_tools.py
@@ -1,5 +1,6 @@
 import os
 import math
 import string
 import threading
 import multiprocessing
 import pandas as pd
@@ -101,10 +102,15 @@ def apply_by_multiprocessing(df,func,**kwargs):
 def square(x):
    """Return ``x`` multiplied by itself.

    Works for any operand type that defines ``*`` with itself.
    """
    # The previous body was ``x**x`` (x to the power x), which equals the
    # square only for x == 2 (and x == 1).
    return x * x
-if __name__ == '__main__':
+# if __name__ == '__main__':
-    df = pd.DataFrame({'a':range(10), 'b':range(10)})
+#     df = pd.DataFrame({'a':range(10), 'b':range(10)})
-    apply_by_multiprocessing(df, square, axis=1, workers=4)
+#     apply_by_multiprocessing(df, square, axis=1, workers=4)
 def hms_string(sec_elapsed):
    """Render a number of elapsed seconds as ``hours:minutes:seconds``.

    Minutes are zero-padded to two digits; seconds are shown with two
    decimals, zero-padded to a total width of five.
    """
    within_hour = sec_elapsed % (60 * 60)  # seconds left after whole hours
    return "{}:{:>02}:{:>05.2f}".format(
        int(sec_elapsed / (60 * 60)),
        int(within_hour / 60),
        sec_elapsed % 60.,
    )
 def rm_rf(d):
    for path in (os.path.join(d,f) for f in os.listdir(d)):
@@ -122,6 +128,22 @@ def create_dir(direc):
        create_dir(direc)
 def format_filename(s):
    """
    Turn an arbitrary string into a string usable as a filename.

    Characters are kept only if they appear in the whitelist (ASCII letters,
    digits, space and ``-_.()``); all others are removed. Any remaining
    spaces are replaced with underscores.

    Note: the output may still be an invalid filename such as ``, `.` or `..`
    Prepending a date string like '2009_01_15_19_46_32_' and appending a file
    extension like '.txt' avoids that.
    """
    whitelist = "-_.() %s%s" % (string.ascii_letters, string.digits)
    pieces = []
    for ch in s:
        if ch in whitelist:
            pieces.append(ch)
    cleaned = ''.join(pieces)
    # Underscores instead of spaces in the final name.
    return cleaned.replace(' ', '_')
 #################### Now make the data generator threadsafe ####################
 class threadsafe_iter: