Merge branch 'master' of /home/ilml/Public/Repos/speech_scoring

master
Malar Kannan 2017-11-24 14:26:40 +05:30
commit ec317b6628
2 changed files with 214 additions and 0 deletions

speech_similar.py (new file, 141 lines)

@@ -0,0 +1,141 @@
import pandas as pd
import pronouncing
import re
import numpy as np
import random
# mapping = {
# s.split()[0]: s.split()[1]
# for s in """
# AA AA
# AE AE
# AH UX
# AO AO
# AW AW
# AY AY
# B b
# CH C
# D d
# DH D
# EH EH
# ER UXr
# EY EY
# F f
# G g
# HH h
# IH IH
# IY IY
# JH J
# K k
# L l
# M m
# N n
# NG N
# OW OW
# OY OY
# P p
# R r
# S s
# SH S
# T t
# TH T
# UH UH
# UW UW
# V v
# W w
# Y y
# Z z
# ZH Z
# """.strip().split('\n')
# }
# sim_mat = pd.read_csv('./similarity.csv', header=0, index_col=0)
#
#
# def convert_ph(ph):
# stress_level = re.search("(\w+)([0-9])", ph)
# if stress_level:
# return stress_level.group(2) + mapping[stress_level.group(1)]
# else:
# return mapping[ph]
#
#
# def sim_mat_to_apple_table(smt):
# colnames = [convert_ph(ph) for ph in smt.index.tolist()]
# smt = pd.DataFrame(np.nan_to_num(smt.values))
# fsmt = (smt.T + smt)
# np.fill_diagonal(fsmt.values, 100.0)
# asmt = pd.DataFrame.copy(fsmt)
# asmt.columns = colnames
# asmt.index = colnames
# apple_sim_table = asmt.stack().reset_index()
# apple_sim_table.columns = ['q', 'r', 's']
# return apple_sim_table
#
#
# apple_sim_table = sim_mat_to_apple_table(sim_mat)
#
#
# def top_match(ph):
# selected = apple_sim_table[(apple_sim_table.q == ph)
# & (apple_sim_table.s < 100) &
# (apple_sim_table.s >= 70)]
# tm = ph
# if len(selected) > 0:
# tm = pd.DataFrame.sort_values(selected, 's', ascending=False).iloc[0].r
# return tm
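#
# Example of the intended conversion (kept commented out, like the helpers above;
# it assumes the CMU pronunciation "T EH1 S T" for the word "test"):
#   [convert_ph(p) for p in "T EH1 S T".split()]  ->  ['t', '1EH', 's', 't']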
# Phoneme symbols recognised by Apple's speech synthesiser.
apple_phonemes = [
    '%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW',
    'UH', 'UX', 'OW', 'AW', 'OY', 'b', 'C', 'd', 'D', 'f', 'g', 'h', 'J', 'k',
    'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z'
]


class ApplePhoneme(object):
    """A single Apple phoneme with an optional lexical stress level."""

    def __init__(self, phone, stress, vowel=False):
        super(ApplePhoneme, self).__init__()
        self.phone = phone
        self.stress = stress
        self.vowel = vowel

    def __str__(self):
        # Stress digits are only written in front of stressed vowels.
        return (str(self.stress) if (self.vowel and self.stress > 0) else '') + self.phone

    def __repr__(self):
        return "'{}'".format(str(self))

    def adjust_stress(self):
        # Move the vowel to a different stress level (0, 1 or 2) at random.
        self.stress = random.choice([i for i in range(3) if i != self.stress])


def parse_apple_phonemes(ph_str):
    """Split an Apple phoneme string such as 't1EHst' into ApplePhoneme objects."""
    for i in range(len(ph_str)):
        pref, rest = ph_str[:i + 1], ph_str[i + 1:]
        if pref in apple_phonemes:
            vowel = pref[0] in 'AEIOU'
            return [ApplePhoneme(pref, 0, vowel)] + parse_apple_phonemes(rest)
        elif pref[0].isdigit() and pref[1:] in apple_phonemes:
            # A leading digit gives the stress level of the vowel that follows it.
            return [ApplePhoneme(pref[1:], int(pref[0]), True)] + parse_apple_phonemes(rest)
        elif not pref.isalnum():
            # Pass punctuation and other non-phoneme characters through untouched.
            return [ApplePhoneme(pref, 0, False)] + parse_apple_phonemes(rest)
    return []


def similar_phoneme_word(ph_str):
    """Return the word with the stress of one randomly chosen vowel changed."""
    phons = parse_apple_phonemes(ph_str)
    vowels = [i for i in phons if i.vowel]
    if vowels:
        random.choice(vowels).adjust_stress()
    return ''.join([str(i) for i in phons])


def similar_phoneme_phrase(ph_str):
    return ' '.join([similar_phoneme_word(w) for w in ph_str.split()])


def similar_word(word_str):
    # Swap the word for a random rhyme from CMUdict, if any rhyme exists.
    similar = pronouncing.rhymes(word_str)
    return random.choice(similar) if len(similar) > 0 else word_str


def similar_phrase(ph_str):
    return ' '.join([similar_word(w) for w in ph_str.split()])
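As a usage note (not part of the committed file): assuming the pronouncing package and its bundled CMUdict data are installed, the two phrase-level helpers above can be exercised as below; both pick substitutions at random, so the outputs are only illustrative.

random.seed(0)  # optional: make the random substitutions repeatable

# Word level: each word is swapped for a random CMUdict rhyme, if one exists,
# e.g. "test" may come back as "best" or "rest".
print(similar_phrase("this is a test"))

# Phoneme level: the stress digit of one vowel per word is moved to another level,
# e.g. 't1EHst' may come back as 'tEHst' (stress 0) or 't2EHst' (stress 2).
print(similar_phoneme_phrase("t1EHst"))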

speech_tts_queue.py (new file, 73 lines)

@@ -0,0 +1,73 @@
import objc
from AppKit import *
from Foundation import NSURL
from PyObjCTools import AppHelper
from time import time

# Apple phoneme symbols, indexed by the opcode passed to willSpeakPhoneme.
apple_phonemes = [
    '%', '@', 'AE', 'EY', 'AO', 'AX', 'IY', 'EH', 'IH', 'AY', 'IX', 'AA', 'UW',
    'UH', 'UX', 'OW', 'AW', 'OY', 'b', 'C', 'd', 'D', 'f', 'g', 'h', 'J', 'k',
    'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z'
]
len(apple_phonemes)

# (phoneme, timestamp) pairs collected while the synthesizer speaks.
speech_phoneme_data = []


class SpeechDelegate(NSObject):
    '''Delegate that records phoneme boundary times while NSSpeechSynthesizer speaks.'''

    def speechSynthesizer_willSpeakWord_ofString_(self, sender, word, text):
        '''Called just before the synthesizer speaks a word of the text'''
        print("Speaking word {} in sentence {}".format(word, text))

    def speechSynthesizer_willSpeakPhoneme_(self, sender, phoneme):
        '''Called at each phoneme boundary; `phoneme` is an opcode into apple_phonemes'''
        phon_ch = apple_phonemes[phoneme]
        # print('first',speech_phoneme_data)
        # prev_time = speech_phoneme_data[-1][1]
        # print('prev_time',prev_time)
        speech_phoneme_data.append((phon_ch, time()))
        print("phoneme boundary for {} time {}".format(phon_ch, time()))
        # NSApp().terminate_(self)

    def speechSynthesizer_didFinishSpeaking_(self, synth, didFinishSpeaking):
        '''Called when speech ends: close the sequence with '%' and print per-phoneme durations'''
        speech_phoneme_data.append(('%', time()))
        print("finished speaking time {}".format(time()))
        diff_time = []
        for i in range(len(speech_phoneme_data) - 1):
            dur = speech_phoneme_data[i + 1][1] - speech_phoneme_data[i][1]
            diff_time.append((speech_phoneme_data[i][0], dur))
        print(diff_time)
# del SpeechDelegate
class Delegate(NSObject):
    def applicationDidFinishLaunching_(self, aNotification):
        '''Called automatically when the application has launched'''
        print("Window, World!")

    def windowWillClose_(self, aNotification):
        '''Called automatically when the window is closed'''
        print("Window has been closed")
        # Terminate the application
        NSApp().terminate_(self)


def main():
    speech_delg = SpeechDelegate.alloc().init()
    # Manual smoke test of the callback; note that it leaves a spurious initial
    # ('%', timestamp) entry in speech_phoneme_data before any real synthesis.
    speech_delg.speechSynthesizer_didFinishSpeaking_('t', True)
    voices = NSSpeechSynthesizer.availableVoices()
    # Pick one of the installed voices; the index is machine-dependent.
    identifier = voices[2]
    time()
    alex_voice = NSSpeechSynthesizer.alloc().initWithVoice_(identifier)
    alex_voice.setDelegate_(speech_delg)
    alex_voice.startSpeakingString_("This is a test for speech synthesis generation")
    # Create a new application instance ...
    a = NSApplication.sharedApplication()
    # ... and create its delegate. Note the use of the
    # Objective C constructors below, because Delegate
    # is a subclass of an Objective C class, NSObject
    delegate = Delegate.alloc().init()
    # Tell the application which delegate object to use.
    a.setDelegate_(delegate)
    AppHelper.runEventLoop()


if __name__ == '__main__':
    main()
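As a post-processing note (not part of the committed file): the per-phoneme durations printed inside speechSynthesizer_didFinishSpeaking_ can also be recomputed later from the collected samples. A minimal sketch; phoneme_durations is a hypothetical helper name, not something defined in this commit.

def phoneme_durations(samples):
    '''Turn [(phoneme, timestamp), ...] into [(phoneme, duration), ...].

    Each duration is the gap to the next boundary; the trailing '%' marker has
    no successor and is dropped, matching the loop inside the delegate.
    '''
    return [(ph, samples[i + 1][1] - t) for i, (ph, t) in enumerate(samples[:-1])]

# e.g. phoneme_durations(speech_phoneme_data) once synthesis has finished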