implemented segmentation visualization

master
Malar Kannan 2017-11-30 14:49:55 +05:30
parent 0b1152b5c3
commit 6ef4e86f41
4 changed files with 132 additions and 20 deletions

View File

@ -1,12 +1,77 @@
import pandas as pd import pandas as pd
import numpy as np
import random
from functools import reduce
from speech_pitch import *
# %matplotlib inline
def fix_csv(collection_name = 'test'):
    """Re-save a header-less segmentation CSV with named columns.

    Reads ``./outputs/<collection_name>.csv`` using the six segment column
    names below and writes the labelled table (pandas index included) to
    ``./outputs/<collection_name>.fixed.csv``.
    """
    column_names = ['phrase', 'filename',
                    'start_phoneme', 'end_phoneme', 'start_time', 'end_time']
    base_path = './outputs/' + collection_name
    seg_data = pd.read_csv(base_path + '.csv', names=column_names)
    seg_data.to_csv(base_path + '.fixed.csv')
def pick_random_phrases(collection_name='test', n_phrases=10):
    """Pick random distinct phrases from a collection and save them to CSV.

    Reads ``./outputs/<collection_name>.fixed.csv``, samples distinct values
    of its ``phrase`` column and writes them as a single ``phrase`` column to
    ``./outputs/<collection_name>.random.csv``.

    Args:
        collection_name: Base name of the collection files under ``./outputs``.
        n_phrases: Number of phrases to pick; capped at the number of
            distinct phrases available so small collections do not raise.
    """
    base_path = './outputs/' + collection_name
    seg_data = pd.read_csv(base_path + '.fixed.csv', index_col=0)
    # Sorted distinct phrases: the same population, in the same order, that
    # grouping the rows by phrase would produce.
    phrases = sorted(seg_data['phrase'].dropna().unique())
    chosen = random.sample(phrases, min(n_phrases, len(phrases)))
    pd.DataFrame(chosen, columns=['phrase']).to_csv(base_path + '.random.csv')
# pick_random_phrases()
def plot_random_phrases(collection_name = 'test'):
    """Plot pitch of the saved random phrases beside self-recorded takes.

    Loads the phrases listed in ``./outputs/<collection_name>.random.csv``,
    selects their rows from ``./outputs/<collection_name>.fixed.csv`` and, for
    each phrase, plots the generated sample with a marker at every phoneme
    end time, followed by the matching recording from ``./inputs/self/``.

    Phrases are paired with the recordings purely by position (phrase group
    order against the order of ``self_files``); ``zip`` stops at the shorter
    of the two sequences.
    """
    base_path = './outputs/' + collection_name
    rand_words = pd.read_csv(base_path + '.random.csv', index_col=0)
    seg_data = pd.read_csv(base_path + '.fixed.csv', index_col=0)
    # isin() also copes with an empty phrase list, unlike OR-ing per-phrase
    # masks starting from element 0.
    chosen = seg_data['phrase'].isin(rand_words['phrase'].tolist())
    phrase_groups = list(seg_data[chosen].groupby('phrase'))
    self_files = ['a_wrong_turn-low1.aiff', 'great_pin-low1.aiff',
                  'he_set_off_at_once_to_find_the_beast-low1.aiff',
                  'hound-low1.aiff', 'noises-low1.aiff', 'po_burped-low1.aiff',
                  'she_loves_the_roses-low1.aiff', 'the_busy_spider-low1.aiff',
                  'the_rain_helped-low1.aiff', 'to_go_to_the_doctor-low1.aiff']
    co_files = ['./inputs/self/' + name for name in self_files]
    # One output stream for the whole run, always released even if loading or
    # plotting a sample raises.
    player, closer = play_sound()
    try:
        for (ph, g), s_f in zip(phrase_groups, co_files):
            # Every row of a group is read from the group's first filename.
            phrase_sample = pm_snd(base_path + '/' + g.iloc[0]['filename'])
            self_sample = pm_snd(s_f)
            print(ph)
            # end_time is divided by 1000 before plotting -- presumably
            # milliseconds to seconds; confirm against the CSV producer.
            phon_stops = [(phon['end_time'] / 1000, phon['start_phoneme'])
                          for _, phon in g.iterrows()]
            plot_sample_pitch(phrase_sample, phons=phon_stops)
            plot_sample_pitch(self_sample)
            # Playback is currently disabled; enable with:
            # player(phrase_sample)
    finally:
        closer()
plot_random_phrases()

View File

@ -22,18 +22,30 @@ def accuracy(y_true, y_pred):
''' '''
return K.mean(K.equal(y_true, K.cast(y_pred > 0.5, y_true.dtype))) return K.mean(K.equal(y_true, K.cast(y_pred > 0.5, y_true.dtype)))
def ctc_lambda_func(args):
    """Compute the batch CTC cost from a packed argument sequence.

    ``args`` unpacks to ``(y_pred, labels, input_length, label_length)``.
    The first two steps along axis 1 of ``y_pred`` are dropped before the
    cost is computed, because the first couple of RNN outputs tend to be
    garbage.
    """
    predictions, labels, input_length, label_length = args
    # Skipping the first two time steps is critical here.
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(labels, trimmed, input_length, label_length)
def segment_model(input_dim):
    """Build the convolution + bidirectional-GRU segmentation model.

    Args:
        input_dim: Shape handed to ``Input(shape=...)``. It is used as given;
            it is no longer overwritten with a fixed ``(100, 100, 1)``.

    Returns:
        A ``Model`` mapping the input to a 2-way softmax emitted at every
        step of the recurrent sequence (all GRU layers return sequences).
    """
    inp = Input(shape=input_dim)
    cnv1 = Conv2D(filters=32, kernel_size=(5,9))(inp)
    cnv2 = Conv2D(filters=1, kernel_size=(5,9))(cnv1)
    # NOTE(review): rate=0.95 is unusually high for a dropout layer --
    # confirm it is intended.
    dr_cnv2 = Dropout(rate=0.95)(cnv2)
    # cnv2 has a single filter, so the trailing axis is dropped and the two
    # remaining axes become the sequence fed to the recurrent layers.
    cn_rnn_dim = (dr_cnv2.shape[1].value, dr_cnv2.shape[2].value)
    r_dr_cnv2 = Reshape(target_shape=cn_rnn_dim)(dr_cnv2)
    b_gr1 = Bidirectional(GRU(512, return_sequences=True), merge_mode='sum')(r_dr_cnv2)
    b_gr2 = Bidirectional(GRU(512, return_sequences=True), merge_mode='sum')(b_gr1)
    b_gr3 = Bidirectional(GRU(512, return_sequences=True), merge_mode='sum')(b_gr2)
    oup = Dense(2, activation='softmax')(b_gr3)
    return Model(inp, oup)
def write_model_arch(mod,mod_file): def write_model_arch(mod,mod_file):
model_f = open(mod_file,'w') model_f = open(mod_file,'w')
@ -58,7 +70,7 @@ def train_segment(collection_name = 'test'):
model = segment_model(input_dim) model = segment_model(input_dim)
plot_model(model,show_shapes=True, to_file=model_dir+'/model.png') plot_model(model,show_shapes=True, to_file=model_dir+'/model.png')
# loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])
tb_cb = TensorBoard( tb_cb = TensorBoard(
log_dir=log_dir, log_dir=log_dir,
histogram_freq=1, histogram_freq=1,
@ -100,5 +112,4 @@ def train_segment(collection_name = 'test'):
if __name__ == '__main__':
    # Train on the 'test' collection when run as a script; the leftover
    # pdb breakpoint is dropped so the run is not interrupted.
    train_segment('test')

View File

@ -3,6 +3,7 @@ from pysndfile import sndio as snd
import numpy as np import numpy as np
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import seaborn as sns import seaborn as sns
import pyaudio as pa
sns.set() # Use seaborn's default style to make graphs more pretty sns.set() # Use seaborn's default style to make graphs more pretty
@ -84,8 +85,11 @@ def plot_sample_raw(sample_file='outputs/audio/sunflowers-Victoria-180-normal-87
plt.ylabel("amplitude") plt.ylabel("amplitude")
plt.show() plt.show()
def plot_file_intensity(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Load ``sample_file`` with ``pm_snd`` and plot its intensity."""
    plot_sample_intensity(pm_snd(sample_file))
def plot_sample_intensity(snd_d):
intensity = snd_d.to_intensity() intensity = snd_d.to_intensity()
spectrogram = snd_d.to_spectrogram() spectrogram = snd_d.to_spectrogram()
plt.figure() plt.figure()
@ -95,17 +99,41 @@ def plot_sample_intensity(sample_file='outputs/audio/sunflowers-Victoria-180-nor
plt.xlim([snd_d.xmin, snd_d.xmax]) plt.xlim([snd_d.xmin, snd_d.xmax])
plt.show() plt.show()
def plot_file_pitch(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Load ``sample_file`` with ``pm_snd`` and plot its pitch."""
    plot_sample_pitch(pm_snd(sample_file))
def plot_sample_pitch(snd_d, phons=()):
    """Plot the spectrogram and pitch of a sound, with optional markers.

    Args:
        snd_d: Sound object providing ``to_pitch()``, ``to_spectrogram()``,
            ``xmin`` and ``xmax``.
        phons: Iterable of ``(x_position, label)`` pairs. A vertical line is
            drawn at each position and the label is written there at y=-1.
            Defaults to an immutable empty tuple (no markers).
    """
    pitch = snd_d.to_pitch()
    spectrogram = snd_d.to_spectrogram(window_length=0.03, maximum_frequency=8000)
    plt.figure()
    draw_spectrogram(spectrogram)
    # Pitch is drawn on a second y-axis sharing the spectrogram's x-axis.
    plt.twinx()
    draw_pitch(pitch)
    for position, label in phons:
        plt.axvline(x=position)
        plt.text(position, -1, label)
    plt.xlim([snd_d.xmin, snd_d.xmax])
    plt.show()
def play_sound(samplerate=22050):
    """Open an audio output stream and return ``(player, closer)`` callables.

    The stream is opened as 2-channel float32 output at ``samplerate``.
    ``player(snd_sample)`` writes a sound to the stream; ``closer()`` closes
    the stream and terminates the PyAudio instance.
    """
    #snd_sample = pm_snd('outputs/test/a_warm_smile_and_a_good_heart-1917.aiff')
    audio = pa.PyAudio()
    out_stream = audio.open(format=pa.paFloat32,
                            channels=2,
                            rate=samplerate,
                            output=True)

    def sample_player(snd_sample=None):
        # NOTE(review): takes column 0 of as_array(); confirm the array is
        # laid out (n_samples, n_channels) -- if it is channels-first this
        # selects one sample per channel instead of a channel.
        mono = snd_sample.as_array()[:, 0]
        # Duplicate the signal into both output channels, interleaved.
        interleaved = np.stack((mono, mono), axis=1).reshape(-1)
        out_stream.write(interleaved.astype(np.float32).tobytes())

    def close_player():
        out_stream.close()
        audio.terminate()

    return sample_player, close_player
# snd_part = snd_d.extract_part(from_time=0.9, preserve_times=True) # snd_part = snd_d.extract_part(from_time=0.9, preserve_times=True)
# plt.figure() # plt.figure()
# plt.plot(snd_part.xs(), snd_part.values, linewidth=0.5) # plt.plot(snd_part.xs(), snd_part.values, linewidth=0.5)
@ -116,8 +144,16 @@ def plot_sample_pitch(sample_file='outputs/audio/sunflowers-Victoria-180-normal-
if __name__ == '__main__':
    plot_file_pitch('outputs/audio/sunflowers-Victoria-180-normal-870.aiff')
    # play_sound() takes a sample rate and returns (player, closer); the
    # sound itself must be handed to the player, not to play_sound().
    player, closer = play_sound()
    try:
        for sample_path in ('outputs/test/a_warm_smile_and_a_good_heart-1917.aiff',
                            'outputs/test/a_wrong_turn-3763.aiff',
                            'inputs/self/a_wrong_turn-low1.aiff',
                            'inputs/self/a_wrong_turn-low2.aiff'):
            plot_file_pitch(sample_path)
            player(pm_snd(sample_path))
    finally:
        # Release the audio stream even if loading or plotting fails.
        closer()
    plot_file_pitch('inputs/self/apple-low1.aiff')
    plot_file_pitch('inputs/self/apple-low2.aiff')
    plot_file_pitch('inputs/self/apple-medium1.aiff')

View File

@ -20,7 +20,7 @@ apple_phonemes = [
'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z' 'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z'
] ]
OUTPUT_NAME = 'test' OUTPUT_NAME = 'story_phrases_segments'
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/' dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
csv_dest_file = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '.csv' csv_dest_file = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '.csv'
@ -184,7 +184,7 @@ def story_texts():
def generate_audio(): def generate_audio():
synthQ = SynthesizerQueue() synthQ = SynthesizerQueue()
phrases = random.sample(story_texts(), 100) # story_texts() phrases = story_texts()#random.sample(story_texts(), 100) #
f = open(csv_dest_file, 'w') f = open(csv_dest_file, 'w')
s_csv_w = csv.writer(f, quoting=csv.QUOTE_MINIMAL) s_csv_w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
i = 0 i = 0