implemented segmentation visualization
parent
0b1152b5c3
commit
6ef4e86f41
|
|
@ -1,12 +1,77 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
import random
|
||||
from functools import reduce
|
||||
from speech_pitch import *
|
||||
# %matplotlib inline
|
||||
|
||||
def fix_csv(collection_name='test'):
    """Re-save a header-less segmentation CSV with named columns.

    Reads ``./outputs/<collection_name>.csv`` treating every row as data,
    labels the six columns with the names listed below, and writes the
    result (pandas' default index column included) to
    ``./outputs/<collection_name>.fixed.csv``.
    """
    column_names = ['phrase', 'filename', 'start_phoneme', 'end_phoneme',
                    'start_time', 'end_time']
    base_path = './outputs/' + collection_name
    labelled = pd.read_csv(base_path + '.csv', names=column_names)
    labelled.to_csv(base_path + '.fixed.csv')
|
||||
|
||||
|
||||
def segment_data_gen(collection_name = 'test'):
|
||||
def pick_random_phrases(collection_name='test', n_phrases=10):
    """Sample distinct phrases from a collection and save them to CSV.

    Reads ``./outputs/<collection_name>.fixed.csv`` (first column is the
    index), draws up to ``n_phrases`` distinct values of its ``phrase``
    column without replacement, and writes them as a one-column table to
    ``./outputs/<collection_name>.random.csv``.

    Args:
        collection_name: base name of the collection files under ``./outputs``.
        n_phrases: how many phrases to draw; clamped to the number of
            distinct phrases available so small collections do not make
            ``random.sample`` raise ``ValueError``.
    """
    # The parameter used to be overwritten with 'test' here (an interactive
    # debugging leftover), so any other collection name was silently ignored.
    base_path = './outputs/' + collection_name
    seg_data = pd.read_csv(base_path + '.fixed.csv', index_col=0)

    # Only the phrase labels are needed, so sample the distinct values
    # directly; missing phrases are dropped, as groupby would have done.
    phrases = seg_data['phrase'].dropna().unique().tolist()
    chosen = random.sample(phrases, min(n_phrases, len(phrases)))

    pd.DataFrame(chosen, columns=['phrase']).to_csv(base_path + '.random.csv')
|
||||
|
||||
# pick_random_phrases()
|
||||
|
||||
def plot_random_phrases(collection_name='test'):
    """Plot pitch for each sampled phrase next to a self-recorded take.

    Loads the phrases listed in ``./outputs/<collection_name>.random.csv``,
    selects their rows from ``./outputs/<collection_name>.fixed.csv`` and,
    for every phrase, plots the pitch of the phrase audio with one vertical
    marker per segment row, followed by the pitch plot of a recording from
    ``./inputs/self/``.

    Args:
        collection_name: base name of the collection; also used as the
            directory under ``./outputs`` that holds the phrase audio.
    """
    # The parameter used to be overwritten with 'test' here, and the audio
    # directory was hard-coded to './outputs/test/'; both now follow the argument.
    base_path = './outputs/' + collection_name
    rand_words = pd.read_csv(base_path + '.random.csv', index_col=0)
    seg_data = pd.read_csv(base_path + '.fixed.csv', index_col=0)

    # isin() builds the same row mask as OR-ing one equality test per phrase
    # and, unlike indexing the first list element, copes with an empty list.
    selected = seg_data[seg_data['phrase'].isin(rand_words['phrase'].tolist())]
    # Grouping by the plain column name keeps each key a string; a one-element
    # list key yields tuple keys on recent pandas versions.
    phrase_groups = list(selected.groupby('phrase'))

    self_files = ['a_wrong_turn-low1.aiff', 'great_pin-low1.aiff',
                  'he_set_off_at_once_to_find_the_beast-low1.aiff',
                  'hound-low1.aiff', 'noises-low1.aiff', 'po_burped-low1.aiff',
                  'she_loves_the_roses-low1.aiff', 'the_busy_spider-low1.aiff',
                  'the_rain_helped-low1.aiff', 'to_go_to_the_doctor-low1.aiff']
    co_files = ['./inputs/self/' + name for name in self_files]

    # Groups come out of groupby sorted by phrase and are paired positionally
    # with the recordings above.
    # NOTE(review): presumably the sampled phrases are expected to be exactly
    # the ones these recordings cover - confirm, nothing here checks it.
    for (ph, g), s_f in zip(phrase_groups, co_files):
        file_path = base_path + '/' + g.iloc[0]['filename']
        phrase_sample = pm_snd(file_path)
        self_sample = pm_snd(s_f)
        player, closer = play_sound()
        try:
            print(ph)
            # NOTE(review): dividing by 1000 suggests end_time is stored in
            # milliseconds while the plot axis is in seconds - confirm.
            phon_stops = list(zip(g['end_time'] / 1000, g['start_phoneme']))
            plot_sample_pitch(phrase_sample, phons=phon_stops)
            plot_sample_pitch(self_sample)
            # player(phrase_sample)  # playback call left disabled, as before
        finally:
            # Release the audio stream even if loading or plotting fails.
            closer()
|
||||
|
||||
# Module-level call: the plots are produced as soon as this file is run or imported.
plot_random_phrases()
|
||||
|
|
|
|||
|
|
@ -22,18 +22,30 @@ def accuracy(y_true, y_pred):
|
|||
'''
|
||||
return K.mean(K.equal(y_true, K.cast(y_pred > 0.5, y_true.dtype)))
|
||||
|
||||
def ctc_lambda_func(args):
    """Compute the batch CTC cost from a packed argument list.

    ``args`` is unpacked, in order, as ``(y_pred, labels, input_length,
    label_length)``; the result of ``K.ctc_batch_cost`` is returned.
    """
    predictions, labels, input_length, label_length = args
    # The first two time steps are discarded: the first couple of RNN
    # outputs tend to be garbage.
    # NOTE(review): input_length presumably already accounts for the two
    # dropped steps - confirm against the caller.
    trimmed = predictions[:, 2:, :]
    return K.ctc_batch_cost(labels, trimmed, input_length, label_length)
|
||||
|
||||
def segment_model(input_dim):
    """Build the convolution + bidirectional-GRU segmentation model.

    Two ``Conv2D`` layers (32 filters, then 1 filter, both with a (5, 9)
    kernel) feed a dropout layer; the single-channel result is reshaped to a
    2-D sequence and passed through three summed bidirectional GRU layers
    that all return sequences, followed by a 2-unit softmax ``Dense`` layer
    applied per step.

    Superseded lines that were still present (a 512-filter ``cnv1``, GRU
    layers without ``merge_mode`` and an early ``return Model(inp, b_gr3)``
    that made the softmax head unreachable) have been removed.

    Args:
        input_dim: input shape for the model (currently overridden, see below).

    Returns:
        A ``Model`` mapping the input tensor to the per-step softmax output.
    """
    # NOTE(review): this override makes the argument irrelevant. It is kept so
    # existing callers see the same network; remove it once callers are known
    # to pass a (height, width, channels) shape.
    input_dim = (100, 100, 1)
    inp = Input(shape=input_dim)
    cnv1 = Conv2D(filters=32, kernel_size=(5, 9))(inp)
    cnv2 = Conv2D(filters=1, kernel_size=(5, 9))(cnv1)
    # NOTE(review): rate=0.95 drops 95% of the activations during training -
    # confirm this is intended.
    dr_cnv2 = Dropout(rate=0.95)(cnv2)

    # Drop the batch axis and the single channel so the GRUs receive a 2-D
    # sequence. K.int_shape returns plain ints, whereas `.shape[i].value`
    # only works where shape entries are Dimension objects.
    cn_rnn_dim = K.int_shape(dr_cnv2)[1:3]
    r_dr_cnv2 = Reshape(target_shape=cn_rnn_dim)(dr_cnv2)

    b_gr1 = Bidirectional(GRU(512, return_sequences=True), merge_mode='sum')(r_dr_cnv2)
    b_gr2 = Bidirectional(GRU(512, return_sequences=True), merge_mode='sum')(b_gr1)
    b_gr3 = Bidirectional(GRU(512, return_sequences=True), merge_mode='sum')(b_gr2)
    oup = Dense(2, activation='softmax')(b_gr3)
    return Model(inp, oup)
|
||||
|
||||
def write_model_arch(mod,mod_file):
|
||||
model_f = open(mod_file,'w')
|
||||
|
|
@ -58,7 +70,7 @@ def train_segment(collection_name = 'test'):
|
|||
|
||||
model = segment_model(input_dim)
|
||||
plot_model(model,show_shapes=True, to_file=model_dir+'/model.png')
|
||||
|
||||
# loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')([y_pred, labels, input_length, label_length])
|
||||
tb_cb = TensorBoard(
|
||||
log_dir=log_dir,
|
||||
histogram_freq=1,
|
||||
|
|
@ -100,5 +112,4 @@ def train_segment(collection_name = 'test'):
|
|||
|
||||
|
||||
if __name__ == '__main__':
    # Script entry point: train on the 'test' collection. The unconditional
    # pdb breakpoint that used to precede this call was a debugging leftover
    # and stopped every non-interactive run at a prompt.
    train_segment('test')
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ from pysndfile import sndio as snd
|
|||
import numpy as np
|
||||
import matplotlib.pyplot as plt
|
||||
import seaborn as sns
|
||||
import pyaudio as pa
|
||||
sns.set() # Use seaborn's default style to make graphs more pretty
|
||||
|
||||
|
||||
|
|
@ -84,8 +85,11 @@ def plot_sample_raw(sample_file='outputs/audio/sunflowers-Victoria-180-normal-87
|
|||
plt.ylabel("amplitude")
|
||||
plt.show()
|
||||
|
||||
def plot_sample_intensity(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
|
||||
def plot_file_intensity(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Load the audio file at ``sample_file`` and plot its intensity.

    Thin wrapper: the file is loaded with ``pm_snd`` and the resulting
    sound object is handed to ``plot_sample_intensity``.
    """
    plot_sample_intensity(pm_snd(sample_file))
|
||||
|
||||
def plot_sample_intensity(snd_d):
|
||||
intensity = snd_d.to_intensity()
|
||||
spectrogram = snd_d.to_spectrogram()
|
||||
plt.figure()
|
||||
|
|
@ -95,17 +99,41 @@ def plot_sample_intensity(sample_file='outputs/audio/sunflowers-Victoria-180-nor
|
|||
plt.xlim([snd_d.xmin, snd_d.xmax])
|
||||
plt.show()
|
||||
|
||||
def plot_sample_pitch(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
|
||||
def plot_file_pitch(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Load the audio file at ``sample_file`` and plot its pitch.

    Thin wrapper: the file is loaded with ``pm_snd`` and the resulting
    sound object is handed to ``plot_sample_pitch``.
    """
    plot_sample_pitch(pm_snd(sample_file))
|
||||
|
||||
def plot_sample_pitch(snd_d, phons=()):
    """Plot the pitch track of a sound over its spectrogram.

    Draws the spectrogram, overlays the pitch on a twin y-axis, and marks
    each entry of ``phons`` with a vertical line and a text label.

    Args:
        snd_d: sound object providing ``to_pitch()``, ``to_spectrogram()``,
            ``xmin`` and ``xmax``.
        phons: iterable of ``(x_position, label)`` pairs to mark on the
            plot. Defaults to an empty tuple rather than a mutable list, so
            the default object can never be shared and modified across calls.
    """
    pitch = snd_d.to_pitch()
    spectrogram = snd_d.to_spectrogram(window_length=0.03, maximum_frequency=8000)
    plt.figure()
    draw_spectrogram(spectrogram)
    plt.twinx()
    draw_pitch(pitch)
    for stop_x, label in phons:
        plt.axvline(x=stop_x)
        # Label is placed at y = -1 on the pitch axis, next to its marker line.
        plt.text(stop_x, -1, label)
    plt.xlim([snd_d.xmin, snd_d.xmax])
    plt.show()
|
||||
|
||||
def play_sound(samplerate=22050):
    """Open a stereo float32 output stream and return its player/closer pair.

    Args:
        samplerate: rate passed to the PyAudio output stream.

    Returns:
        ``(sample_player, close_player)``: ``sample_player(snd_sample)``
        writes a sound object to the stream; ``close_player()`` closes the
        stream and terminates the PyAudio instance.
    """
    audio = pa.PyAudio()
    out_stream = audio.open(format=pa.paFloat32,
                            channels=2,
                            rate=samplerate,
                            output=True)

    def sample_player(snd_sample=None):
        # NOTE(review): [:, 0] takes the first column of as_array(); confirm
        # that axis 0 is the sample axis for the objects passed in here.
        mono = snd_sample.as_array()[:, 0]
        # Duplicate every sample so the same signal feeds both output channels
        # in interleaved order.
        interleaved = np.repeat(mono, 2)
        out_stream.write(interleaved.astype(np.float32).tobytes())

    def close_player():
        out_stream.close()
        audio.terminate()

    return sample_player, close_player
|
||||
# snd_part = snd_d.extract_part(from_time=0.9, preserve_times=True)
|
||||
# plt.figure()
|
||||
# plt.plot(snd_part.xs(), snd_part.values, linewidth=0.5)
|
||||
|
|
@ -116,8 +144,16 @@ def plot_sample_pitch(sample_file='outputs/audio/sunflowers-Victoria-180-normal-
|
|||
|
||||
|
||||
if __name__ == '__main__':
    # Demo: plot the pitch of a few recordings and play some of them back.
    #
    # play_sound() takes a sample rate and returns a (player, closer) pair, so
    # the earlier `play_sound(pm_snd(path))` calls passed a sound object as the
    # rate and never played or closed anything. The stream is now opened once,
    # each sound goes through the returned player, and the stream is always
    # closed. Pre-change calls that passed file paths to plot_sample_pitch
    # (which now expects a sound object) are gone in favour of plot_file_pitch.
    # NOTE(review): the stream uses play_sound's default rate - confirm it
    # matches these files.
    # mom_snd = pm_snd('outputs/test/moms_are_engineers-7608.aiff')
    player, closer = play_sound()
    try:
        plot_file_pitch('outputs/audio/sunflowers-Victoria-180-normal-870.aiff')
        for demo_file in ('outputs/test/a_warm_smile_and_a_good_heart-1917.aiff',
                          'outputs/test/a_wrong_turn-3763.aiff',
                          'inputs/self/a_wrong_turn-low1.aiff',
                          'inputs/self/a_wrong_turn-low2.aiff'):
            plot_file_pitch(demo_file)
            player(pm_snd(demo_file))
        plot_file_pitch('inputs/self/apple-low1.aiff')
        plot_file_pitch('inputs/self/apple-low2.aiff')
        plot_file_pitch('inputs/self/apple-medium1.aiff')
    finally:
        closer()
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ apple_phonemes = [
|
|||
'l', 'm', 'n', 'N', 'p', 'r', 's', 'S', 't', 'T', 'v', 'w', 'y', 'z', 'Z'
|
||||
]
|
||||
|
||||
OUTPUT_NAME = 'test'
|
||||
OUTPUT_NAME = 'story_phrases_segments'
|
||||
|
||||
dest_dir = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '/'
|
||||
csv_dest_file = os.path.abspath('.') + '/outputs/' + OUTPUT_NAME + '.csv'
|
||||
|
|
@ -184,7 +184,7 @@ def story_texts():
|
|||
|
||||
def generate_audio():
|
||||
synthQ = SynthesizerQueue()
|
||||
phrases = random.sample(story_texts(), 100) # story_texts()
|
||||
phrases = story_texts()#random.sample(story_texts(), 100) #
|
||||
f = open(csv_dest_file, 'w')
|
||||
s_csv_w = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
|
||||
i = 0
|
||||
|
|
|
|||
Loading…
Reference in New Issue