diff --git a/speech_data.py b/speech_data.py
index c48950d..b254bee 100644
--- a/speech_data.py
+++ b/speech_data.py
@@ -1,7 +1,8 @@
 import pandas as pd
 from pandas_parallel import apply_by_multiprocessing
-import dask as dd
-import dask.dataframe as ddf
+# import dask as dd
+# import dask.dataframe as ddf
+import tensorflow as tf
 import numpy as np
 from spectro_gen import generate_aiff_spectrogram
 from sklearn.model_selection import train_test_split
@@ -91,13 +92,14 @@ def create_spectrogram_data(audio_group='audio'):
     audio_samples.to_pickle('outputs/{}-spectrogram.pkl'.format(audio_group))
 def create_spectrogram_tfrecords(audio_group='audio'):
+    # audio_group = 'story_words'
     audio_samples = pd.read_csv(
         './outputs/' + audio_group + '.csv'
         , names=['word','phonemes', 'voice', 'language', 'rate', 'variant', 'file']
         , quoting=csv.QUOTE_NONE)
     # audio_samples = audio_samples.loc[audio_samples['word'] ==
     # 'sunflowers'].reset_index(drop=True)
-    audio_samples['file_paths'] = audio_samples.loc[:, 'file'].apply(lambda x: 'outputs/' + audio_group + '/' + x)
-    audio_samples['file_exists'] = apply_by_multiprocessing(audio_samples['file_paths'], os.path.exists)
+    audio_samples['file_path'] = audio_samples.loc[:, 'file'].apply(lambda x: 'outputs/' + audio_group + '/' + x)
+    audio_samples['file_exists'] = apply_by_multiprocessing(audio_samples['file_path'], os.path.exists)
     audio_samples = audio_samples[audio_samples['file_exists'] == True].reset_index()
     # audio_samples['spectrogram'] = apply_by_multiprocessing(audio_samples['file_paths'],generate_aiff_spectrogram)#.apply(
     # audio_samples['window_count'] = audio_samples.loc[:,'spectrogram'].apply(lambda x: x.shape[0])
@@ -111,17 +113,29 @@ def create_spectrogram_tfrecords(audio_group='audio'):
     def _bytes_feature(value):
         return tf.train.Feature(bytes_list=tf.train.BytesList(value=value))
 
-    writer = tf.python_io.TFRecordWriter(output_path)
-    for sample in audio_samples:
+    writer = tf.python_io.TFRecordWriter('./outputs/' + audio_group + '.tfrecords')
+    audio_samples = audio_samples[:100]
+    for (i,sample) in audio_samples.iterrows():
+        spectrogram = generate_aiff_spectrogram(sample['file_path'])
+        spec_n = spectrogram.shape[0]
+        spec_w = spectrogram.shape[1]
+        spec = spectrogram.reshape(-1)
 
         example = tf.train.Example(features=tf.train.Features(
             feature={
-                'label': _int64_feature([label]),
-                'path': _bytes_feature([image_path]),
-                'instance' : _bytes_feature([instance_id])
+                'word': _bytes_feature([sample['word'].encode('utf-8')]),
+                'phoneme': _bytes_feature([sample['phonemes'].encode('utf-8')]),
+                'voice': _bytes_feature([sample['voice'].encode('utf-8')]),
+                'language': _bytes_feature([sample['language'].encode('utf-8')]),
+                'rate':_int64_feature([sample['rate']]),
+                'variant': _bytes_feature([sample['variant'].encode('utf-8')]),
+                'file': _bytes_feature([sample['file'].encode('utf-8')]),
+                'spec':_float_feature(spec),
+                'spec_n':_int64_feature([spec_n]),
+                'spec_w':_int64_feature([spec_w])
             }
         ))
-        writer.write(example.SerializeToString())
+        writer.write(example.SerializeToString())
     writer.close()
 
 def create_tagged_data(audio_samples):
@@ -177,7 +191,8 @@ def speech_model_data():
 if __name__ == '__main__':
     # sunflower_pairs_data()
     # create_spectrogram_data()
-    create_spectrogram_data('story_words')
+    # create_spectrogram_data('story_words')
+    create_spectrogram_tfrecords('story_words')
     # create_padded_spectrogram()
     # create_speech_pairs_data()
     # print(speech_model_data())
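
For sanity-checking the output, here is a minimal sketch of reading these records back, using the same TF 1.x tf.python_io API the diff relies on. The function name read_spectrogram_tfrecords is hypothetical; the feature names simply mirror what create_spectrogram_tfrecords writes, and _int64_feature / _float_feature are assumed to be defined in the file alongside the _bytes_feature helper shown above.

# Sketch only: assumes the record layout written by create_spectrogram_tfrecords.
import numpy as np
import tensorflow as tf

def read_spectrogram_tfrecords(audio_group='story_words'):
    """Yield (word, 2-D spectrogram) pairs from the records written above."""
    record_path = './outputs/' + audio_group + '.tfrecords'
    for record in tf.python_io.tf_record_iterator(record_path):
        example = tf.train.Example()
        example.ParseFromString(record)
        feats = example.features.feature
        word = feats['word'].bytes_list.value[0].decode('utf-8')
        spec_n = feats['spec_n'].int64_list.value[0]
        spec_w = feats['spec_w'].int64_list.value[0]
        # 'spec' was flattened with reshape(-1) before writing; restore its shape
        spec = np.array(feats['spec'].float_list.value, dtype=np.float32)
        yield word, spec.reshape(spec_n, spec_w)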