diff --git a/speech_data.py b/speech_data.py
index 23d741f..c7356b6 100644
--- a/speech_data.py
+++ b/speech_data.py
@@ -83,11 +83,10 @@ def create_spectrogram_data(audio_group='audio'):
                                 , quoting=csv.QUOTE_NONE)
     # audio_samples = audio_samples.loc[audio_samples['word'] ==
     #     'sunflowers'].reset_index(drop=True)
-    file_names = audio_samples.loc[:, 'file'].apply(lambda x: 'outputs/' + audio_group + '/' + x)
-    audio_samples['file_exists'] = apply_by_multiprocessing(file_names,os.path.exists)
-    audio_samples = audio_samples[audio_samples['file_exists'] == False]
-    audio_samples['spectrogram'] = apply_by_multiprocessing(file_names,generate_aiff_spectrogram)#.apply(
-    #generate_aiff_spectrogram)
+    audio_samples['file_paths'] = audio_samples.loc[:, 'file'].apply(lambda x: 'outputs/' + audio_group + '/' + x)
+    audio_samples['file_exists'] = apply_by_multiprocessing(audio_samples['file_paths'], os.path.exists)
+    audio_samples = audio_samples[audio_samples['file_exists'] == True].reset_index()
+    audio_samples['spectrogram'] = apply_by_multiprocessing(audio_samples['file_paths'], generate_aiff_spectrogram)
     audio_samples['window_count'] = audio_samples.loc[:,'spectrogram'].apply(lambda x: x.shape[0])
     audio_samples.to_pickle('outputs/{}-spectrogram.pkl'.format(audio_group))
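
Both the `os.path.exists` check and the spectrogram generation go through `apply_by_multiprocessing`, a helper defined elsewhere in speech_data.py whose implementation is not shown in this hunk. A minimal sketch of the usual pattern (split the Series into chunks, apply the function in a worker pool, concatenate the results) is below; the `workers` parameter and the `_apply_chunk` helper are assumptions for illustration, not the module's actual API.

```python
import multiprocessing

import numpy as np
import pandas as pd


def _apply_chunk(args):
    # Runs in a worker process: apply `func` to one chunk of the Series.
    chunk, func = args
    return chunk.apply(func)


def apply_by_multiprocessing(series, func, workers=4):
    # Hypothetical sketch: split the Series into `workers` chunks, map them
    # across a process pool, and stitch the per-chunk results back together.
    # `func` must be picklable, i.e. defined at module level
    # (os.path.exists and generate_aiff_spectrogram both qualify).
    chunks = np.array_split(series, workers)
    with multiprocessing.Pool(processes=workers) as pool:
        results = pool.map(_apply_chunk, [(chunk, func) for chunk in chunks])
    return pd.concat(results)
```

Under that assumption, `apply_by_multiprocessing(audio_samples['file_paths'], os.path.exists)` returns a boolean Series aligned with the original index, which is why the subsequent `file_exists` filter in the diff works as a plain boolean mask.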