diff --git a/speech_data.py b/speech_data.py index 2659eb7..c67b015 100644 --- a/speech_data.py +++ b/speech_data.py @@ -110,67 +110,6 @@ def padd_zeros(spgr, max_samples): return np.lib.pad(spgr, [(0, max_samples - spgr.shape[0]), (0, 0)], 'constant') -def find_max_n(trf): - max_n,n_records = 0,0 - max_n_it = tf.python_io.tf_record_iterator(path=trf) - for string_record in max_n_it: - example = tf.train.Example() - example.ParseFromString(string_record) - spec_n1 = example.features.feature['spec_n1'].int64_list.value[0] - spec_n2 = example.features.feature['spec_n2'].int64_list.value[0] - max_n = max([max_n,spec_n1,spec_n2]) - n_records+=1 - return (max_n,n_records) - -def padd_zeros_siamese_tfrecords(audio_group='audio'): - records_file = os.path.join('./outputs',audio_group+'.tfrecords') - record_iterator = tf.python_io.tf_record_iterator(path=records_file) - print('finding max_n...') - max_n,n_records = find_max_n(records_file) - p_spec1 = None - print('reading tfrecords...') - writer = tf.python_io.TFRecordWriter('./outputs/' + audio_group + '_padded.tfrecords') - for string_record in tqdm(record_iterator,desc='padding siamese record',total=n_records): - example = tf.train.Example() - example.ParseFromString(string_record) - spec_n1 = example.features.feature['spec_n1'].int64_list.value[0] - spec_n2 = example.features.feature['spec_n2'].int64_list.value[0] - spec_w1 = example.features.feature['spec_w1'].int64_list.value[0] - spec_w2 = example.features.feature['spec_w2'].int64_list.value[0] - spec1 = np.array(example.features.feature['spec1'].float_list.value).reshape(spec_n1,spec_w1) - spec2 = np.array(example.features.feature['spec2'].float_list.value).reshape(spec_n2,spec_w2) - p_spec1,p_spec2 = padd_zeros(spec1,max_n),padd_zeros(spec2,max_n) - output = example.features.feature['output'].int64_list.value - w_example = tf.train.Example(features=tf.train.Features( - feature={ - 'spec1':_float_feature(p_spec1.reshape(-1)), - 'spec2':_float_feature(p_spec2.reshape(-1)), - 'output':_int64_feature(output) - } - )) - writer.write(w_example.SerializeToString()) - const_file = os.path.join('./outputs',audio_group+'.constants') - pickle.dump((max_n,p_spec1.shape[1],n_records),open(const_file,'wb')) - writer.close() - -def pickle_constants(audio_group='audio'): - records_file = os.path.join('./outputs',audio_group+'_padded.tfrecords') - record_iterator = tf.python_io.tf_record_iterator(path=records_file) - print('finding max_n...') - max_n,n_records = find_max_n(records_file) - spec1 = 0 - print('finding spec_w1...') - record_iterator = tf.python_io.tf_record_iterator(path=records_file) - for string_record in record_iterator: - example = tf.train.Example() - example.ParseFromString(string_record) - spec1 = len(example.features.feature['spec1'].float_list.value)//max_n - print('found spec_w1...') - break - const_file = os.path.join('./outputs',audio_group+'.constants') - print(max_n,spec1,n_records) - pickle.dump((max_n,spec1,n_records),open(const_file,'wb')) - def reservoir_sample(iterable, k): it = iter(iterable) if not (k > 0): @@ -185,36 +124,6 @@ def reservoir_sample(iterable, k): sample[j] = item # replace item with gradually decreasing probability return sample -def read_siamese_tfrecords_oneshot(audio_group='audio',sample_size=3000): - records_file = os.path.join('./outputs',audio_group+'_padded.tfrecords') - record_iterator = tf.python_io.tf_record_iterator(path=records_file) - input_pairs = [] - output_class = [] - const_file = os.path.join('./outputs',audio_group+'.constants') - (n_spec,n_features,n_records) = pickle.load(open(const_file,'rb')) - print('reading tfrecords({})...'.format(audio_group)) - samples = min([sample_size,n_records]) - input_data = np.zeros((samples,2,n_spec,n_features)) - output_data = np.zeros((samples,2)) - random_samples = enumerate(reservoir_sample(record_iterator,samples)) - for (i,string_record) in tqdm(random_samples,total=samples): - example = tf.train.Example() - example.ParseFromString(string_record) - spec_n1 = example.features.feature['spec_n1'].int64_list.value[0] - spec_n2 = example.features.feature['spec_n2'].int64_list.value[0] - spec_w1 = example.features.feature['spec_w1'].int64_list.value[0] - spec_w2 = example.features.feature['spec_w2'].int64_list.value[0] - spec1 = np.array(example.features.feature['spec1'].float_list.value).reshape(spec_n1,spec_w1) - spec2 = np.array(example.features.feature['spec2'].float_list.value).reshape(spec_n2,spec_w2) - p_spec1,p_spec2 = padd_zeros(spec1,n_spec),padd_zeros(spec2,n_spec) - input_data[i] = np.asarray([spec1,spec2]) - output = example.features.feature['output'].int64_list.value - output_data[i] = np.asarray(output) - # print('converting to nparray...') - # tr_pairs,te_pairs,tr_y,te_y = train_test_split(input_data,output_data,test_size=0.1) - # result = (tr_pairs,te_pairs,tr_y,te_y,n_spec,n_features) - return input_data,output_data - def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,sample_size=100): records_file = os.path.join('./outputs',audio_group+'.train.tfrecords') input_pairs = [] @@ -273,120 +182,8 @@ def read_siamese_tfrecords_generator(audio_group='audio',batch_size=32,sample_si return record_generator,input_data,output_data,n_spec,n_features,n_records -def read_siamese_tfrecords_generator_old(audio_group='audio',batch_size=32): - records_file = os.path.join('./outputs',audio_group+'_padded.tfrecords') - input_pairs = [] - output_class = [] - const_file = os.path.join('./outputs',audio_group+'.constants') - (n_spec,n_features,n_records) = pickle.load(open(const_file,'rb')) - print('reading tfrecords({})...'.format(audio_group)) - def record_generator(): - input_data = [] - output_data = [] - while True: - record_iterator = tf.python_io.tf_record_iterator(path=records_file) - for (i,string_record) in tqdm(enumerate(record_iterator),total=n_records): - example = tf.train.Example() - example.ParseFromString(string_record) - spec1 = np.array(example.features.feature['spec1'].float_list.value).reshape(n_spec,n_features) - spec2 = np.array(example.features.feature['spec2'].float_list.value).reshape(n_spec,n_features) - input_data.append(np.asarray([spec1,spec2])) - output = example.features.feature['output'].int64_list.value - output_data.append(np.asarray(output)) - if len(input_data) == batch_size: - input_arr = np.asarray(input_data) - output_arr = np.asarray(output_data) - yield ([input_arr[:, 0], input_arr[:, 1]],output_arr) - input_data = [] - output_data = [] - return record_generator,n_spec,n_features,n_records - -def read_siamese_tfrecords(audio_group='audio'): - audio_group='story_words_test' - - record_file = os.path.join('./outputs',audio_group+'_padded.tfrecords') - const_file = os.path.join('./outputs',audio_group+'.constants') - (n_spec,n_features) = pickle.load(open(const_file,'rb')) - - filename_queue = tf.train.string_input_producer([record_file]) - reader = tf.TFRecordReader() - _, serialized_example = reader.read(filename_queue) - features = tf.parse_single_example(serialized_example, - features={ - 'spec1': tf.FixedLenFeature([1,n_spec,n_features], tf.float32), - 'spec2': tf.FixedLenFeature([1,n_spec,n_features], tf.float32), - 'output':tf.FixedLenFeature([2], tf.int64) - }) - spec1 = features['spec1'] - spec1 = tf.cast(spec1, tf.float32) * (1. / 255) - spec2 = features['spec2'] - spec2 = tf.cast(spec2, tf.float32) * (1. / 255) - output = tf.cast(features['output'], tf.int32) - return spec1,spec2, output,n_spec,n_features - -def read_siamese_tfrecords_batch(audio_group='audio', batch_size=32): - audio_group='story_words_test' - record_file = os.path.join('./outputs',audio_group+'_padded.tfrecords') - """ Return tensor to read from TFRecord """ - print('Creating graph for loading {} ...'.format(record_file)) - const_file = os.path.join('./outputs',audio_group+'.constants') - (n_spec,n_features) = pickle.load(open(const_file,'rb')) - records_file = os.path.join('./outputs',audio_group+'.tfrecords') - record_iterator = tf.python_io.tf_record_iterator(path=records_file) - n_records = len([i for i in record_iterator]) - batch_shape=[batch_size, n_spec, n_features] - with tf.variable_scope("SiameseTFRecords"): - record_input = data_flow_ops.RecordInput(record_file, batch_size=batch_size) - records_op = record_input.get_yield_op() - records_op = tf.split(records_op, batch_shape[0], 0) - records_op = [tf.reshape(record, []) for record in records_op] - specs1, specs2 = [],[] - outputs = [] - for i, serialized_example in tqdm(enumerate(records_op)): - with tf.variable_scope("parse_siamese_pairs", reuse=True): - features = tf.parse_single_example( - serialized_example, - features={ - 'spec1': tf.FixedLenFeature([n_spec,n_features], tf.float32), - 'spec2': tf.FixedLenFeature([n_spec,n_features], tf.float32), - 'output':tf.FixedLenFeature([2], tf.int64) - }) - spec1 = features['spec1'] - spec1 = tf.cast(spec1, tf.float32) * (1. / 255) - spec2 = features['spec2'] - output = tf.cast(spec2, tf.float32) * (1. / 255) - output = tf.cast(features['output'], tf.float32) - specs1.append(spec1) - specs2.append(spec2) - outputs.append(output) - - specs1 = tf.parallel_stack(specs1, 0) - specs2 = tf.parallel_stack(specs2, 0) - outputs = tf.parallel_stack(outputs, 0) - specs1 = tf.cast(specs1, tf.float32) - specs2 = tf.cast(specs2, tf.float32) - - specs1 = tf.reshape(specs1, shape=batch_shape) - specs2 = tf.reshape(specs1, shape=batch_shape) - specs1_shape = specs1.get_shape() - specs2_shape = specs2.get_shape() - outputs_shape = outputs.get_shape() - copy_stage = data_flow_ops.StagingArea( - [tf.float32, tf.float32, tf.float32], - shapes=[specs1_shape, specs2_shape, outputs_shape]) - copy_stage_op = copy_stage.put( - [specs1, specs2, outputs]) - staged_specs1, staged_specs2, staged_outputs = copy_stage.get() - return specs1, spec2, outputs,n_spec,n_features,n_records - def audio_samples_word_count(audio_group='audio'): - audio_group = 'story_all' - audio_samples = pd.read_csv( './outputs/' + audio_group + '.csv' - , names=['word','phonemes', 'voice', 'language', 'rate', 'variant', 'file'] - , quoting=csv.QUOTE_NONE) - audio_samples['file_path'] = audio_samples.loc[:, 'file'].apply(lambda x: 'outputs/' + audio_group + '/' + x) - audio_samples['file_exists'] = apply_by_multiprocessing(audio_samples['file_path'], os.path.exists) - audio_samples = audio_samples[audio_samples['file_exists'] == True].reset_index() + audio_samples = pd.read_csv( './outputs/' + audio_group + '.csv') return len(audio_samples.groupby(audio_samples['word'])) def fix_csv(audio_group='audio'):