"""Miscellaneous helpers: audio playback/recording through PyAudio,
spectrogram helpers, reservoir sampling, a multiprocessing ``DataFrame.apply``,
filesystem utilities and thread-safe generator wrappers.
"""
import functools
import itertools
import math
import multiprocessing
import os
import random
import string
import subprocess
import threading

import numpy as np
import pandas as pd
import pyaudio
from pysndfile import sndio as snd

# from matplotlib import pyplot as plt
from speech_spectrum import plot_stft, generate_spec_frec, generate_aiff_spectrogram

SAMPLE_RATE = 22050
N_CHANNELS = 2

# Module-level sink used to silence subprocess output; kept open for the
# lifetime of the process because other code may reference it.
devnull = open(os.devnull, 'w')


def step_count(n_records, batch_size):
    """Return the number of batches of ``batch_size`` needed for ``n_records``
    records (the last batch may be partial)."""
    return int(math.ceil(n_records * 1.0 / batch_size))


def file_player():
    """Create a PyAudio output instance and return ``(play_file, close_player)``.

    ``play_file(audiopath, plot=False)`` reads and plays one audio file;
    ``close_player()`` terminates the shared PyAudio instance.
    """
    p_oup = pyaudio.PyAudio()

    def play_file(audiopath, plot=False):
        """Play ``audiopath``; optionally plot its STFT afterwards."""
        print('playing', audiopath)
        samples, samplerate, _form = snd.read(audiopath)
        stream = p_oup.open(
            format=pyaudio.paFloat32,
            channels=2,
            rate=samplerate,
            output=True)
        try:
            # Duplicate the samples and interleave them so they fill both
            # output channels (assumes `samples` is 1-D/mono -- TODO confirm).
            one_channel = np.asarray([samples, samples]).T.reshape(-1)
            audio_data = one_channel.astype(np.float32).tobytes()
            stream.write(audio_data)
        finally:
            # Release the stream even when the write fails.
            stream.close()
        if plot:
            # NOTE(review): plots with the module SAMPLE_RATE rather than the
            # file's own `samplerate` -- confirm this is intended.
            plot_stft(samples, SAMPLE_RATE)

    def close_player():
        """Terminate the PyAudio instance shared by ``play_file``."""
        p_oup.terminate()

    return play_file, close_player


def reservoir_sample(iterable, k):
    """Return ``k`` items chosen from ``iterable`` by reservoir sampling.

    If the iterable yields fewer than ``k`` items, all of them are returned
    in random order.

    Raises:
        ValueError: if ``k`` is not positive.
    """
    it = iter(iterable)
    if not (k > 0):
        raise ValueError("sample size must be positive")

    sample = list(itertools.islice(it, k))  # fill the reservoir
    random.shuffle(sample)
    for i, item in enumerate(it, start=k + 1):
        j = random.randrange(i)  # random [0..i)
        if j < k:
            # Replace an item with gradually decreasing probability.
            sample[j] = item
    return sample


def padd_zeros(spgr, max_samples):
    """Zero-pad the 2-D array ``spgr`` along axis 0 up to ``max_samples`` rows.

    Uses ``np.pad`` (the public name) instead of ``np.lib.pad``, which is not
    exposed in the NumPy 2.x ``np.lib`` namespace.
    """
    return np.pad(spgr, [(0, max_samples - spgr.shape[0]), (0, 0)], 'constant')


def read_seg_file(aiff_name):
    """Load the ``<base>-palign.csv`` file that sits next to ``aiff_name``.

    The CSV is read with the column names ``action, start, end, phoneme`` and
    only rows with ``action == 'PhonAlign'`` and ``phoneme != '#'`` are kept.
    """
    base_name = aiff_name.rsplit('.aiff', 1)[0]
    seg_file = base_name + '-palign.csv'
    seg_data = pd.read_csv(seg_file, names=['action', 'start', 'end', 'phoneme'])
    keep = (seg_data['action'] == 'PhonAlign') & (seg_data['phoneme'] != '#')
    return seg_data[keep]


def record_spectrogram(n_sec, plot=False, playback=False):
    """Record from the default input device and return its spectrogram.

    Args:
        n_sec: value used to size the recording; the number of frames read is
            ``int(SAMPLE_RATE * n_sec / N_CHANNELS)``.
        plot: when true, plot the STFT of the processed signal.
        playback: when true, play the processed signal back.

    Returns:
        The first element returned by ``generate_spec_frec``.
    """
    # show_record_prompt()
    chunk_size = int(SAMPLE_RATE * n_sec / N_CHANNELS)  # fixed chunk size
    input('Press [Enter] to start recording sample... ')

    p_inp = pyaudio.PyAudio()
    try:
        stream = p_inp.open(
            format=pyaudio.paFloat32,
            channels=N_CHANNELS,
            rate=SAMPLE_RATE,
            input=True,
            frames_per_buffer=chunk_size)
        try:
            data = stream.read(chunk_size)
            stream.stop_stream()
        finally:
            stream.close()
    finally:
        # Always release the audio device, even when recording fails.
        p_inp.terminate()

    numpydata = np.frombuffer(data, dtype=np.float32)
    # Average the absolute values of the interleaved channels per frame...
    multi_channel = np.abs(np.reshape(numpydata, (-1, N_CHANNELS))).mean(axis=1)
    # ...then interleave that signal with its negation.
    one_channel = np.asarray([multi_channel, -1 * multi_channel]).T.reshape(-1)
    mean_channel_data = one_channel.tobytes()

    if plot:
        plot_stft(one_channel, SAMPLE_RATE)

    if playback:
        p_oup = pyaudio.PyAudio()
        try:
            stream = p_oup.open(
                format=pyaudio.paFloat32,
                channels=2,
                rate=SAMPLE_RATE,
                output=True)
            try:
                stream.write(mean_channel_data)
            finally:
                stream.close()
        finally:
            p_oup.terminate()

    ims, _ = generate_spec_frec(one_channel, SAMPLE_RATE)
    return ims


def pair_for_word(phrase='able'):
    """Return the spectrograms of the 'good' and 'test' recordings of
    ``phrase`` found under ``./inputs/pairs/``."""
    spec1 = generate_aiff_spectrogram('./inputs/pairs/good/' + phrase + '.aiff')
    spec2 = generate_aiff_spectrogram('./inputs/pairs/test/' + phrase + '.aiff')
    return spec1, spec2


def transribe_audio_text(aiff_name, phrase):
    """Convert ``aiff_name`` to a ``.wav`` with ffmpeg and write ``phrase`` to
    a sibling ``.txt`` file.

    ffmpeg's output is discarded and its exit status is not checked.
    """
    base_name = aiff_name.rsplit('.aiff', 1)[0]
    wav_name = base_name + '.wav'
    txt_name = base_name + '.txt'
    params = ['ffmpeg', '-y', '-i', aiff_name, wav_name]
    subprocess.call(params, stdout=devnull, stderr=devnull)
    # Context manager guarantees the transcript file is closed on error.
    with open(txt_name, 'w') as trcr_f:
        trcr_f.write(phrase)


def _apply_df(args):
    """Worker: apply ``func`` to one DataFrame chunk; return ``(index, result)``."""
    df, func, num, kwargs = args
    return num, df.apply(func, **kwargs)


def apply_by_multiprocessing(df, func, **kwargs):
    """Run ``df.apply(func, **kwargs)`` in parallel over chunks of ``df``.

    Args:
        df: DataFrame to process; it is split into ``workers`` chunks.
        func: function handed to ``DataFrame.apply`` (must be picklable).
        **kwargs: forwarded to ``DataFrame.apply``. The optional ``workers``
            key sets the number of processes (default: CPU count) and is not
            forwarded.

    Returns:
        The per-chunk results concatenated in the original chunk order.
    """
    workers = kwargs.pop('workers', multiprocessing.cpu_count())
    pool = multiprocessing.Pool(processes=workers)
    try:
        result = pool.map(
            _apply_df,
            [(chunk, func, i, kwargs)
             for i, chunk in enumerate(np.array_split(df, workers))])
    finally:
        # Shut the workers down and wait for them so no processes are leaked.
        pool.close()
        pool.join()
    result = sorted(result, key=lambda x: x[0])
    return pd.concat([i[1] for i in result])


def square(x):
    """Return ``x ** x``.

    NOTE(review): despite the name this raises ``x`` to its own power rather
    than squaring it -- confirm which is intended before relying on it.
    """
    return x**x


# if __name__ == '__main__':
#     df = pd.DataFrame({'a':range(10), 'b':range(10)})
#     apply_by_multiprocessing(df, square, axis=1, workers=4)


def hms_string(sec_elapsed):
    """Format a duration in seconds as ``H:MM:SS.ss``."""
    h = int(sec_elapsed / (60 * 60))
    m = int((sec_elapsed % (60 * 60)) / 60)
    s = sec_elapsed % 60.
    return "{}:{:>02}:{:>05.2f}".format(h, m, s)


def rm_rf(d):
    """Recursively delete directory ``d`` and everything inside it.

    Symbolic links are unlinked rather than followed, so the targets of links
    to directories are left untouched.
    """
    for name in os.listdir(d):
        path = os.path.join(d, name)
        if os.path.isdir(path) and not os.path.islink(path):
            rm_rf(path)
        else:
            os.unlink(path)
    os.rmdir(d)


def create_dir(direc):
    """Create ``direc`` as an empty directory, wiping it first if it exists."""
    if os.path.exists(direc):
        rm_rf(direc)
    os.makedirs(direc)


def format_filename(s):
    """
    Take a string and return a valid filename constructed from the string.
    Uses a whitelist approach: any characters not present in valid_chars are
    removed. Also spaces are replaced with underscores.

    Note: this method may produce invalid filenames such as ``, `.` or `..`
    When I use this method I prepend a date string like '2009_01_15_19_46_32_'
    and append a file extension like '.txt', so I avoid the potential of using
    an invalid filename.
    """
    valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
    filename = ''.join(c for c in s if c in valid_chars)
    filename = filename.replace(' ', '_')  # I don't like spaces in filenames.
    return filename


#################### Now make the data generator threadsafe ####################

class threadsafe_iter:
    """Takes an iterator/generator and makes it thread-safe by
    serializing calls to the `next` method of the given iterator/generator.
    """

    def __init__(self, it):
        self.it = it
        self.lock = threading.Lock()

    def __iter__(self):
        return self

    def __next__(self):  # Py3
        with self.lock:
            return next(self.it)

    # Py2 spelling. Aliased to __next__ so it also works on Python 3, where
    # iterators have no `.next()` method to delegate to.
    next = __next__


def threadsafe_generator(f):
    """A decorator that takes a generator function and makes it thread-safe.

    The wrapper keeps the decorated function's name and docstring.
    """
    @functools.wraps(f)
    def g(*a, **kw):
        return threadsafe_iter(f(*a, **kw))
    return g