"""Miscellaneous helpers: audio playback/recording, sampling, parallel
DataFrame apply, filesystem utilities and thread-safe iterator wrappers.
"""
import functools
import itertools
import math
import multiprocessing
import os
import random
import shutil
import string
import threading

import numpy as np
import pandas as pd
import pyaudio
from pysndfile import sndio as snd
# from matplotlib import pyplot as plt

from speech_spectrum import plot_stft, generate_spec_frec

SAMPLE_RATE = 22050
N_CHANNELS = 2

# Characters kept by format_filename(); built once so membership tests are O(1).
_VALID_FILENAME_CHARS = frozenset(
    "-_.() %s%s" % (string.ascii_letters, string.digits))


def step_count(n_records, batch_size):
    """Return the number of batches needed to cover `n_records` records.

    The result is ``ceil(n_records / batch_size)`` as an int, so a final
    partial batch counts as a full step.
    """
    return int(math.ceil(n_records * 1.0 / batch_size))


def file_player():
    """Create an audio file player backed by a single PyAudio instance.

    Returns:
        A ``(play_file, close_player)`` tuple of closures. ``play_file``
        plays one audio file; ``close_player`` terminates the shared
        PyAudio instance and should be called once playback is finished.
    """
    p_oup = pyaudio.PyAudio()

    def play_file(audiopath, plot=False):
        """Play the audio file at `audiopath`; optionally plot its STFT."""
        print('playing', audiopath)
        samples, samplerate, _fmt = snd.read(audiopath)
        stream = p_oup.open(
            format=pyaudio.paFloat32,
            channels=2,
            rate=samplerate,
            output=True)
        try:
            # Duplicate the samples into two interleaved channels for the
            # 2-channel output stream.
            # NOTE(review): this presumes `samples` is mono (1-D) -- confirm.
            interleaved = np.asarray([samples, samples]).T.reshape(-1)
            stream.write(interleaved.astype(np.float32).tobytes())
        finally:
            # Release the stream even if conversion or writing fails.
            stream.close()
        if plot:
            # Plot with the file's own rate rather than the global default.
            plot_stft(samples, samplerate)

    def close_player():
        """Terminate the PyAudio instance shared by `play_file`."""
        p_oup.terminate()

    return play_file, close_player


def reservoir_sample(iterable, k):
    """Return up to `k` items sampled uniformly at random from `iterable`.

    The iterable is consumed in a single pass. If it holds fewer than `k`
    items, all of them are returned in random order.

    Raises:
        ValueError: if `k` is not positive.
    """
    it = iter(iterable)
    if not (k > 0):
        raise ValueError("sample size must be positive")

    sample = list(itertools.islice(it, k))  # fill the reservoir
    # If there are fewer than k items this shuffle is the whole result:
    # all items, in random order.
    random.shuffle(sample)
    for i, item in enumerate(it, start=k + 1):
        j = random.randrange(i)  # random [0..i)
        if j < k:
            # Replace an entry with gradually decreasing probability.
            sample[j] = item
    return sample


def record_spectrogram(n_sec, plot=False, playback=False):
    """Record audio from the default input device and return its spectrogram.

    Blocks on an [Enter] prompt before recording starts.

    Args:
        n_sec: Nominal recording length; the number of frames read is
            ``int(SAMPLE_RATE * n_sec / N_CHANNELS)``.
        plot: If true, plot the STFT of the processed signal.
        playback: If true, play the processed signal back after recording.

    Returns:
        The first element of the pair returned by ``generate_spec_frec``
        for the processed signal.
    """
    # show_record_prompt()
    # Fixed chunk size, in frames.
    # NOTE(review): at SAMPLE_RATE frames/s this captures n_sec / N_CHANNELS
    # seconds; the processed signal below has SAMPLE_RATE * n_sec samples.
    # Confirm this is intended before changing it.
    chunk_size = int(SAMPLE_RATE * n_sec / N_CHANNELS)
    input('Press [Enter] to start recording sample... \n')

    p_inp = pyaudio.PyAudio()
    try:
        stream = p_inp.open(
            format=pyaudio.paFloat32,
            channels=N_CHANNELS,
            rate=SAMPLE_RATE,
            input=True,
            frames_per_buffer=chunk_size)
        try:
            data = stream.read(chunk_size)
        finally:
            # Always release the input stream, even if the read fails.
            stream.stop_stream()
            stream.close()
    finally:
        p_inp.terminate()

    raw = np.frombuffer(data, dtype=np.float32)
    # Average the absolute values of the interleaved channels per frame.
    magnitude = np.abs(np.reshape(raw, (-1, N_CHANNELS))).mean(axis=1)
    # Interleave each value with its negation: [m0, -m0, m1, -m1, ...].
    one_channel = np.asarray([magnitude, -1 * magnitude]).T.reshape(-1)

    if plot:
        plot_stft(one_channel, SAMPLE_RATE)

    if playback:
        p_oup = pyaudio.PyAudio()
        try:
            out_stream = p_oup.open(
                format=pyaudio.paFloat32,
                channels=2,
                rate=SAMPLE_RATE,
                output=True)
            try:
                out_stream.write(one_channel.tobytes())
            finally:
                out_stream.close()
        finally:
            p_oup.terminate()

    ims, _ = generate_spec_frec(one_channel, SAMPLE_RATE)
    return ims


def _apply_df(args):
    """Worker for `apply_by_multiprocessing`.

    `args` is a ``(df, func, num, kwargs)`` tuple; returns
    ``(num, df.apply(func, **kwargs))`` so results can be re-ordered.
    """
    df, func, num, kwargs = args
    return num, df.apply(func, **kwargs)


def apply_by_multiprocessing(df, func, **kwargs):
    """Apply `func` to `df` in parallel and concatenate the partial results.

    The frame is split into `workers` pieces with ``np.array_split`` and
    each piece is processed by ``df.apply(func, **kwargs)`` in a worker
    process. `func` is sent to the workers, so it has to be picklable.

    Args:
        df: The DataFrame to process.
        func: Callable handed to ``DataFrame.apply``.
        **kwargs: Forwarded to ``DataFrame.apply``. The optional ``workers``
            key is consumed here and sets the number of processes
            (default: ``multiprocessing.cpu_count()``).

    Returns:
        The per-piece results concatenated in the original order.
    """
    workers = kwargs.pop('workers', multiprocessing.cpu_count())
    pool = multiprocessing.Pool(processes=workers)
    try:
        result = pool.map(
            _apply_df,
            [(piece, func, idx, kwargs)
             for idx, piece in enumerate(np.array_split(df, workers))])
    finally:
        # Shut the workers down and reap them even when mapping fails.
        pool.close()
        pool.join()
    # pool.map already preserves input order; the sort is purely defensive.
    result = sorted(result, key=lambda pair: pair[0])
    return pd.concat([pair[1] for pair in result])


def square(x):
    """Return `x` squared (the original computed ``x ** x``)."""
    return x ** 2

# if __name__ == '__main__':
#     df = pd.DataFrame({'a':range(10), 'b':range(10)})
#     apply_by_multiprocessing(df, square, axis=1, workers=4)


def hms_string(sec_elapsed):
    """Format a duration in seconds as ``H:MM:SS.ss``."""
    h = int(sec_elapsed / (60 * 60))
    m = int((sec_elapsed % (60 * 60)) / 60)
    s = sec_elapsed % 60.
    return "{}:{:>02}:{:>05.2f}".format(h, m, s)


def rm_rf(d):
    """Recursively delete directory `d` and everything beneath it.

    Uses ``shutil.rmtree``, which removes symlinks found inside the tree
    instead of descending into their targets.
    """
    shutil.rmtree(d)


def create_dir(direc):
    """Create `direc` as an empty directory.

    WARNING: if `direc` already exists, it and all of its contents are
    deleted first.
    """
    if os.path.exists(direc):
        rm_rf(direc)
    os.makedirs(direc)


def format_filename(s):
    """Return a valid filename built from the string `s`.

    Uses a whitelist approach: any character that is not an ASCII letter,
    a digit or one of ``-_.() `` is removed, and spaces are then replaced
    with underscores.

    Note: this can still produce invalid filenames such as ``, `.` or `..`.
    Prepending a prefix (e.g. a date string like '2009_01_15_19_46_32_') and
    appending a file extension like '.txt' avoids that.
    """
    filename = ''.join(c for c in s if c in _VALID_FILENAME_CHARS)
    return filename.replace(' ', '_')  # no spaces in filenames


#################### Now make the data generator threadsafe ####################

class threadsafe_iter:
    """Wrap an iterator/generator so that calls to `next` are serialized.

    Every advance of the underlying iterator happens while holding a lock,
    so several threads can pull items from the same wrapper.
    """

    def __init__(self, it):
        self.it = it
        self.lock = threading.Lock()

    def __iter__(self):
        return self

    def __next__(self):  # Py3
        with self.lock:
            return next(self.it)

    # Py2 spelling; the builtin next() works for iterators of either version.
    next = __next__


def threadsafe_generator(f):
    """Decorator that makes the generator function `f` thread-safe.

    Calling the decorated function returns the generator wrapped in a
    `threadsafe_iter`.
    """
    @functools.wraps(f)
    def g(*a, **kw):
        return threadsafe_iter(f(*a, **kw))
    return g