speech-scoring/speech_tools.py

161 lines
4.8 KiB
Python
Raw Normal View History

import os
import math
import string
import threading
import multiprocessing
import pandas as pd
import numpy as np
2017-10-24 06:24:15 +00:00
import pyaudio
from pysndfile import sndio as snd
# from matplotlib import pyplot as plt
from speech_spectrum import plot_stft, generate_spec_frec
2017-10-24 06:24:15 +00:00
SAMPLE_RATE = 22050
N_CHANNELS = 2
def step_count(n_records,batch_size):
return int(math.ceil(n_records*1.0/batch_size))
def file_player():
p_oup = pyaudio.PyAudio()
def play_file(audiopath,plot=False):
print('playing',audiopath)
samples, samplerate, form = snd.read(audiopath)
stream = p_oup.open(
format=pyaudio.paFloat32,
channels=2,
rate=samplerate,
output=True)
one_channel = np.asarray([samples, samples]).T.reshape(-1)
audio_data = one_channel.astype(np.float32).tobytes()
stream.write(audio_data)
stream.close()
if plot:
plot_stft(samples, SAMPLE_RATE)
def close_player():
p_oup.terminate()
return play_file,close_player
2017-10-24 06:24:15 +00:00
def record_spectrogram(n_sec, plot=False, playback=False):
# show_record_prompt()
N_SEC = n_sec
CHUNKSIZE = int(SAMPLE_RATE * N_SEC / N_CHANNELS) # fixed chunk size
input('Press [Enter] to start recording sample... ')
p_inp = pyaudio.PyAudio()
stream = p_inp.open(
format=pyaudio.paFloat32,
channels=N_CHANNELS,
rate=SAMPLE_RATE,
input=True,
frames_per_buffer=CHUNKSIZE)
data = stream.read(CHUNKSIZE)
numpydata = np.frombuffer(data, dtype=np.float32)
multi_channel = np.abs(np.reshape(numpydata, (-1, 2))).mean(axis=1)
one_channel = np.asarray([multi_channel, -1 * multi_channel]).T.reshape(-1)
mean_channel_data = one_channel.tobytes()
stream.stop_stream()
stream.close()
p_inp.terminate()
if plot:
plot_stft(one_channel, SAMPLE_RATE)
if playback:
p_oup = pyaudio.PyAudio()
stream = p_oup.open(
format=pyaudio.paFloat32,
channels=2,
rate=SAMPLE_RATE,
output=True)
stream.write(mean_channel_data)
stream.close()
p_oup.terminate()
2017-10-27 13:23:22 +00:00
ims, _ = generate_spec_frec(one_channel, SAMPLE_RATE)
return ims
def _apply_df(args):
df, func, num, kwargs = args
return num, df.apply(func, **kwargs)
def apply_by_multiprocessing(df,func,**kwargs):
cores = multiprocessing.cpu_count()
workers=kwargs.pop('workers') if 'workers' in kwargs else cores
pool = multiprocessing.Pool(processes=workers)
result = pool.map(_apply_df, [(d, func, i, kwargs) for i,d in enumerate(np.array_split(df, workers))])
pool.close()
result=sorted(result,key=lambda x:x[0])
return pd.concat([i[1] for i in result])
def square(x):
return x**x
# if __name__ == '__main__':
# df = pd.DataFrame({'a':range(10), 'b':range(10)})
# apply_by_multiprocessing(df, square, axis=1, workers=4)
def hms_string(sec_elapsed):
h = int(sec_elapsed / (60 * 60))
m = int((sec_elapsed % (60 * 60)) / 60)
s = sec_elapsed % 60.
return "{}:{:>02}:{:>05.2f}".format(h, m, s)
def rm_rf(d):
for path in (os.path.join(d,f) for f in os.listdir(d)):
if os.path.isdir(path):
rm_rf(path)
else:
os.unlink(path)
os.rmdir(d)
def create_dir(direc):
if not os.path.exists(direc):
os.makedirs(direc)
else:
rm_rf(direc)
create_dir(direc)
def format_filename(s):
"""
Take a string and return a valid filename constructed from the string.
Uses a whitelist approach: any characters not present in valid_chars are
removed. Also spaces are replaced with underscores.
Note: this method may produce invalid filenames such as ``, `.` or `..`
When I use this method I prepend a date string like '2009_01_15_19_46_32_'
and append a file extension like '.txt', so I avoid the potential of using
an invalid filename.
"""
valid_chars = "-_.() %s%s" % (string.ascii_letters, string.digits)
filename = ''.join(c for c in s if c in valid_chars)
filename = filename.replace(' ','_') # I don't like spaces in filenames.
return filename
#################### Now make the data generator threadsafe ####################
class threadsafe_iter:
"""Takes an iterator/generator and makes it thread-safe by
serializing call to the `next` method of given iterator/generator.
"""
def __init__(self, it):
self.it = it
self.lock = threading.Lock()
def __iter__(self):
return self
def __next__(self): # Py3
with self.lock:
return next(self.it)
def next(self): # Py2
with self.lock:
return self.it.next()
def threadsafe_generator(f):
"""A decorator that takes a generator function and makes it thread-safe.
"""
def g(*a, **kw):
return threadsafe_iter(f(*a, **kw))
return g