speech-scoring/speech_tools.py

import pyaudio
from pysndfile import sndio as snd
import numpy as np
# from matplotlib import pyplot as plt
from speech_spectrum import plot_stft, generate_spec_frec

# Sampling rate (Hz) used when opening the recording/playback streams in
# record_spectrogram and passed to the spectrogram helpers.
SAMPLE_RATE = 22050
# Number of channels requested from the input device in record_spectrogram.
N_CHANNELS = 2

def file_player():
    """Create an audio-file player that shares a single PyAudio instance.

    Returns:
        A ``(play_file, close_player)`` tuple of callables.
        ``play_file(audiopath, plot=False)`` plays one file and
        ``close_player()`` terminates the shared PyAudio instance; call it
        once when no more files will be played.
    """
    p_oup = pyaudio.PyAudio()

    def play_file(audiopath, plot=False):
        """Play the audio file at ``audiopath`` and optionally plot its STFT.

        Args:
            audiopath: Path of the sound file, read with ``snd.read``.
            plot: When true, call ``plot_stft`` on the samples after playback.
        """
        print('playing', audiopath)
        samples, samplerate, _form = snd.read(audiopath)
        # Duplicate the signal into two interleaved channels (L, R, L, R, ...)
        # because the output stream below is opened with two channels.
        # NOTE(review): this assumes snd.read returned a 1-D (mono) array;
        # confirm behaviour for multi-channel files.
        stereo = np.asarray([samples, samples]).T.reshape(-1)
        audio_data = stereo.astype(np.float32).tobytes()
        stream = p_oup.open(
            format=pyaudio.paFloat32,
            channels=2,
            rate=samplerate,
            output=True)
        try:
            stream.write(audio_data)
        finally:
            # Release the output stream even if the write fails.
            stream.close()
        if plot:
            # Plot with the file's own rate (the one used for playback), not
            # the module-level recording rate.
            plot_stft(samples, samplerate)

    def close_player():
        """Terminate the PyAudio instance shared by ``play_file``."""
        p_oup.terminate()

    return play_file, close_player

def record_spectrogram(n_sec, plot=False, playback=False):
    """Record from the default input device and return its spectrogram.

    Waits for the user to press Enter, captures one buffer of audio, mixes
    the channels down, and passes the result to ``generate_spec_frec``.

    Args:
        n_sec: Requested recording length; used to size the capture buffer.
        plot: When true, call ``plot_stft`` on the processed signal.
        playback: When true, play the processed signal back before returning.

    Returns:
        The first element of the tuple returned by ``generate_spec_frec``.
    """
    # show_record_prompt()
    # NOTE(review): PyAudio counts frames (one sample per channel), so this
    # appears to capture n_sec / N_CHANNELS seconds; the +/- interleaving
    # below brings the sample count back to roughly SAMPLE_RATE * n_sec.
    # Kept unchanged because downstream consumers may rely on this length.
    chunk_frames = int(SAMPLE_RATE * n_sec / N_CHANNELS)  # fixed chunk size
    input('Press [Enter] to start recording sample... ')

    p_inp = pyaudio.PyAudio()
    try:
        stream = p_inp.open(
            format=pyaudio.paFloat32,
            channels=N_CHANNELS,
            rate=SAMPLE_RATE,
            input=True,
            frames_per_buffer=chunk_frames)
        try:
            data = stream.read(chunk_frames)
        finally:
            # Always release the input stream, even if the read fails.
            stream.stop_stream()
            stream.close()
    finally:
        p_inp.terminate()

    numpydata = np.frombuffer(data, dtype=np.float32)
    # One row per frame, one column per channel; average the absolute values
    # across the channels.
    multi_channel = np.abs(np.reshape(numpydata, (-1, N_CHANNELS))).mean(axis=1)
    # Interleave each averaged value with its negation: m0, -m0, m1, -m1, ...
    one_channel = np.asarray([multi_channel, -1 * multi_channel]).T.reshape(-1)

    if plot:
        plot_stft(one_channel, SAMPLE_RATE)
    if playback:
        p_oup = pyaudio.PyAudio()
        try:
            # The interleaved buffer is played as two channels.
            out_stream = p_oup.open(
                format=pyaudio.paFloat32,
                channels=2,
                rate=SAMPLE_RATE,
                output=True)
            try:
                out_stream.write(one_channel.tobytes())
            finally:
                out_stream.close()
        finally:
            p_oup.terminate()

    ims, _ = generate_spec_frec(one_channel, SAMPLE_RATE)
    return ims
added audio recording snippet 2017-10-24 06:24:15 +00:00			`import pyaudio`
visualizing and playing sound files where prediction fails 2017-11-13 13:52:30 +00:00			`from pysndfile import sndio as snd`
added audio recording snippet 2017-10-24 06:24:15 +00:00			`import numpy as np`
refactored spectrogram and implemented record and generate spectrogram 2017-10-25 08:07:17 +00:00			`# from matplotlib import pyplot as plt`
visualizing and playing sound files where prediction fails 2017-11-13 13:52:30 +00:00			`from speech_spectrum import plot_stft, generate_spec_frec`
added audio recording snippet 2017-10-24 06:24:15 +00:00
visualizing and playing sound files where prediction fails 2017-11-13 13:52:30 +00:00			`SAMPLE_RATE = 22050`
			`N_CHANNELS = 2`

			`def file_player():`
			`p_oup = pyaudio.PyAudio()`
			`def play_file(audiopath,plot=False):`
			`print('playing',audiopath)`
			`samples, samplerate, form = snd.read(audiopath)`
			`stream = p_oup.open(`
			`format=pyaudio.paFloat32,`
			`channels=2,`
			`rate=samplerate,`
			`output=True)`
			`one_channel = np.asarray([samples, samples]).T.reshape(-1)`
			`audio_data = one_channel.astype(np.float32).tobytes()`
			`stream.write(audio_data)`
			`stream.close()`
			`if plot:`
			`plot_stft(samples, SAMPLE_RATE)`
			`def close_player():`
			`p_oup.terminate()`
			`return play_file,close_player`
added audio recording snippet 2017-10-24 06:24:15 +00:00
added code to record and generate spectrogram, wip test model 2017-10-25 10:08:03 +00:00			`def record_spectrogram(n_sec, plot=False, playback=False):`
visualizing and playing sound files where prediction fails 2017-11-13 13:52:30 +00:00			`# show_record_prompt()`
added code to record and generate spectrogram, wip test model 2017-10-25 10:08:03 +00:00			`N_SEC = n_sec`
			`CHUNKSIZE = int(SAMPLE_RATE * N_SEC / N_CHANNELS) # fixed chunk size`
			`input('Press [Enter] to start recording sample... ')`
			`p_inp = pyaudio.PyAudio()`
			`stream = p_inp.open(`
			`format=pyaudio.paFloat32,`
			`channels=N_CHANNELS,`
			`rate=SAMPLE_RATE,`
			`input=True,`
			`frames_per_buffer=CHUNKSIZE)`
			`data = stream.read(CHUNKSIZE)`
			`numpydata = np.frombuffer(data, dtype=np.float32)`
			`multi_channel = np.abs(np.reshape(numpydata, (-1, 2))).mean(axis=1)`
			`one_channel = np.asarray([multi_channel, -1 * multi_channel]).T.reshape(-1)`
			`mean_channel_data = one_channel.tobytes()`
			`stream.stop_stream()`
			`stream.close()`
			`p_inp.terminate()`
			`if plot:`
			`plot_stft(one_channel, SAMPLE_RATE)`
			`if playback:`
			`p_oup = pyaudio.PyAudio()`
			`stream = p_oup.open(`
			`format=pyaudio.paFloat32,`
			`channels=2,`
			`rate=SAMPLE_RATE,`
			`output=True)`
			`stream.write(mean_channel_data)`
			`stream.close()`
			`p_oup.terminate()`
implemented tts gen variants 2017-10-27 13:23:22 +00:00			`ims, _ = generate_spec_frec(one_channel, SAMPLE_RATE)`
added code to record and generate spectrogram, wip test model 2017-10-25 10:08:03 +00:00			`return ims`