From a8f17ef76443742fdb22bc4529a289ee2aac9622 Mon Sep 17 00:00:00 2001 From: Malar Kannan Date: Wed, 25 Oct 2017 13:37:17 +0530 Subject: [PATCH] refactored spectrogram and implemented record and generate spectrogram --- record_mic_speech.py | 51 +++++++++++------------------------ spectro_gen.py | 63 ++++++++++++++++++++++++-------------------- 2 files changed, 51 insertions(+), 63 deletions(-) diff --git a/record_mic_speech.py b/record_mic_speech.py index 8ffacb4..1420502 100644 --- a/record_mic_speech.py +++ b/record_mic_speech.py @@ -1,55 +1,36 @@ import pyaudio import numpy as np -from matplotlib import pyplot as plt +# from matplotlib import pyplot as plt +from spectro_gen import plot_stft -CHUNKSIZE = 44100 * 10 # fixed chunk size +SAMPLE_RATE = 22050 +N_SEC = 1.5 +CHUNKSIZE = int(SAMPLE_RATE * N_SEC) # fixed chunk size -# initialize portaudio p_inp = pyaudio.PyAudio() -# dev_n = p.get_device_count() -# dev_infos = [p.get_device_info_by_index(index) for index in range(dev_n)] -# [i for i in dev_infos] # if i['name'] == 'record'] stream = p_inp.open( - format=pyaudio.paInt24, + format=pyaudio.paFloat32, channels=2, - rate=44100, + rate=SAMPLE_RATE, input=True, frames_per_buffer=CHUNKSIZE) -# do this as long as you want fresh samples data = stream.read(CHUNKSIZE) -len(data) -CHUNKSIZE*10 -numpydata = np.fromstring(data, dtype=np.int16) +numpydata = np.frombuffer(data, dtype=np.float32) +multi_channel = np.abs(np.reshape(numpydata, (-1, 2))).mean(axis=1) +one_channel = np.asarray([multi_channel, -1 * multi_channel]).T.reshape(-1) +mean_channel_data = one_channel.tobytes() +plot_stft(one_channel, SAMPLE_RATE) +# plt.plot(one_channel) +# plt.show() -# plot data -plt.plot(numpydata) -plt.show() - -# close stream stream.stop_stream() stream.close() p_inp.terminate() -# open the file for reading. -# wf = wave.open(sys.argv[1], 'rb') -# create an audio object -# p = pyaudio.PyAudio() - -# open stream based on the wave object which has been input. p_oup = pyaudio.PyAudio() stream = p_oup.open( - format=pyaudio.paInt24, channels=2, rate=44100, output=True) - -# read data (based on the chunk size) -# data = wf.readframes(CHUNKSIZE) - -# play stream (looping from beginning of file to the end) -# while data != '': -# writing to the stream is what *actually* plays the sound. -stream.write(data) -# data = wf.readframes(chunk) - -# cleanup stuff. + format=pyaudio.paFloat32, channels=2, rate=SAMPLE_RATE, output=True) +stream.write(mean_channel_data) stream.close() p_oup.terminate() diff --git a/spectro_gen.py b/spectro_gen.py index c0da090..c0f871f 100644 --- a/spectro_gen.py +++ b/spectro_gen.py @@ -7,6 +7,7 @@ """ # %matplotlib inline import numpy as np +import pyaudio from matplotlib import pyplot as plt from pysndfile import sndio as snd from numpy.lib import stride_tricks @@ -70,32 +71,25 @@ def logscale_spec(spec, sr=44100, factor=20.): """ generate spectrogram for aiff audio with 150ms windows and 50ms overlap""" -def generate_aiff_spectrogram(audiopath): - samples, samplerate, _ = snd.read(audiopath) +def generate_spectrogram(samples, samplerate): # samplerate, samples = wav.read(audiopath) # s = stft(samples, binsize) s = stft(samples, samplerate * 150 // 1000, 1.0 / 3) sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate) ims = 20. * np.log10(np.abs(sshow) / 10e-6) + return ims, freq + + +def generate_aiff_spectrogram(audiopath): + samples, samplerate, _ = snd.read(audiopath) + ims, _ = generate_spectrogram(samples, samplerate) return ims -""" plot spectrogram""" - - -def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="jet"): - samples, samplerate, _ = snd.read(audiopath) - # samplerate, samples = wav.read(audiopath) - # s = stft(samples, binsize) - # print(samplerate*150//1000) - s = stft(samples, samplerate * 150 // 1000, 1.0 / 3) - - sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate) - ims = 20. * np.log10(np.abs(sshow) / 10e-6) # amplitude to decibel - +def plot_stft(samples, samplerate, binsize=2**10, plotpath=None, colormap="jet"): + (ims, freq) = generate_spectrogram(samples, samplerate) timebins, freqbins = np.shape(ims) - # import pdb;pdb.set_trace() plt.figure(figsize=(15, 7.5)) plt.imshow( np.transpose(ims), @@ -118,24 +112,37 @@ def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="jet"): ]) ylocs = np.int16(np.round(np.linspace(0, freqbins - 1, 10))) plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs]) - if plotpath: plt.savefig(plotpath, bbox_inches="tight") else: plt.show() - plt.clf() -snd.get_info('./outputs/sunflowers-Alex-150-normal-589.aiff') -snd_data_arr = snd.read('./outputs/sunflowers-Alex-150-normal-589.aiff')[0] -snd_data = snd_data_arr.tobytes() -snd_data_arr.dtype -len(snd_data) +def plot_aiff_stft(audiopath, binsize=2**10, plotpath=None, colormap="jet"): + samples, samplerate, _ = snd.read(audiopath) + plot_stft(samples, samplerate) + + +def play_sunflower(): + sample_r = snd.get_info('./outputs/sunflowers-Alex-150-normal-589.aiff')[0] + snd_data_f64 = snd.read('./outputs/sunflowers-Alex-150-normal-589.aiff')[0] + snd_data_f32 = snd_data_f64.astype(np.float32) + snd_data_f32.shape + snd_data = snd_data_f32.tobytes() + p_oup = pyaudio.PyAudio() + stream = p_oup.open( + format=pyaudio.paFloat32, channels=1, rate=sample_r, output=True) + stream.write(snd_data) + stream.close() + p_oup.terminate() + plot_stft(snd_data_f32, sample_r) + if __name__ == '__main__': - plotstft('./outputs/sunflowers-Alex-150-normal-589.aiff') - plotstft('./outputs/sunflowers-Alex-180-normal-4763.aiff') - plotstft('./outputs/sunflowers-Victoria-180-normal-870.aiff') - plotstft('./outputs/sunflowers-Fred-180-phoneme-9733.aiff') - plotstft('./outputs/sunflowers-Fred-180-normal-6515.aiff') + play_sunflower() + # plot_aiff_stft('./outputs/sunflowers-Alex-150-normal-589.aiff') + # plot_aiff_stft('./outputs/sunflowers-Alex-180-normal-4763.aiff') + # plot_aiff_stft('./outputs/sunflowers-Victoria-180-normal-870.aiff') + # plot_aiff_stft('./outputs/sunflowers-Fred-180-phoneme-9733.aiff') + # plot_aiff_stft('./outputs/sunflowers-Fred-180-normal-6515.aiff')