From a8f17ef76443742fdb22bc4529a289ee2aac9622 Mon Sep 17 00:00:00 2001
From: Malar Kannan <malar@avaz.in>
Date: Wed, 25 Oct 2017 13:37:17 +0530
Subject: [PATCH] refactored spectrogram and implemented record and generate
 spectrogram

---
 record_mic_speech.py | 51 +++++++++++------------------------
 spectro_gen.py       | 63 ++++++++++++++++++++++++--------------------
 2 files changed, 51 insertions(+), 63 deletions(-)

diff --git a/record_mic_speech.py b/record_mic_speech.py
index 8ffacb4..1420502 100644
--- a/record_mic_speech.py
+++ b/record_mic_speech.py
@@ -1,55 +1,36 @@
 import pyaudio
 import numpy as np
-from matplotlib import pyplot as plt
+# from matplotlib import pyplot as plt
+from spectro_gen import plot_stft
 
-CHUNKSIZE = 44100 * 10  # fixed chunk size
+SAMPLE_RATE = 22050
+N_SEC = 1.5
+CHUNKSIZE = int(SAMPLE_RATE * N_SEC)  # fixed chunk size
 
-# initialize portaudio
 p_inp = pyaudio.PyAudio()
-# dev_n = p.get_device_count()
-# dev_infos = [p.get_device_info_by_index(index) for index in range(dev_n)]
-# [i for i in dev_infos] # if i['name'] == 'record']
 stream = p_inp.open(
-    format=pyaudio.paInt24,
+    format=pyaudio.paFloat32,
     channels=2,
-    rate=44100,
+    rate=SAMPLE_RATE,
     input=True,
     frames_per_buffer=CHUNKSIZE)
 
-# do this as long as you want fresh samples
 data = stream.read(CHUNKSIZE)
-len(data)
-CHUNKSIZE*10
-numpydata = np.fromstring(data, dtype=np.int16)
+# The input stream yields interleaved float32 stereo frames: L0, R0, L1, ...
+numpydata = np.frombuffer(data, dtype=np.float32)
+# Downmix to mono by averaging both channels of each frame (no rectifying).
+one_channel = np.reshape(numpydata, (-1, 2)).mean(axis=1)
+# The output stream is stereo: write each mono sample to both channels.
+mean_channel_data = np.repeat(one_channel, 2).tobytes()
+plot_stft(one_channel, SAMPLE_RATE)
 
-# plot data
-plt.plot(numpydata)
-plt.show()
-
-# close stream
 stream.stop_stream()
 stream.close()
 p_inp.terminate()
-# open the file for reading.
-# wf = wave.open(sys.argv[1], 'rb')
 
-# create an audio object
-# p = pyaudio.PyAudio()
-
-# open stream based on the wave object which has been input.
 p_oup = pyaudio.PyAudio()
 stream = p_oup.open(
-    format=pyaudio.paInt24, channels=2, rate=44100, output=True)
-
-# read data (based on the chunk size)
-# data = wf.readframes(CHUNKSIZE)
-
-# play stream (looping from beginning of file to the end)
-# while data != '':
-# writing to the stream is what *actually* plays the sound.
-stream.write(data)
-# data = wf.readframes(chunk)
-
-# cleanup stuff.
+    format=pyaudio.paFloat32, channels=2, rate=SAMPLE_RATE, output=True)
+stream.write(mean_channel_data)
 stream.close()
 p_oup.terminate()
diff --git a/spectro_gen.py b/spectro_gen.py
index c0da090..c0f871f 100644
--- a/spectro_gen.py
+++ b/spectro_gen.py
@@ -7,6 +7,7 @@
 """
 # %matplotlib inline
 import numpy as np
+import pyaudio
 from matplotlib import pyplot as plt
 from pysndfile import sndio as snd
 from numpy.lib import stride_tricks
@@ -70,32 +71,25 @@ def logscale_spec(spec, sr=44100, factor=20.):
 """ generate spectrogram for aiff audio with 150ms windows and 50ms overlap"""
 
 
-def generate_aiff_spectrogram(audiopath):
-    samples, samplerate, _ = snd.read(audiopath)
+def generate_spectrogram(samples, samplerate):
     # samplerate, samples = wav.read(audiopath)
     # s = stft(samples, binsize)
     s = stft(samples, samplerate * 150 // 1000, 1.0 / 3)
 
     sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate)
     ims = 20. * np.log10(np.abs(sshow) / 10e-6)
+    return ims, freq
+
+
+def generate_aiff_spectrogram(audiopath):
+    """Read the audio file at `audiopath` and return its spectrogram array."""
+    samples, samplerate, _ = snd.read(audiopath)
+    return generate_spectrogram(samples, samplerate)[0]
 
 
-""" plot spectrogram"""
-
-
-def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="jet"):
-    samples, samplerate, _ = snd.read(audiopath)
-    # samplerate, samples = wav.read(audiopath)
-    # s = stft(samples, binsize)
-    # print(samplerate*150//1000)
-    s = stft(samples, samplerate * 150 // 1000, 1.0 / 3)
-
-    sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate)
-    ims = 20. * np.log10(np.abs(sshow) / 10e-6)  # amplitude to decibel
-
+def plot_stft(samples, samplerate, binsize=2**10, plotpath=None, colormap="jet"):
+    (ims, freq) = generate_spectrogram(samples, samplerate)
     timebins, freqbins = np.shape(ims)
-    # import pdb;pdb.set_trace()
     plt.figure(figsize=(15, 7.5))
     plt.imshow(
         np.transpose(ims),
@@ -118,24 +112,37 @@ def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="jet"):
     ])
     ylocs = np.int16(np.round(np.linspace(0, freqbins - 1, 10)))
     plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])
-
     if plotpath:
         plt.savefig(plotpath, bbox_inches="tight")
     else:
         plt.show()
-
     plt.clf()
 
 
-snd.get_info('./outputs/sunflowers-Alex-150-normal-589.aiff')
-snd_data_arr = snd.read('./outputs/sunflowers-Alex-150-normal-589.aiff')[0]
-snd_data = snd_data_arr.tobytes()
-snd_data_arr.dtype
-len(snd_data)
+def plot_aiff_stft(audiopath, binsize=2**10, plotpath=None, colormap="jet"):
+    """Plot the audio file at `audiopath`, forwarding all plot_stft options."""
+    plot_stft(*snd.read(audiopath)[:2], binsize, plotpath, colormap)
+
+
+def play_sunflower():
+    """Play the sample sunflowers AIFF clip, then plot its spectrogram."""
+    aiff_path = './outputs/sunflowers-Alex-150-normal-589.aiff'
+    snd_data_f64, sample_r, _ = snd.read(aiff_path)  # one read: data + rate
+    snd_data_f32 = snd_data_f64.astype(np.float32)  # must match paFloat32
+    p_oup = pyaudio.PyAudio()
+    stream = p_oup.open(
+        format=pyaudio.paFloat32, channels=1, rate=sample_r, output=True)
+    stream.write(snd_data_f32.tobytes())
+    stream.stop_stream()  # let queued audio finish before the stream closes
+    stream.close()
+    p_oup.terminate()
+    plot_stft(snd_data_f32, sample_r)
+
 
 if __name__ == '__main__':
-    plotstft('./outputs/sunflowers-Alex-150-normal-589.aiff')
-    plotstft('./outputs/sunflowers-Alex-180-normal-4763.aiff')
-    plotstft('./outputs/sunflowers-Victoria-180-normal-870.aiff')
-    plotstft('./outputs/sunflowers-Fred-180-phoneme-9733.aiff')
-    plotstft('./outputs/sunflowers-Fred-180-normal-6515.aiff')
+    play_sunflower()
+    # plot_aiff_stft('./outputs/sunflowers-Alex-150-normal-589.aiff')
+    # plot_aiff_stft('./outputs/sunflowers-Alex-180-normal-4763.aiff')
+    # plot_aiff_stft('./outputs/sunflowers-Victoria-180-normal-870.aiff')
+    # plot_aiff_stft('./outputs/sunflowers-Fred-180-phoneme-9733.aiff')
+    # plot_aiff_stft('./outputs/sunflowers-Fred-180-normal-6515.aiff')