refactored spectrogram and implemented record and generate spectrogram
parent
82d0398d2c
commit
a8f17ef764
|
|
@ -1,55 +1,36 @@
|
||||||
import pyaudio
|
import pyaudio
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from matplotlib import pyplot as plt
|
# from matplotlib import pyplot as plt
|
||||||
|
from spectro_gen import plot_stft
|
||||||
|
|
||||||
CHUNKSIZE = 44100 * 10 # fixed chunk size
|
SAMPLE_RATE = 22050
|
||||||
|
N_SEC = 1.5
|
||||||
|
CHUNKSIZE = int(SAMPLE_RATE * N_SEC) # fixed chunk size
|
||||||
|
|
||||||
# initialize portaudio
|
|
||||||
p_inp = pyaudio.PyAudio()
|
p_inp = pyaudio.PyAudio()
|
||||||
# dev_n = p.get_device_count()
|
|
||||||
# dev_infos = [p.get_device_info_by_index(index) for index in range(dev_n)]
|
|
||||||
# [i for i in dev_infos] # if i['name'] == 'record']
|
|
||||||
stream = p_inp.open(
|
stream = p_inp.open(
|
||||||
format=pyaudio.paInt24,
|
format=pyaudio.paFloat32,
|
||||||
channels=2,
|
channels=2,
|
||||||
rate=44100,
|
rate=SAMPLE_RATE,
|
||||||
input=True,
|
input=True,
|
||||||
frames_per_buffer=CHUNKSIZE)
|
frames_per_buffer=CHUNKSIZE)
|
||||||
|
|
||||||
# do this as long as you want fresh samples
|
|
||||||
data = stream.read(CHUNKSIZE)
|
data = stream.read(CHUNKSIZE)
|
||||||
len(data)
|
numpydata = np.frombuffer(data, dtype=np.float32)
|
||||||
CHUNKSIZE*10
|
multi_channel = np.abs(np.reshape(numpydata, (-1, 2))).mean(axis=1)
|
||||||
numpydata = np.fromstring(data, dtype=np.int16)
|
one_channel = np.asarray([multi_channel, -1 * multi_channel]).T.reshape(-1)
|
||||||
|
mean_channel_data = one_channel.tobytes()
|
||||||
|
plot_stft(one_channel, SAMPLE_RATE)
|
||||||
|
# plt.plot(one_channel)
|
||||||
|
# plt.show()
|
||||||
|
|
||||||
# plot data
|
|
||||||
plt.plot(numpydata)
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
# close stream
|
|
||||||
stream.stop_stream()
|
stream.stop_stream()
|
||||||
stream.close()
|
stream.close()
|
||||||
p_inp.terminate()
|
p_inp.terminate()
|
||||||
# open the file for reading.
|
|
||||||
# wf = wave.open(sys.argv[1], 'rb')
|
|
||||||
|
|
||||||
# create an audio object
|
|
||||||
# p = pyaudio.PyAudio()
|
|
||||||
|
|
||||||
# open stream based on the wave object which has been input.
|
|
||||||
p_oup = pyaudio.PyAudio()
|
p_oup = pyaudio.PyAudio()
|
||||||
stream = p_oup.open(
|
stream = p_oup.open(
|
||||||
format=pyaudio.paInt24, channels=2, rate=44100, output=True)
|
format=pyaudio.paFloat32, channels=2, rate=SAMPLE_RATE, output=True)
|
||||||
|
stream.write(mean_channel_data)
|
||||||
# read data (based on the chunk size)
|
|
||||||
# data = wf.readframes(CHUNKSIZE)
|
|
||||||
|
|
||||||
# play stream (looping from beginning of file to the end)
|
|
||||||
# while data != '':
|
|
||||||
# writing to the stream is what *actually* plays the sound.
|
|
||||||
stream.write(data)
|
|
||||||
# data = wf.readframes(chunk)
|
|
||||||
|
|
||||||
# cleanup stuff.
|
|
||||||
stream.close()
|
stream.close()
|
||||||
p_oup.terminate()
|
p_oup.terminate()
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@
|
||||||
"""
|
"""
|
||||||
# %matplotlib inline
|
# %matplotlib inline
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
import pyaudio
|
||||||
from matplotlib import pyplot as plt
|
from matplotlib import pyplot as plt
|
||||||
from pysndfile import sndio as snd
|
from pysndfile import sndio as snd
|
||||||
from numpy.lib import stride_tricks
|
from numpy.lib import stride_tricks
|
||||||
|
|
@ -70,32 +71,25 @@ def logscale_spec(spec, sr=44100, factor=20.):
|
||||||
""" generate spectrogram for aiff audio with 150ms windows and 50ms overlap"""
|
""" generate spectrogram for aiff audio with 150ms windows and 50ms overlap"""
|
||||||
|
|
||||||
|
|
||||||
def generate_aiff_spectrogram(audiopath):
|
def generate_spectrogram(samples, samplerate):
|
||||||
samples, samplerate, _ = snd.read(audiopath)
|
|
||||||
# samplerate, samples = wav.read(audiopath)
|
# samplerate, samples = wav.read(audiopath)
|
||||||
# s = stft(samples, binsize)
|
# s = stft(samples, binsize)
|
||||||
s = stft(samples, samplerate * 150 // 1000, 1.0 / 3)
|
s = stft(samples, samplerate * 150 // 1000, 1.0 / 3)
|
||||||
|
|
||||||
sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate)
|
sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate)
|
||||||
ims = 20. * np.log10(np.abs(sshow) / 10e-6)
|
ims = 20. * np.log10(np.abs(sshow) / 10e-6)
|
||||||
|
return ims, freq
|
||||||
|
|
||||||
|
|
||||||
|
def generate_aiff_spectrogram(audiopath):
|
||||||
|
samples, samplerate, _ = snd.read(audiopath)
|
||||||
|
ims, _ = generate_spectrogram(samples, samplerate)
|
||||||
return ims
|
return ims
|
||||||
|
|
||||||
|
|
||||||
""" plot spectrogram"""
|
def plot_stft(samples, samplerate, binsize=2**10, plotpath=None, colormap="jet"):
|
||||||
|
(ims, freq) = generate_spectrogram(samples, samplerate)
|
||||||
|
|
||||||
def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="jet"):
|
|
||||||
samples, samplerate, _ = snd.read(audiopath)
|
|
||||||
# samplerate, samples = wav.read(audiopath)
|
|
||||||
# s = stft(samples, binsize)
|
|
||||||
# print(samplerate*150//1000)
|
|
||||||
s = stft(samples, samplerate * 150 // 1000, 1.0 / 3)
|
|
||||||
|
|
||||||
sshow, freq = logscale_spec(s, factor=1.0, sr=samplerate)
|
|
||||||
ims = 20. * np.log10(np.abs(sshow) / 10e-6) # amplitude to decibel
|
|
||||||
|
|
||||||
timebins, freqbins = np.shape(ims)
|
timebins, freqbins = np.shape(ims)
|
||||||
# import pdb;pdb.set_trace()
|
|
||||||
plt.figure(figsize=(15, 7.5))
|
plt.figure(figsize=(15, 7.5))
|
||||||
plt.imshow(
|
plt.imshow(
|
||||||
np.transpose(ims),
|
np.transpose(ims),
|
||||||
|
|
@ -118,24 +112,37 @@ def plotstft(audiopath, binsize=2**10, plotpath=None, colormap="jet"):
|
||||||
])
|
])
|
||||||
ylocs = np.int16(np.round(np.linspace(0, freqbins - 1, 10)))
|
ylocs = np.int16(np.round(np.linspace(0, freqbins - 1, 10)))
|
||||||
plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])
|
plt.yticks(ylocs, ["%.02f" % freq[i] for i in ylocs])
|
||||||
|
|
||||||
if plotpath:
|
if plotpath:
|
||||||
plt.savefig(plotpath, bbox_inches="tight")
|
plt.savefig(plotpath, bbox_inches="tight")
|
||||||
else:
|
else:
|
||||||
plt.show()
|
plt.show()
|
||||||
|
|
||||||
plt.clf()
|
plt.clf()
|
||||||
|
|
||||||
|
|
||||||
snd.get_info('./outputs/sunflowers-Alex-150-normal-589.aiff')
|
def plot_aiff_stft(audiopath, binsize=2**10, plotpath=None, colormap="jet"):
|
||||||
snd_data_arr = snd.read('./outputs/sunflowers-Alex-150-normal-589.aiff')[0]
|
samples, samplerate, _ = snd.read(audiopath)
|
||||||
snd_data = snd_data_arr.tobytes()
|
plot_stft(samples, samplerate)
|
||||||
snd_data_arr.dtype
|
|
||||||
len(snd_data)
|
|
||||||
|
def play_sunflower():
|
||||||
|
sample_r = snd.get_info('./outputs/sunflowers-Alex-150-normal-589.aiff')[0]
|
||||||
|
snd_data_f64 = snd.read('./outputs/sunflowers-Alex-150-normal-589.aiff')[0]
|
||||||
|
snd_data_f32 = snd_data_f64.astype(np.float32)
|
||||||
|
snd_data_f32.shape
|
||||||
|
snd_data = snd_data_f32.tobytes()
|
||||||
|
p_oup = pyaudio.PyAudio()
|
||||||
|
stream = p_oup.open(
|
||||||
|
format=pyaudio.paFloat32, channels=1, rate=sample_r, output=True)
|
||||||
|
stream.write(snd_data)
|
||||||
|
stream.close()
|
||||||
|
p_oup.terminate()
|
||||||
|
plot_stft(snd_data_f32, sample_r)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
plotstft('./outputs/sunflowers-Alex-150-normal-589.aiff')
|
play_sunflower()
|
||||||
plotstft('./outputs/sunflowers-Alex-180-normal-4763.aiff')
|
# plot_aiff_stft('./outputs/sunflowers-Alex-150-normal-589.aiff')
|
||||||
plotstft('./outputs/sunflowers-Victoria-180-normal-870.aiff')
|
# plot_aiff_stft('./outputs/sunflowers-Alex-180-normal-4763.aiff')
|
||||||
plotstft('./outputs/sunflowers-Fred-180-phoneme-9733.aiff')
|
# plot_aiff_stft('./outputs/sunflowers-Victoria-180-normal-870.aiff')
|
||||||
plotstft('./outputs/sunflowers-Fred-180-normal-6515.aiff')
|
# plot_aiff_stft('./outputs/sunflowers-Fred-180-phoneme-9733.aiff')
|
||||||
|
# plot_aiff_stft('./outputs/sunflowers-Fred-180-normal-6515.aiff')
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue