"""Helpers to extract, plot and play acoustic features of audio samples.

Audio files are read with pysndfile, wrapped in Parselmouth ``Sound``
objects, analysed (pitch, intensity, MFCC, formants), plotted with
matplotlib and optionally played back through PyAudio.
"""

import parselmouth as pm
from pysndfile import sndio as snd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pyaudio as pa

sns.set()  # Use seaborn's default style to make graphs more pretty

# Sample used as the default input by every file-based helper below.
DEFAULT_SAMPLE_FILE = 'outputs/audio/sunflowers-Victoria-180-normal-870.aiff'


def pm_snd(sample_file):
    """Read ``sample_file`` and return it as a ``parselmouth.Sound``."""
    samples, samplerate, _ = snd.read(sample_file)
    return pm.Sound(values=samples, sampling_frequency=samplerate)


def pitch_array(sample_file=DEFAULT_SAMPLE_FILE):
    """Return the pitch analysis of ``sample_file`` as an array."""
    sample_sound = pm_snd(sample_file)
    sample_pitch = sample_sound.to_pitch()
    return sample_pitch.to_matrix().as_array()


def intensity_array(sample_file=DEFAULT_SAMPLE_FILE):
    """Return the intensity contour of ``sample_file`` as an array.

    The previous implementation computed an MFCC instead of an intensity
    object and then returned an undefined ``sample_pitch`` variable, so it
    always raised ``NameError``.
    """
    sample_sound = pm_snd(sample_file)
    sample_intensity = sample_sound.to_intensity()
    return sample_intensity.as_array()


def compute_mfcc(sample_file=DEFAULT_SAMPLE_FILE):
    """Return the MFCC analysis of ``sample_file`` as an array."""
    sample_sound = pm_snd(sample_file)
    sample_mfcc = sample_sound.to_mfcc()
    return sample_mfcc.to_array()


def compute_formants(sample_file=DEFAULT_SAMPLE_FILE):
    """Run Burg formant analysis on ``sample_file`` and return its x bins."""
    sample_sound = pm_snd(sample_file)
    sample_formant = sample_sound.to_formant_burg()
    return sample_formant.x_bins()


def draw_spectrogram(spectrogram, dynamic_range=70):
    """Draw ``spectrogram`` in dB on the current axes.

    Values more than ``dynamic_range`` dB below the maximum are clipped to
    the bottom of the colour map.
    """
    # NOTE(review): the ``.T`` assumes the array orientation of the
    # parselmouth version this script was written against - confirm
    # against the installed version.
    X, Y = spectrogram.x_grid(), spectrogram.y_grid()
    sg_db = 10 * np.log10(spectrogram.values.T)
    plt.pcolormesh(X, Y, sg_db, vmin=sg_db.max() - dynamic_range,
                   cmap='afmhot')
    plt.ylim([spectrogram.ymin, spectrogram.ymax])
    plt.xlabel("time [s]")
    plt.ylabel("frequency [Hz]")


def draw_intensity(intensity):
    """Draw the ``intensity`` contour on the current axes."""
    # A thick white line under a thin coloured one keeps the contour
    # readable on top of a spectrogram.
    plt.plot(intensity.xs(), intensity.values, linewidth=3, color='w')
    plt.plot(intensity.xs(), intensity.values, linewidth=1)
    plt.grid(False)
    plt.ylim(0)
    plt.ylabel("intensity [dB]")


def draw_pitch(pitch):
    """Draw the ``pitch`` contour on the current axes."""
    # Extract the pitch values and replace zeros (unvoiced samples) by NaN
    # so that they are not plotted.
    pitch_values = pitch.to_matrix().values
    pitch_values[pitch_values == 0] = np.nan
    plt.plot(pitch.xs(), pitch_values, linewidth=3, color='w')
    plt.plot(pitch.xs(), pitch_values, linewidth=1)
    plt.grid(False)
    plt.ylim(0, pitch.ceiling)
    plt.ylabel("pitch [Hz]")


def draw_formants(formant, max_formants=5):
    """Draw one track per formant of ``formant`` on the current axes.

    The previous implementation referenced the undefined names ``pitch``
    and ``pitch_values`` and therefore always raised ``NameError``.

    Args:
        formant: A Parselmouth ``Formant`` object, e.g. the result of
            ``Sound.to_formant_burg()``.
        max_formants: Highest formant number to draw (numbering starts at 1).
    """
    times = formant.xs()
    for number in range(1, max_formants + 1):
        # Presumably undefined frames come back as NaN, which matplotlib
        # leaves as gaps in the line - verify against the parselmouth docs.
        track = np.array(
            [formant.get_value_at_time(number, t) for t in times])
        plt.plot(times, track, linewidth=3, color='w')
        plt.plot(times, track, linewidth=1)
    plt.grid(False)
    plt.ylim(0)
    plt.ylabel("Formants [val]")


def plot_sample_raw(sample_file=DEFAULT_SAMPLE_FILE):
    """Plot the raw waveform of ``sample_file`` and show the figure."""
    snd_d = pm_snd(sample_file)
    plt.figure()
    plt.plot(snd_d.xs(), snd_d.values)
    plt.xlim([snd_d.xmin, snd_d.xmax])
    plt.xlabel("time [s]")
    plt.ylabel("amplitude")
    plt.show()


def plot_file_intensity(sample_file=DEFAULT_SAMPLE_FILE):
    """Load ``sample_file`` and plot its intensity over its spectrogram."""
    snd_d = pm_snd(sample_file)
    plot_sample_intensity(snd_d)


def plot_sample_intensity(snd_d):
    """Plot the intensity of sound ``snd_d`` on top of its spectrogram."""
    intensity = snd_d.to_intensity()
    spectrogram = snd_d.to_spectrogram()
    plt.figure()
    draw_spectrogram(spectrogram)
    plt.twinx()  # second y axis so intensity gets its own scale
    draw_intensity(intensity)
    plt.xlim([snd_d.xmin, snd_d.xmax])
    plt.show()


def plot_file_pitch(sample_file=DEFAULT_SAMPLE_FILE):
    """Load ``sample_file`` and plot its pitch over its spectrogram."""
    snd_d = pm_snd(sample_file)
    plot_sample_pitch(snd_d)


def plot_sample_pitch(snd_d, phons=()):
    """Plot the pitch of sound ``snd_d`` on top of its spectrogram.

    Args:
        snd_d: The Parselmouth ``Sound`` to analyse.
        phons: Iterable of ``(time, label)`` pairs; a vertical line and the
            label are drawn at each time. Defaults to an immutable empty
            tuple instead of a shared mutable list.
    """
    pitch = snd_d.to_pitch()
    spectrogram = snd_d.to_spectrogram(window_length=0.03,
                                       maximum_frequency=8000)
    plt.figure()
    draw_spectrogram(spectrogram)
    plt.twinx()  # second y axis so pitch gets its own scale
    draw_pitch(pitch)
    for (p, c) in phons:
        plt.axvline(x=p)
        plt.text(p, -1, c)
    plt.xlim([snd_d.xmin, snd_d.xmax])
    plt.show()


def play_sound(samplerate=22050):
    """Open a stereo float32 output stream and return playback callbacks.

    Args:
        samplerate: Rate in Hz the output stream is opened with. The stream
            is opened once, so sounds passed to the player are written at
            this rate.

    Returns:
        A ``(sample_player, close_player)`` tuple. ``sample_player(sound)``
        writes a sound to the stream; ``close_player()`` closes the stream
        and terminates PyAudio and must be called once playback is done.
    """
    p_oup = pa.PyAudio()
    stream = p_oup.open(
        format=pa.paFloat32,
        channels=2,
        rate=samplerate,
        output=True)

    def sample_player(snd_sample=None):
        """Write ``snd_sample`` to the open output stream."""
        if snd_sample is None:
            raise ValueError("sample_player requires a sound to play")
        # NOTE(review): ``[:, 0]`` assumes as_array() is laid out as
        # (n_samples, n_channels) - confirm against the installed
        # parselmouth version.
        samples = snd_sample.as_array()[:, 0]
        # Duplicate the channel and interleave it (L, R, L, R, ...) because
        # the stream was opened with two channels.
        one_channel = np.asarray([samples, samples]).T.reshape(-1)
        audio_data = one_channel.astype(np.float32).tobytes()
        stream.write(audio_data)

    def close_player():
        """Close the output stream and release PyAudio."""
        stream.close()
        p_oup.terminate()

    return sample_player, close_player


if __name__ == '__main__':
    # play_sound() takes a sample rate and returns the player callbacks; the
    # sound itself has to be handed to sample_player(), not to play_sound().
    sample_player, close_player = play_sound()
    try:
        plot_file_pitch(
            'outputs/audio/sunflowers-Victoria-180-normal-870.aiff')
        for played_file in (
                'outputs/test/a_warm_smile_and_a_good_heart-1917.aiff',
                'outputs/test/a_wrong_turn-3763.aiff',
                'inputs/self/a_wrong_turn-low1.aiff',
                'inputs/self/a_wrong_turn-low2.aiff',
        ):
            plot_file_pitch(played_file)
            sample_player(pm_snd(played_file))
        plot_file_pitch('inputs/self/apple-low1.aiff')
        plot_file_pitch('inputs/self/apple-low2.aiff')
        plot_file_pitch('inputs/self/apple-medium1.aiff')
    finally:
        # Always release the audio device, even if a plot or read fails.
        close_player()