diff --git a/speech_pitch.py b/speech_pitch.py
index ef76ee9..12cd6af 100644
--- a/speech_pitch.py
+++ b/speech_pitch.py
@@ -1,5 +1,9 @@
 import parselmouth as pm
 from pysndfile import sndio as snd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+sns.set()  # Use seaborn's default style to make graphs more pretty
 
 def pitch_array(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
     samples, samplerate, _ = snd.read(sample_file)
@@ -23,24 +27,162 @@ def compute_mfcc(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.a
     # sample_mfcc.to_array().shape
     return sample_mfcc.to_array()
 
-# sunflowers_vic_180_norm = pitch_array('outputs/audio/sunflowers-Victoria-180-normal-870.aiff')
-# sunflowers_fred_180_norm = pitch_array('outputs/audio/sunflowers-Fred-180-normal-6515.aiff')
-# sunflowers_vic_180_norm_mfcc = compute_mfcc('outputs/audio/sunflowers-Victoria-180-normal-870.aiff')
-fred_180_norm_mfcc = compute_mfcc('outputs/audio/sunflowers-Fred-180-normal-6515.aiff')
-alex_mfcc = compute_mfcc('outputs/audio/sunflowers-Alex-180-normal-4763.aiff')
-# # sunflowers_vic_180_norm.shape
-# # sunflowers_fred_180_norm.shape
-# alex_mfcc.shape
-# sunflowers_vic_180_norm_mfcc.shape
-# sunflowers_fred_180_norm_mfcc.shape
-from speech_spectrum import generate_aiff_spectrogram
-vic_spec = generate_aiff_spectrogram('outputs/audio/sunflowers-Victoria-180-normal-870.aiff')
-alex_spec = generate_aiff_spectrogram('outputs/audio/sunflowers-Alex-180-normal-4763.aiff')
-alex150spec = generate_aiff_spectrogram('outputs/audio/sunflowers-Alex-150-normal-589.aiff')
-vic_spec.shape
-alex_spec.shape
-alex150spec.shape
-alex_mfcc.shape
-fred_180_norm_mfcc.shape
-# pm.SoundFileFormat
-# pm.Pitch.get_number_of_frames()
+def compute_formants(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
+    """Run Burg formant analysis on an audio file.
+
+    Args:
+        sample_file: Path of the audio file to analyse.
+
+    Returns:
+        The formant object produced by ``pm.Sound.to_formant_burg()``.
+    """
+    samples, samplerate, _ = snd.read(sample_file)
+    sample_sound = pm.Sound(values=samples, sampling_frequency=samplerate)
+    return sample_sound.to_formant_burg()
+
+def draw_spectrogram(spectrogram, dynamic_range=70):
+    """Draw a spectrogram, in dB, on the current matplotlib axes.
+
+    Args:
+        spectrogram: Object providing ``x_grid()``, ``y_grid()``, ``values``,
+            ``ymin`` and ``ymax``; the callers in this file pass the result
+            of ``pm.Sound.to_spectrogram()``.
+        dynamic_range: Width in dB of the colour scale, measured down from
+            the largest value in the spectrogram.
+    """
+    X, Y = spectrogram.x_grid(), spectrogram.y_grid()
+    # NOTE(review): the transpose assumes ``values`` is laid out as
+    # (time, frequency) -- confirm against the installed parselmouth version.
+    sg_db = 10 * np.log10(spectrogram.values.T)
+    plt.pcolormesh(X, Y, sg_db, vmin=sg_db.max() - dynamic_range, cmap='afmhot')
+    plt.ylim([spectrogram.ymin, spectrogram.ymax])
+    plt.xlabel("time [s]")
+    plt.ylabel("frequency [Hz]")
+
+def draw_intensity(intensity):
+    """Plot an intensity contour on the current matplotlib axes.
+
+    The curve is drawn twice: a thick white line with a thinner line in the
+    default colour on top of it.
+
+    Args:
+        intensity: Object providing ``xs()`` and ``values``; the caller in
+            this file passes the result of ``pm.Sound.to_intensity()``.
+    """
+    plt.plot(intensity.xs(), intensity.values, linewidth=3, color='w')
+    plt.plot(intensity.xs(), intensity.values, linewidth=1)
+    plt.grid(False)
+    plt.ylim(0)
+    plt.ylabel("intensity [dB]")
+
+def draw_pitch(pitch):
+    """Plot a pitch contour on the current matplotlib axes.
+
+    Args:
+        pitch: Object providing ``to_matrix()``, ``xs()`` and ``ceiling``;
+            the caller in this file passes the result of
+            ``pm.Sound.to_pitch()``.
+    """
+    # Extract selected pitch contour, and
+    # replace unvoiced samples by NaN to not plot
+    pitch_values = pitch.to_matrix().values
+    pitch_values[pitch_values == 0] = np.nan
+    plt.plot(pitch.xs(), pitch_values, linewidth=3, color='w')
+    plt.plot(pitch.xs(), pitch_values, linewidth=1)
+    plt.grid(False)
+    plt.ylim(0, pitch.ceiling)
+    plt.ylabel("pitch [Hz]")
+
+def pm_snd(sample_file):
+    """Read an audio file and wrap its samples in a ``pm.Sound``.
+
+    Args:
+        sample_file: Path of the audio file to read.
+
+    Returns:
+        A ``pm.Sound`` built from the samples and sample rate of the file.
+    """
+    samples, samplerate, _ = snd.read(sample_file)
+    return pm.Sound(values=samples, sampling_frequency=samplerate)
+
+def plot_sample_raw(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
+    """Show the waveform of an audio file in a new figure.
+
+    Args:
+        sample_file: Path of the audio file to plot.
+    """
+    snd_d = pm_snd(sample_file)
+    plt.figure()
+    plt.plot(snd_d.xs(), snd_d.values)
+    plt.xlim([snd_d.xmin, snd_d.xmax])
+    plt.xlabel("time [s]")
+    plt.ylabel("amplitude")
+    plt.show()
+
+def plot_sample_intensity(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
+    """Show the spectrogram of an audio file with its intensity overlaid.
+
+    Args:
+        sample_file: Path of the audio file to plot.
+    """
+    snd_d = pm_snd(sample_file)
+    intensity = snd_d.to_intensity()
+    spectrogram = snd_d.to_spectrogram()
+    plt.figure()
+    draw_spectrogram(spectrogram)
+    plt.twinx()  # second y-axis so intensity gets its own scale
+    draw_intensity(intensity)
+    plt.xlim([snd_d.xmin, snd_d.xmax])
+    plt.show()
+
+def plot_sample_pitch(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
+    """Show the spectrogram of an audio file with its pitch overlaid.
+
+    Args:
+        sample_file: Path of the audio file to plot.
+    """
+    snd_d = pm_snd(sample_file)
+    pitch = snd_d.to_pitch()
+    spectrogram = snd_d.to_spectrogram(window_length=0.03, maximum_frequency=8000)
+    plt.figure()
+    draw_spectrogram(spectrogram)
+    plt.twinx()  # second y-axis so pitch gets its own scale
+    draw_pitch(pitch)
+    plt.xlim([snd_d.xmin, snd_d.xmax])
+    plt.show()
+
+    # snd_part = snd_d.extract_part(from_time=0.9, preserve_times=True)
+    # plt.figure()
+    # plt.plot(snd_part.xs(), snd_part.values, linewidth=0.5)
+    # plt.xlim([snd_part.xmin, snd_part.xmax])
+    # plt.xlabel("time [s]")
+    # plt.ylabel("amplitude")
+    # plt.show()
+
+
+if __name__ == '__main__':
+    # sunflowers_vic_180_norm = pitch_array('outputs/audio/sunflowers-Victoria-180-normal-870.aiff')
+    # sunflowers_fred_180_norm = pitch_array('outputs/audio/sunflowers-Fred-180-normal-6515.aiff')
+    # sunflowers_vic_180_norm_mfcc = compute_mfcc('outputs/audio/sunflowers-Victoria-180-normal-870.aiff')
+    # fred_180_norm_mfcc = compute_mfcc('outputs/audio/sunflowers-Fred-180-normal-6515.aiff')
+    # alex_mfcc = compute_mfcc('outputs/audio/sunflowers-Alex-180-normal-4763.aiff')
+    # # # sunflowers_vic_180_norm.shape
+    # # # sunflowers_fred_180_norm.shape
+    # # alex_mfcc.shape
+    # # sunflowers_vic_180_norm_mfcc.shape
+    # # sunflowers_fred_180_norm_mfcc.shape
+    # from speech_spectrum import generate_aiff_spectrogram
+    # vic_spec = generate_aiff_spectrogram('outputs/audio/sunflowers-Victoria-180-normal-870.aiff')
+    # alex_spec = generate_aiff_spectrogram('outputs/audio/sunflowers-Alex-180-normal-4763.aiff')
+    # alex150spec = generate_aiff_spectrogram('outputs/audio/sunflowers-Alex-150-normal-589.aiff')
+    # vic_spec.shape
+    # alex_spec.shape
+    # alex150spec.shape
+    # alex_mfcc.shape
+    # fred_180_norm_mfcc.shape
+    plot_sample_pitch('outputs/audio/sunflowers-Victoria-180-normal-870.aiff')
+    plot_sample_pitch('inputs/self-apple/apple-low1.aiff')
+    plot_sample_pitch('inputs/self-apple/apple-low2.aiff')
+    plot_sample_pitch('inputs/self-apple/apple-medium1.aiff')
+    # pm.SoundFileFormat
+    # pm.Pitch.get_number_of_frames()