speech-scoring/speech_pitch.py

124 lines
4.5 KiB
Python

import parselmouth as pm
from pysndfile import sndio as snd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set() # Use seaborn's default style to make graphs more pretty
def pm_snd(sample_file):
    """Read an audio file and wrap its samples in a parselmouth ``Sound``.

    ``sample_file`` is handed unchanged to ``pysndfile.sndio.read`` (for
    example ``'inputs/self-apple/apple-low1.aiff'``).  That call yields three
    values; the first two are used as the sample data and the sampling
    frequency of the new ``Sound``, the third is discarded.
    """
    data, rate, _unused = snd.read(sample_file)
    return pm.Sound(values=data, sampling_frequency=rate)
def pitch_array(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Return the pitch analysis of ``sample_file`` in array form.

    The file is loaded with ``pm_snd``, analysed with ``to_pitch()``, turned
    into a matrix with ``to_matrix()`` and finally exported via ``as_array()``.
    """
    return pm_snd(sample_file).to_pitch().to_matrix().as_array()
def intensity_array(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Return the intensity analysis of ``sample_file`` in array form.

    The file is loaded with ``pm_snd`` and analysed with ``to_intensity()``,
    the same call ``plot_sample_intensity`` uses.

    Parameters:
        sample_file: path of the audio file to analyse.

    Returns:
        Whatever ``as_array()`` yields for the intensity object.
    """
    sample_sound = pm_snd(sample_file)
    sample_intensity = sample_sound.to_intensity()
    # NOTE(review): assumes the intensity object exposes as_array() directly
    # (i.e. it is itself a matrix) -- confirm against the installed
    # parselmouth version.
    return sample_intensity.as_array()
def compute_mfcc(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Return the MFCC analysis of ``sample_file`` in array form.

    Loads the file with ``pm_snd``, runs ``to_mfcc()`` on the resulting sound
    and exports the result with ``to_array()``.
    """
    mfcc = pm_snd(sample_file).to_mfcc()
    return mfcc.to_array()
def compute_formants(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Run a Burg formant analysis on ``sample_file`` and return its x bins.

    Parameters:
        sample_file: path of the audio file to analyse.  The argument is
            honoured as given; it is not replaced by the default path.

    Returns:
        The value of ``x_bins()`` on the object produced by
        ``to_formant_burg()``.
    """
    sample_sound = pm_snd(sample_file)
    sample_formant = sample_sound.to_formant_burg()
    return sample_formant.x_bins()
def draw_spectrogram(spectrogram, dynamic_range=70):
    """Draw ``spectrogram`` as a dB-scaled colour mesh with pyplot.

    The transposed ``spectrogram.values`` are converted with
    ``10 * log10(...)``.  The colour scale starts ``dynamic_range`` below the
    largest converted value, and the y axis is limited to the spectrogram's
    ``ymin``..``ymax``.
    """
    time_grid = spectrogram.x_grid()
    freq_grid = spectrogram.y_grid()
    level_db = 10 * np.log10(spectrogram.values.T)
    colour_floor = level_db.max() - dynamic_range
    plt.pcolormesh(time_grid, freq_grid, level_db, vmin=colour_floor, cmap='afmhot')
    plt.ylim([spectrogram.ymin, spectrogram.ymax])
    plt.xlabel("time [s]")
    plt.ylabel("frequency [Hz]")
def draw_intensity(intensity):
    """Plot ``intensity.values`` against ``intensity.xs()`` with pyplot.

    The curve is drawn twice: first as a wide white line, then as a thin line
    in the default colour on top of it.  The grid is switched off and the
    lower y limit is set to 0.
    """
    # Wide white stroke first, thin default-colour stroke second.
    for stroke in ({'linewidth': 3, 'color': 'w'}, {'linewidth': 1}):
        plt.plot(intensity.xs(), intensity.values, **stroke)
    plt.grid(False)
    plt.ylim(0)
    plt.ylabel("intensity [dB]")
def draw_pitch(pitch):
    """Plot the pitch contour of ``pitch`` with pyplot.

    Values are taken from ``pitch.to_matrix().values``.  Entries equal to 0
    (unvoiced samples) are overwritten with NaN so that they are not drawn.
    The contour is plotted as a wide white line with a thin default-colour
    line on top, and the y axis runs from 0 to ``pitch.ceiling``.
    """
    contour = pitch.to_matrix().values
    unvoiced = contour == 0
    contour[unvoiced] = np.nan
    # Wide white stroke first, thin default-colour stroke second.
    for stroke in ({'linewidth': 3, 'color': 'w'}, {'linewidth': 1}):
        plt.plot(pitch.xs(), contour, **stroke)
    plt.grid(False)
    plt.ylim(0, pitch.ceiling)
    plt.ylabel("pitch [Hz]")
def draw_formants(formant):
    """Plot the values of ``formant`` with pyplot.

    Mirrors ``draw_pitch``: zero entries are replaced by NaN so they are not
    drawn, and the curve is plotted as a wide white line with a thin
    default-colour line on top.

    Parameters:
        formant: the formant analysis object to draw.
    """
    # NOTE(review): to_matrix() is kept from the original code; confirm the
    # formant object of the installed parselmouth version provides it.
    formant_values = formant.to_matrix().values
    formant_values[formant_values == 0] = np.nan
    plt.plot(formant.xs(), formant_values, linewidth=3, color='w')
    plt.plot(formant.xs(), formant_values, linewidth=1)
    plt.grid(False)
    # Only the lower limit is fixed; no ceiling attribute is assumed on the
    # formant object, so the upper limit is left to matplotlib.
    plt.ylim(0)
    plt.ylabel("Formants [val]")
def plot_sample_raw(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Show the raw waveform of ``sample_file`` in a new figure.

    The sound is loaded with ``pm_snd``; its ``values`` are plotted against
    ``xs()`` with the x axis limited to the sound's ``xmin``..``xmax``.
    """
    sound = pm_snd(sample_file)
    plt.figure()
    plt.plot(sound.xs(), sound.values)
    time_span = [sound.xmin, sound.xmax]
    plt.xlim(time_span)
    plt.xlabel("time [s]")
    plt.ylabel("amplitude")
    plt.show()
def plot_sample_intensity(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Show the spectrogram of ``sample_file`` with its intensity curve on top.

    The spectrogram is drawn first; the intensity curve is then drawn on a
    twin y axis created with ``plt.twinx()``.  The x axis is limited to the
    sound's ``xmin``..``xmax``.
    """
    sound = pm_snd(sample_file)
    intensity_track = sound.to_intensity()
    spec = sound.to_spectrogram()

    plt.figure()
    draw_spectrogram(spec)
    plt.twinx()  # second y axis for the intensity curve
    draw_intensity(intensity_track)
    plt.xlim([sound.xmin, sound.xmax])
    plt.show()
def plot_sample_pitch(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Show the spectrogram of ``sample_file`` with its pitch contour on top.

    The spectrogram is computed with ``window_length=0.03`` and
    ``maximum_frequency=8000``.  The pitch contour is drawn on a twin y axis
    created with ``plt.twinx()``, and the x axis is limited to the sound's
    ``xmin``..``xmax``.
    """
    sound = pm_snd(sample_file)
    pitch_track = sound.to_pitch()
    spec = sound.to_spectrogram(window_length=0.03, maximum_frequency=8000)

    plt.figure()
    draw_spectrogram(spec)
    plt.twinx()  # second y axis for the pitch contour
    draw_pitch(pitch_track)
    plt.xlim([sound.xmin, sound.xmax])
    plt.show()
# snd_part = snd_d.extract_part(from_time=0.9, preserve_times=True)
# plt.figure()
# plt.plot(snd_part.xs(), snd_part.values, linewidth=0.5)
# plt.xlim([snd_part.xmin, snd_part.xmax])
# plt.xlabel("time [s]")
# plt.ylabel("amplitude")
# plt.show()
if __name__ == '__main__':
    # Plot the pitch contour of each demo recording, one figure per file.
    demo_files = (
        'outputs/audio/sunflowers-Victoria-180-normal-870.aiff',
        'inputs/self-apple/apple-low1.aiff',
        'inputs/self-apple/apple-low2.aiff',
        'inputs/self-apple/apple-medium1.aiff',
    )
    for demo_file in demo_files:
        plot_sample_pitch(demo_file)