speech-scoring/speech_pitch.py

import parselmouth as pm
from pysndfile import sndio as snd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pyaudio as pa
sns.set() # Use seaborn's default style to make graphs more pretty


def pm_snd(sample_file):
    """Load an audio file and wrap it in a parselmouth ``Sound``.

    The file is decoded with ``pysndfile.sndio.read``; the decoded samples
    and the sample rate are passed to ``pm.Sound``. The third value returned
    by the reader is not used.

    Args:
        sample_file: Path of the audio file to read.

    Returns:
        A ``pm.Sound`` built from the decoded samples.
    """
    audio, rate, _unused = snd.read(sample_file)
    return pm.Sound(values=audio, sampling_frequency=rate)

def pitch_array(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Return the pitch analysis of an audio file as an array.

    The file is loaded with ``pm_snd``, converted with ``to_pitch()``, and
    the resulting pitch object's matrix form is returned via ``as_array()``.

    Args:
        sample_file: Path of the audio file to analyse.
    """
    pitch = pm_snd(sample_file).to_pitch()
    return pitch.to_matrix().as_array()

def intensity_array(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Return the intensity contour of an audio file as an array.

    The file is loaded with ``pm_snd`` and analysed with ``to_intensity()``,
    the same call ``plot_sample_intensity`` uses. The contour is read from
    the intensity object's ``values`` attribute, as ``draw_intensity`` does.

    Args:
        sample_file: Path of the audio file to analyse.

    Returns:
        The ``values`` array of the computed intensity object.
    """
    # The previous body returned ``sample_pitch``, a name that does not exist
    # in this function, so every call raised NameError. It also computed an
    # MFCC object rather than an intensity contour.
    sample_sound = pm_snd(sample_file)
    sample_intensity = sample_sound.to_intensity()
    return sample_intensity.values

def compute_mfcc(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Compute the MFCC representation of an audio file.

    Args:
        sample_file: Path of the audio file to analyse.

    Returns:
        The result of ``to_array()`` on the MFCC object produced by
        ``Sound.to_mfcc()``.
    """
    mfcc = pm_snd(sample_file).to_mfcc()
    return mfcc.to_array()

def compute_formants(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Run Burg formant analysis on an audio file and return its x bins.

    Args:
        sample_file: Path of the audio file to analyse.

    Returns:
        The result of ``x_bins()`` on the formant object.
        NOTE(review): this is presumably the analysis time bins rather than
        the formant frequencies themselves -- confirm against callers.
    """
    burg_formants = pm_snd(sample_file).to_formant_burg()
    return burg_formants.x_bins()

def draw_spectrogram(spectrogram, dynamic_range=70):
    """Draw a spectrogram on the current matplotlib axes, in decibels.

    Args:
        spectrogram: Object providing ``x_grid()``, ``y_grid()``, ``values``,
            ``ymin`` and ``ymax``.
        dynamic_range: Span in dB below the loudest cell that is mapped onto
            the colour scale (used as the ``vmin`` offset).
    """
    time_edges = spectrogram.x_grid()
    freq_edges = spectrogram.y_grid()
    # Convert the transposed values to a dB scale.
    level_db = 10 * np.log10(spectrogram.values.T)
    colour_floor = level_db.max() - dynamic_range
    plt.pcolormesh(time_edges, freq_edges, level_db, vmin=colour_floor, cmap='afmhot')
    plt.ylim([spectrogram.ymin, spectrogram.ymax])
    plt.xlabel("time [s]")
    plt.ylabel("frequency [Hz]")

def draw_intensity(intensity):
    """Plot an intensity contour on the current matplotlib axes.

    The contour is drawn twice: first as a thick white line, then as a thin
    line in the default colour on top, so the curve stays visible over a
    spectrogram background.

    Args:
        intensity: Object providing ``xs()`` and ``values``.
    """
    times = intensity.xs()
    levels = intensity.values
    plt.plot(times, levels, linewidth=3, color='w')  # white outline
    plt.plot(times, levels, linewidth=1)  # coloured line on top
    plt.grid(False)
    plt.ylim(0)
    plt.ylabel("intensity [dB]")

def draw_pitch(pitch):
    """Plot a pitch contour on the current matplotlib axes.

    Zero entries in the pitch matrix are overwritten with NaN in place so
    matplotlib leaves gaps there (the original comment describes these as
    unvoiced samples). The contour is drawn as a thick white line with a
    thin line in the default colour on top.

    Args:
        pitch: Object providing ``to_matrix()``, ``xs()`` and ``ceiling``.
    """
    contour = pitch.to_matrix().values
    # In-place replacement of zeros by NaN so they are not plotted.
    np.putmask(contour, contour == 0, np.nan)
    times = pitch.xs()
    plt.plot(times, contour, linewidth=3, color='w')
    plt.plot(times, contour, linewidth=1)
    plt.grid(False)
    plt.ylim(0, pitch.ceiling)
    plt.ylabel("pitch [Hz]")

def draw_formants(formant):
    """Plot formant values on the current matplotlib axes.

    Zero entries are replaced by NaN so they are not plotted. The values are
    drawn as a thick white line with a thin line in the default colour on
    top, mirroring ``draw_pitch``.

    Args:
        formant: Formant object providing ``to_matrix()`` and ``xs()``.
            NOTE(review): ``to_matrix()`` on a formant object is kept from
            the original code -- confirm it exists in the installed
            parselmouth version.
    """
    # The previous body was copied from draw_pitch and still referred to
    # ``pitch_values`` and ``pitch``, neither of which exists here, so the
    # function raised NameError on every call.
    formant_values = formant.to_matrix().values
    formant_values[formant_values == 0] = np.nan
    times = formant.xs()
    plt.plot(times, formant_values, linewidth=3, color='w')
    plt.plot(times, formant_values, linewidth=1)
    plt.grid(False)
    # Only the lower limit is fixed: ``ceiling`` was taken from the pitch
    # object and nothing in this file shows a formant equivalent.
    plt.ylim(0)
    plt.ylabel("Formants [val]")


def plot_sample_raw(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Plot the raw waveform of an audio file and show the figure.

    Args:
        sample_file: Path of the audio file to load with ``pm_snd``.
    """
    sound = pm_snd(sample_file)
    plt.figure()
    plt.plot(sound.xs(), sound.values)
    # Restrict the x axis to the sound's own time range.
    plt.xlim([sound.xmin, sound.xmax])
    plt.xlabel("time [s]")
    plt.ylabel("amplitude")
    plt.show()

def plot_file_intensity(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Load an audio file and show its spectrogram with intensity overlay.

    Args:
        sample_file: Path of the audio file to load with ``pm_snd``.
    """
    plot_sample_intensity(pm_snd(sample_file))

def plot_sample_intensity(snd_d):
    """Show a sound's spectrogram with its intensity contour overlaid.

    The spectrogram is drawn first; the intensity contour is then drawn on a
    twin y axis so each keeps its own scale.

    Args:
        snd_d: Sound object providing ``to_intensity()``,
            ``to_spectrogram()``, ``xmin`` and ``xmax``.
    """
    contour = snd_d.to_intensity()
    spec = snd_d.to_spectrogram()
    plt.figure()
    draw_spectrogram(spec)
    plt.twinx()  # second y axis for the intensity scale
    draw_intensity(contour)
    plt.xlim([snd_d.xmin, snd_d.xmax])
    plt.show()

def plot_file_pitch(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):
    """Load an audio file and show its spectrogram with pitch overlay.

    Args:
        sample_file: Path of the audio file to load with ``pm_snd``.
    """
    plot_sample_pitch(pm_snd(sample_file))

def plot_sample_pitch(snd_d, phons=()):
    """Show a sound's spectrogram with its pitch contour overlaid.

    The spectrogram is drawn first and the pitch contour is drawn on a twin
    y axis. Each entry of ``phons`` adds a vertical marker line with a text
    label.

    Args:
        snd_d: Sound object providing ``to_pitch()``, ``to_spectrogram()``,
            ``xmin`` and ``xmax``.
        phons: Iterable of ``(position, label)`` pairs. ``position`` is the
            x coordinate of the marker line and ``label`` the text drawn
            next to it. Defaults to no markers. The default is an immutable
            empty tuple rather than a shared mutable list.
    """
    pitch = snd_d.to_pitch()
    spectrogram = snd_d.to_spectrogram(window_length=0.03, maximum_frequency=8000)
    plt.figure()
    draw_spectrogram(spectrogram)
    plt.twinx()  # second y axis for the pitch scale
    draw_pitch(pitch)
    for position, label in phons:
        plt.axvline(x=position)
        # y = -1 places the label just below the bottom of the pitch axis.
        plt.text(position, -1, label)
    plt.xlim([snd_d.xmin, snd_d.xmax])
    plt.show()

def play_sound(samplerate=22050):
    """Open a PyAudio output stream and return helpers to use and close it.

    A two-channel float32 output stream is opened at ``samplerate``.

    Args:
        samplerate: Rate passed to the PyAudio stream.

    Returns:
        A ``(sample_player, close_player)`` tuple.
        ``sample_player(snd_sample)`` writes a sound to the stream;
        ``close_player()`` closes the stream and terminates PyAudio.
    """
    audio_api = pa.PyAudio()
    out_stream = audio_api.open(
        format=pa.paFloat32,
        channels=2,
        rate=samplerate,
        output=True)

    def sample_player(snd_sample=None):
        # Column 0 of the sample array -- presumably the first channel;
        # confirm the array layout of the installed parselmouth version.
        mono = snd_sample.as_array()[:, 0]
        # Repeat every sample twice so both output channels carry the same
        # signal in interleaved order.
        interleaved = np.repeat(mono, 2)
        out_stream.write(interleaved.astype(np.float32).tobytes())

    def close_player():
        out_stream.close()
        audio_api.terminate()

    return sample_player, close_player
    # snd_part = snd_d.extract_part(from_time=0.9, preserve_times=True)
    # plt.figure()
    # plt.plot(snd_part.xs(), snd_part.values, linewidth=0.5)
    # plt.xlim([snd_part.xmin, snd_part.xmax])
    # plt.xlabel("time [s]")
    # plt.ylabel("amplitude")
    # plt.show()


if __name__ == '__main__':
    # Demo script: plot the pitch of several recordings and play some of them.
    #
    # play_sound() is a factory: its only parameter is the sample rate and it
    # returns (sample_player, close_player). The previous code passed a Sound
    # object as the sample rate and discarded the returned player, so nothing
    # was played and the stream was never closed.
    # NOTE(review): playback uses play_sound()'s default rate of 22050 --
    # confirm that matches the sample rate of these files.
    sample_player, close_player = play_sound()
    try:
        plot_file_pitch('outputs/audio/sunflowers-Victoria-180-normal-870.aiff')
        # These files are plotted and then played back.
        for demo_file in (
                'outputs/test/a_warm_smile_and_a_good_heart-1917.aiff',
                'outputs/test/a_wrong_turn-3763.aiff',
                'inputs/self/a_wrong_turn-low1.aiff',
                'inputs/self/a_wrong_turn-low2.aiff'):
            plot_file_pitch(demo_file)
            sample_player(pm_snd(demo_file))
        # These files are only plotted.
        for demo_file in (
                'inputs/self/apple-low1.aiff',
                'inputs/self/apple-low2.aiff',
                'inputs/self/apple-medium1.aiff'):
            plot_file_pitch(demo_file)
    finally:
        # Release the audio stream even if plotting or playback fails.
        close_player()
trying mfcc instead of spectrogram 2017-11-22 09:15:08 +00:00			`import parselmouth as pm`
			`from pysndfile import sndio as snd`
implemented pitch plotting 2017-11-24 09:02:13 +00:00			`import numpy as np`
			`import matplotlib.pyplot as plt`
			`import seaborn as sns`
implemented segmentation visualization 2017-11-30 09:19:55 +00:00			`import pyaudio as pa`
implemented pitch plotting 2017-11-24 09:02:13 +00:00			`sns.set() # Use seaborn's default style to make graphs more pretty`
trying mfcc instead of spectrogram 2017-11-22 09:15:08 +00:00
merged 2017-11-28 11:35:20 +00:00
			`def pm_snd(sample_file):`
			`# sample_file = 'inputs/self-apple/apple-low1.aiff'`
trying mfcc instead of spectrogram 2017-11-22 09:15:08 +00:00			`samples, samplerate, _ = snd.read(sample_file)`
merged 2017-11-28 11:35:20 +00:00			`return pm.Sound(values=samples,sampling_frequency=samplerate)`

			`def pitch_array(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):`
			`sample_sound = pm_snd(sample_file)`
trying mfcc instead of spectrogram 2017-11-22 09:15:08 +00:00			`sample_pitch = sample_sound.to_pitch()`
			`return sample_pitch.to_matrix().as_array()`

			`def intensity_array(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):`
merged 2017-11-28 11:35:20 +00:00			`sample_sound = pm_snd(sample_file)`
trying mfcc instead of spectrogram 2017-11-22 09:15:08 +00:00			`sample_intensity = sample_sound.to_mfcc()`
			`sample_intensity.as_array().shape`
			`return sample_pitch.to_matrix().as_array()`

			`def compute_mfcc(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):`
merged 2017-11-28 11:35:20 +00:00			`sample_sound = pm_snd(sample_file)`
trying mfcc instead of spectrogram 2017-11-22 09:15:08 +00:00			`sample_mfcc = sample_sound.to_mfcc()`
			`# sample_mfcc.to_array().shape`
			`return sample_mfcc.to_array()`

implemented pitch plotting 2017-11-24 09:02:13 +00:00			`def compute_formants(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):`
implemented phoneme segmented training on samples 2017-12-28 13:23:54 +00:00			`# sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'`
merged 2017-11-28 11:35:20 +00:00			`sample_sound = pm_snd(sample_file)`
implemented pitch plotting 2017-11-24 09:02:13 +00:00			`sample_formant = sample_sound.to_formant_burg()`
merged 2017-11-28 11:35:20 +00:00			`# sample_formant.x_bins()`
			`return sample_formant.x_bins()`
implemented pitch plotting 2017-11-24 09:02:13 +00:00
			`def draw_spectrogram(spectrogram, dynamic_range=70):`
			`X, Y = spectrogram.x_grid(), spectrogram.y_grid()`
			`sg_db = 10 * np.log10(spectrogram.values.T)`
			`plt.pcolormesh(X, Y, sg_db, vmin=sg_db.max() - dynamic_range, cmap='afmhot')`
			`plt.ylim([spectrogram.ymin, spectrogram.ymax])`
			`plt.xlabel("time [s]")`
			`plt.ylabel("frequency [Hz]")`

			`def draw_intensity(intensity):`
			`plt.plot(intensity.xs(), intensity.values, linewidth=3, color='w')`
			`plt.plot(intensity.xs(), intensity.values, linewidth=1)`
			`plt.grid(False)`
			`plt.ylim(0)`
			`plt.ylabel("intensity [dB]")`

			`def draw_pitch(pitch):`
			`# Extract selected pitch contour, and`
			`# replace unvoiced samples by NaN to not plot`
			`pitch_values = pitch.to_matrix().values`
			`pitch_values[pitch_values==0] = np.nan`
			`plt.plot(pitch.xs(), pitch_values, linewidth=3, color='w')`
			`plt.plot(pitch.xs(), pitch_values, linewidth=1)`
			`plt.grid(False)`
			`plt.ylim(0, pitch.ceiling)`
			`plt.ylabel("pitch [Hz]")`

merged 2017-11-28 11:35:20 +00:00			`def draw_formants(formant):`
			`# Extract selected pitch contour, and`
			`# replace unvoiced samples by NaN to not plot`
			`formant_values = formant.to_matrix().values`
			`pitch_values[pitch_values==0] = np.nan`
			`plt.plot(pitch.xs(), pitch_values, linewidth=3, color='w')`
			`plt.plot(pitch.xs(), pitch_values, linewidth=1)`
			`plt.grid(False)`
			`plt.ylim(0, pitch.ceiling)`
			`plt.ylabel("Formants [val]")`
finding exact duration of sound sample 2017-11-28 07:22:00 +00:00

implemented pitch plotting 2017-11-24 09:02:13 +00:00			`def plot_sample_raw(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):`
			`# %matplotlib inline`
			`# sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff`
			`snd_d = pm_snd(sample_file)`
			`plt.figure()`
			`plt.plot(snd_d.xs(), snd_d.values)`
			`plt.xlim([snd_d.xmin, snd_d.xmax])`
			`plt.xlabel("time [s]")`
			`plt.ylabel("amplitude")`
			`plt.show()`

implemented segmentation visualization 2017-11-30 09:19:55 +00:00			`def plot_file_intensity(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):`
implemented pitch plotting 2017-11-24 09:02:13 +00:00			`snd_d = pm_snd(sample_file)`
implemented segmentation visualization 2017-11-30 09:19:55 +00:00			`plot_sample_intensity(snd_d)`

			`def plot_sample_intensity(snd_d):`
implemented pitch plotting 2017-11-24 09:02:13 +00:00			`intensity = snd_d.to_intensity()`
			`spectrogram = snd_d.to_spectrogram()`
			`plt.figure()`
			`draw_spectrogram(spectrogram)`
			`plt.twinx()`
			`draw_intensity(intensity)`
			`plt.xlim([snd_d.xmin, snd_d.xmax])`
			`plt.show()`

implemented segmentation visualization 2017-11-30 09:19:55 +00:00			`def plot_file_pitch(sample_file='outputs/audio/sunflowers-Victoria-180-normal-870.aiff'):`
implemented pitch plotting 2017-11-24 09:02:13 +00:00			`snd_d = pm_snd(sample_file)`
implemented segmentation visualization 2017-11-30 09:19:55 +00:00			`plot_sample_pitch(snd_d)`

			`def plot_sample_pitch(snd_d,phons = []):`
implemented pitch plotting 2017-11-24 09:02:13 +00:00			`pitch = snd_d.to_pitch()`
			`spectrogram = snd_d.to_spectrogram(window_length=0.03, maximum_frequency=8000)`
			`plt.figure()`
			`draw_spectrogram(spectrogram)`
			`plt.twinx()`
			`draw_pitch(pitch)`
implemented segmentation visualization 2017-11-30 09:19:55 +00:00			`for (p,c) in phons:`
			`plt.axvline(x=p)`
			`plt.text(p,-1,c)`
implemented pitch plotting 2017-11-24 09:02:13 +00:00			`plt.xlim([snd_d.xmin, snd_d.xmax])`
			`plt.show()`

implemented segmentation visualization 2017-11-30 09:19:55 +00:00			`def play_sound(samplerate=22050):`
			`#snd_sample = pm_snd('outputs/test/a_warm_smile_and_a_good_heart-1917.aiff')`
			`p_oup = pa.PyAudio()`
			`stream = p_oup.open(`
			`format=pa.paFloat32,`
			`channels=2,`
			`rate=samplerate,`
			`output=True)`
			`def sample_player(snd_sample=None):`
			`samples = snd_sample.as_array()[:,0]`

			`one_channel = np.asarray([samples, samples]).T.reshape(-1)`
			`audio_data = one_channel.astype(np.float32).tobytes()`
			`stream.write(audio_data)`
			`def close_player():`
			`stream.close()`
			`p_oup.terminate()`
			`return sample_player,close_player`
implemented pitch plotting 2017-11-24 09:02:13 +00:00			`# snd_part = snd_d.extract_part(from_time=0.9, preserve_times=True)`
			`# plt.figure()`
			`# plt.plot(snd_part.xs(), snd_part.values, linewidth=0.5)`
			`# plt.xlim([snd_part.xmin, snd_part.xmax])`
			`# plt.xlabel("time [s]")`
			`# plt.ylabel("amplitude")`
			`# plt.show()`


			`if __name__ == '__main__':`
implemented segmentation visualization 2017-11-30 09:19:55 +00:00			`plot_file_pitch('outputs/audio/sunflowers-Victoria-180-normal-870.aiff')`
			`plot_file_pitch('outputs/test/a_warm_smile_and_a_good_heart-1917.aiff')`
			`play_sound(pm_snd('outputs/test/a_warm_smile_and_a_good_heart-1917.aiff'))`
			`plot_file_pitch('outputs/test/a_wrong_turn-3763.aiff')`
			`play_sound(pm_snd('outputs/test/a_wrong_turn-3763.aiff'))`
			`plot_file_pitch('inputs/self/a_wrong_turn-low1.aiff')`
			`play_sound(pm_snd('inputs/self/a_wrong_turn-low1.aiff'))`
			`plot_file_pitch('inputs/self/a_wrong_turn-low2.aiff')`
			`play_sound(pm_snd('inputs/self/a_wrong_turn-low2.aiff'))`
			`plot_file_pitch('inputs/self/apple-low1.aiff')`
			`plot_file_pitch('inputs/self/apple-low2.aiff')`
			`plot_file_pitch('inputs/self/apple-medium1.aiff')`