Don't load audio for the annotation-only UI, and keep `spoken` as the plain text for normal ASR validation

Malar Kannan 2020-05-27 15:57:42 +05:30
parent 41af0a87de
commit d87369c8fe
1 changed file with 13 additions and 8 deletions

View File

@@ -16,7 +16,9 @@ from ..utils import (
app = typer.Typer() app = typer.Typer()
def preprocess_datapoint(idx, rel_root, sample, use_domain_asr, annotation_only, enable_plots): def preprocess_datapoint(
idx, rel_root, sample, use_domain_asr, annotation_only, enable_plots
):
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import librosa import librosa
import librosa.display import librosa.display
@@ -28,17 +30,20 @@ def preprocess_datapoint(idx, rel_root, sample, use_domain_asr, annotation_only,
res["real_idx"] = idx res["real_idx"] = idx
audio_path = rel_root / Path(sample["audio_filepath"]) audio_path = rel_root / Path(sample["audio_filepath"])
res["audio_path"] = str(audio_path) res["audio_path"] = str(audio_path)
if use_domain_asr:
res["spoken"] = alnum_to_asr_tokens(res["text"]) res["spoken"] = alnum_to_asr_tokens(res["text"])
else:
res["spoken"] = res["text"]
res["utterance_id"] = audio_path.stem res["utterance_id"] = audio_path.stem
if not annotation_only:
from jasper.client import transcriber_pretrained, transcriber_speller
aud_seg = ( aud_seg = (
AudioSegment.from_file_using_temporary_files(audio_path) AudioSegment.from_file_using_temporary_files(audio_path)
.set_channels(1) .set_channels(1)
.set_sample_width(2) .set_sample_width(2)
.set_frame_rate(24000) .set_frame_rate(24000)
) )
if not annotation_only:
from jasper.client import transcriber_pretrained, transcriber_speller
res["pretrained_asr"] = transcriber_pretrained(aud_seg.raw_data) res["pretrained_asr"] = transcriber_pretrained(aud_seg.raw_data)
res["pretrained_wer"] = word_error_rate( res["pretrained_wer"] = word_error_rate(
[res["text"]], [res["pretrained_asr"]] [res["text"]], [res["pretrained_asr"]]