1. added streamlit based validation ui with mongodb datastore integration

2. fix asr wrong sample rate inference 3. update requirements
2026-03-08 10:32:35 +00:00 · 2020-04-29 14:26:11 +05:30
parent 61048f855e
commit 41074a1bca
8 changed files with 318 additions and 1 deletions
--- a/jasper/asr.py
+++ b/jasper/asr.py
@@ -62,7 +62,7 @@ class JasperASR(object):
        wf = wave.open(audio_file_path, "w")
        wf.setnchannels(1)
        wf.setsampwidth(2)
-        wf.setframerate(16000)
+        wf.setframerate(24000)
        wf.writeframesraw(audio_data)
        wf.close()
        manifest = {"audio_filepath": audio_file_path, "duration": 60, "text": "todo"}
--- a/jasper/data_utils/call_recycler.py
+++ b/jasper/data_utils/call_recycler.py
@@ -46,6 +46,7 @@ def analyze(
    from tqdm import tqdm
    from .utils import asr_data_writer
    from pydub import AudioSegment
+    # from itertools import product, chain

    matplotlib.rcParams["agg.path.chunksize"] = 10000

--- a/jasper/data_utils/validation/jasper_client.py
+++ b/jasper/data_utils/validation/jasper_client.py
@@ -0,0 +1,23 @@
+import os
+import logging
+import rpyc
+
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+)
+logger = logging.getLogger(__name__)
+
+
+ASR_HOST = os.environ.get("JASPER_ASR_RPYC_HOST", "localhost")
+ASR_PORT = int(os.environ.get("JASPER_ASR_RPYC_PORT", "8045"))
+
+
+def transcribe_gen(asr_host=ASR_HOST, asr_port=ASR_PORT):
+    logger.info(f"connecting to asr server at {asr_host}:{asr_port}")
+    asr = rpyc.connect(asr_host, asr_port).root
+    logger.info(f"connected to asr server successfully")
+    return asr.transcribe
+
+
+transcriber_pretrained = transcribe_gen(asr_port=8044)
+transcriber_speller = transcribe_gen(asr_port=8045)
--- a/jasper/data_utils/validation/orig_ui.py
+++ b/jasper/data_utils/validation/orig_ui.py
@@ -0,0 +1,73 @@
+import json
+from pathlib import Path
+import streamlit as st
+
+# import matplotlib.pyplot as plt
+# import numpy as np
+import librosa
+import librosa.display
+from pydub import AudioSegment
+from jasper.client import transcriber_pretrained, transcriber_speller
+
+# from pymongo import MongoClient
+
+st.title("ASR Speller Validation")
+dataset_path: Path = Path("/dataset/asr_data/call_alphanum_v3")
+manifest_path = dataset_path / Path("test_manifest.json")
+# print(manifest_path)
+with manifest_path.open("r") as pf:
+    pnr_jsonl = pf.readlines()
+    pnr_data = [json.loads(i) for i in pnr_jsonl]
+
+
+def main():
+    # pnr_data = MongoClient("mongodb://localhost:27017/").test.asr_pnr
+    # sample_no = 0
+    sample_no = (
+        st.slider(
+            "Sample",
+            min_value=1,
+            max_value=len(pnr_data),
+            value=1,
+            step=1,
+            format=None,
+            key=None,
+        )
+        - 1
+    )
+    sample = pnr_data[sample_no]
+    st.write(f"Sample No: {sample_no+1} of {len(pnr_data)}")
+    audio_path = Path(sample["audio_filepath"])
+    # st.write(f"Audio Path:{audio_path}")
+    aud_seg = AudioSegment.from_wav(audio_path)  # .set_channels(1).set_sample_width(2).set_frame_rate(24000)
+    st.sidebar.text("Transcription")
+    st.sidebar.text(f"Pretrained:{transcriber_pretrained(aud_seg.raw_data)}")
+    st.sidebar.text(f"Speller:{transcriber_speller(aud_seg.raw_data)}")
+    st.sidebar.text(f"Expected: {audio_path.stem}")
+    spell_text = sample["text"]
+    st.sidebar.text(f"Spelled: {spell_text}")
+    st.audio(audio_path.open("rb"))
+    selected = st.radio("The Audio is", ("Correct", "Incorrect", "Inaudible"))
+    corrected = audio_path.stem
+    if selected == "Incorrect":
+        corrected = st.text_input("Actual:", value=corrected)
+    # content = ''
+    if sample_no > 0 and st.button("Previous"):
+        sample_no -= 1
+    if st.button("Next"):
+        st.write(sample_no, selected, corrected)
+        sample_no += 1
+
+    (y, sr) = librosa.load(audio_path)
+    librosa.display.waveplot(y=y, sr=sr)
+    # arr = np.random.normal(1, 1, size=100)
+    # plt.hist(arr, bins=20)
+    st.sidebar.pyplot()
+
+
+# def main():
+#     app()
+
+
+if __name__ == "__main__":
+    main()
--- a/jasper/data_utils/validation/st_rerun.py
+++ b/jasper/data_utils/validation/st_rerun.py
@@ -0,0 +1,38 @@
+import streamlit.ReportThread as ReportThread
+from streamlit.ScriptRequestQueue import RerunData
+from streamlit.ScriptRunner import RerunException
+from streamlit.server.Server import Server
+
+
+def rerun():
+    """Rerun a Streamlit app from the top!"""
+    widget_states = _get_widget_states()
+    raise RerunException(RerunData(widget_states))
+
+
+def _get_widget_states():
+    # Hack to get the session object from Streamlit.
+
+    ctx = ReportThread.get_report_ctx()
+
+    session = None
+
+    current_server = Server.get_current()
+    if hasattr(current_server, '_session_infos'):
+        # Streamlit < 0.56
+        session_infos = Server.get_current()._session_infos.values()
+    else:
+        session_infos = Server.get_current()._session_info_by_id.values()
+
+    for session_info in session_infos:
+        if session_info.session.enqueue == ctx.enqueue:
+            session = session_info.session
+
+    if session is None:
+        raise RuntimeError(
+            "Oh noes. Couldn't get your Streamlit Session object"
+            "Are you doing something fancy with threads?"
+        )
+    # Got the session object!
+
+    return session._widget_states
--- a/jasper/data_utils/validation/ui.py
+++ b/jasper/data_utils/validation/ui.py
@@ -0,0 +1,171 @@
+import json
+from io import BytesIO
+from pathlib import Path
+
+import streamlit as st
+from nemo.collections.asr.metrics import word_error_rate
+import librosa
+import librosa.display
+import matplotlib.pyplot as plt
+from tqdm import tqdm
+from pydub import AudioSegment
+import pymongo
+from .jasper_client import transcriber_pretrained, transcriber_speller
+from .st_rerun import rerun
+
+st.title("ASR Speller Validation")
+
+
+def clear_mongo_corrections():
+    col = pymongo.MongoClient("mongodb://localhost:27017/").test.asr_validation
+    col.delete_many({"type": "correction"})
+
+
+def preprocess_datapoint(idx, sample):
+    res = dict(sample)
+    res["real_idx"] = idx
+    audio_path = Path(sample["audio_filepath"])
+    res["audio_path"] = audio_path
+    res["gold_chars"] = audio_path.stem
+    res["gold_phone"] = sample["text"]
+    aud_seg = (
+        AudioSegment.from_wav(audio_path)
+        .set_channels(1)
+        .set_sample_width(2)
+        .set_frame_rate(24000)
+    )
+    res["pretrained_asr"] = transcriber_pretrained(aud_seg.raw_data)
+    res["speller_asr"] = transcriber_speller(aud_seg.raw_data)
+    res["wer"] = word_error_rate([res["gold_phone"]], [res["speller_asr"]])
+    (y, sr) = librosa.load(audio_path)
+    plt.tight_layout()
+    librosa.display.waveplot(y=y, sr=sr)
+    wav_plot_f = BytesIO()
+    plt.savefig(wav_plot_f, format="png", dpi=50)
+    plt.close()
+    wav_plot_f.seek(0)
+    res["plot_png"] = wav_plot_f
+    return res
+
+
+if not hasattr(st, "mongo_connected"):
+    st.mongoclient = pymongo.MongoClient(
+        "mongodb://localhost:27017/"
+    ).test.asr_validation
+    mongo_conn = st.mongoclient
+
+    def current_cursor_fn():
+        # mongo_conn = st.mongoclient
+        cursor_obj = mongo_conn.find_one({"type": "current_cursor"})
+        cursor_val = cursor_obj["cursor"]
+        return cursor_val
+
+    def update_cursor_fn(val=0):
+        mongo_conn.find_one_and_update(
+            {"type": "current_cursor"},
+            {"$set": {"type": "current_cursor", "cursor": val}},
+            upsert=True,
+        )
+        rerun()
+
+    def get_correction_entry_fn(code):
+        # mongo_conn = st.mongoclient
+        # cursor_obj = mongo_conn.find_one({"type": "correction", "code": code})
+        # cursor_val = cursor_obj["cursor"]
+        return mongo_conn.find_one(
+            {"type": "correction", "code": code}, projection={"_id": False}
+        )
+
+    def update_entry_fn(code, value):
+        mongo_conn.find_one_and_update(
+            {"type": "correction", "code": code},
+            {"$set": {"value": value}},
+            upsert=True,
+        )
+        rerun()
+
+    cursor_obj = mongo_conn.find_one({"type": "current_cursor"})
+    if not cursor_obj:
+        update_cursor_fn(0)
+    st.get_current_cursor = current_cursor_fn
+    st.update_cursor = update_cursor_fn
+    st.get_correction_entry = get_correction_entry_fn
+    st.update_entry = update_entry_fn
+    st.mongo_connected = True
+
+
+@st.cache(hash_funcs={"rpyc.core.netref.builtins.method": lambda _: None})
+def preprocess_dataset(dataset_path: Path = Path("/dataset/asr_data/call_alphanum_v3")):
+    print("misssed cache : preprocess_dataset")
+    dataset_path: Path = Path("/dataset/asr_data/call_alphanum_v3")
+    manifest_path = dataset_path / Path("test_manifest.json")
+    with manifest_path.open("r") as pf:
+        pnr_jsonl = pf.readlines()
+        pnr_data = [
+            preprocess_datapoint(i, json.loads(v))
+            for i, v in enumerate(tqdm(pnr_jsonl))
+        ]
+    result = sorted(pnr_data, key=lambda x: x["wer"], reverse=True)
+    return result
+
+
+def main():
+    pnr_data = preprocess_dataset()
+    sample_no = st.get_current_cursor()
+    sample = pnr_data[sample_no]
+    st.markdown(
+        f"{sample_no+1} of {len(pnr_data)} : **{sample['gold_chars']}** spelled *{sample['gold_phone']}*"
+    )
+    new_sample = st.number_input(
+        "Go To Sample:", value=sample_no + 1, min_value=1, max_value=len(pnr_data)
+    )
+    if new_sample != sample_no + 1:
+        st.update_cursor(new_sample - 1)
+    st.sidebar.title(f"Details: [{sample['real_idx']}]")
+    st.sidebar.markdown(f"Gold: **{sample['gold_chars']}**")
+    st.sidebar.markdown(f"Expected Speech: *{sample['gold_phone']}*")
+    st.sidebar.title("Results:")
+    st.sidebar.text(f"Pretrained:{sample['pretrained_asr']}")
+    st.sidebar.text(f"Speller:{sample['speller_asr']}")
+
+    st.sidebar.title(f"WER: {sample['wer']:.2f}%")
+    # (y, sr) = librosa.load(sample["audio_path"])
+    # librosa.display.waveplot(y=y, sr=sr)
+    # st.sidebar.pyplot(fig=sample["plot_fig"])
+    st.sidebar.image(sample["plot_png"])
+    st.audio(sample["audio_path"].open("rb"))
+    corrected = sample["gold_chars"]
+    correction_entry = st.get_correction_entry(sample["gold_chars"])
+    selected_idx = 0
+    options = ("Correct", "Incorrect", "Inaudible")
+    if correction_entry:
+        selected_idx = options.index(correction_entry["value"]["status"])
+        corrected = correction_entry["value"]["correction"]
+    selected = st.radio("The Audio is", options, index=selected_idx)
+    if selected == "Incorrect":
+        corrected = st.text_input("Actual:", value=corrected)
+    if selected == "Inaudible":
+        corrected = ""
+    if st.button("Submit"):
+        correct_code = corrected.replace(" ", "").upper()
+        st.update_entry(
+            sample["gold_chars"], {"status": selected, "correction": correct_code}
+        )
+    if correction_entry:
+        st.markdown(
+            f'Your Response: **{correction_entry["value"]["status"]}** Correction: **{correction_entry["value"]["correction"]}**'
+        )
+    # st.markdown(
+    #     ",".join(
+    #         [
+    #             "**" + str(p["real_idx"]) + "**"
+    #             if p["real_idx"] == sample["real_idx"]
+    #             else str(p["real_idx"])
+    #             for p in pnr_data
+    #         ]
+    #     )
+    # )
+
+
+if __name__ == "__main__":
+    main()
--- a/setup.py
+++ b/setup.py
@@ -25,6 +25,14 @@ extra_requirements = {
        "typer[all]==0.1.1",
        "lenses @ git+https://github.com/ingolemo/python-lenses.git@b2a2a9aa5b61540992d70b2cf36008d0121e8948#egg=lenses",
    ],
+    "validation": [
+        "rpyc~=4.1.4",
+        "tqdm~=4.39.0",
+        "librosa==0.7.2",
+        "pydub~=0.23.1",
+        "streamlit==0.58.0",
+        "stringcase==1.2.0"
+    ]
    # "train": [
    #     "torchaudio==0.5.0",
    #     "torch-stft==0.1.4",
--- a/streamlit.py
+++ b/streamlit.py
@@ -0,0 +1,3 @@
+import runpy
+
+runpy.run_module("jasper.data_utils.validation.ui", run_name="__main__", alter_sys=True)