1. include additional ui dependencies
2. set sample width to 1 for wav2vec2 training data export from jasper
3. add 'audio_seg' key to asr_manifest_reader
4. add alpha rules
5. bugfixes and tests
parent 076b0d11e3
commit db51553320
setup.py | 2 ++

setup.py
@@ -82,6 +82,8 @@ extra_requirements = {
         "google-cloud-speech~=1.3.1",
     ],
     "ui": [
+        "pyspellchecker~=0.6.2",
+        "google-cloud-texttospeech~=1.0.1",
         "rangehttpserver~=1.2.0",
     ],
     "crypto": ["cryptography~=3.4.7"],
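For context, a minimal sketch of how these new "ui" extras would typically be consumed; the import guard and the install hint are illustrative assumptions, not code from this repo:

    # Hypothetical consumer of the "ui" extra: fail with a helpful hint
    # when the optional dependencies added above are missing.
    try:
        from google.cloud import texttospeech  # google-cloud-texttospeech
        from spellchecker import SpellChecker  # pyspellchecker
    except ImportError as exc:
        raise ImportError(
            "ui extras not installed; run: pip install '.[ui]'"
        ) from exc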
@@ -42,6 +42,7 @@ def export_jasper(src_dataset_path: Path, dest_dataset_path: Path, unlink: bool
         pydub.AudioSegment.from_wav(wav_path)
         .set_frame_rate(16000)
         .set_channels(1)
+        .set_sample_width(1)
     )
     dest_path = dest_dataset_path / Path("wavs") / Path(wav_path.name)
     audio_seg.export(dest_path, format="wav")
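Standalone, the amended conversion chain does the following with pydub: resample to 16 kHz, downmix to mono, and reduce samples to a 1-byte width (8-bit). File names here are placeholders:

    import pydub

    # Normalize a wav for wav2vec2 training: 16 kHz, mono, 8-bit samples.
    audio_seg = (
        pydub.AudioSegment.from_wav("input.wav")
        .set_frame_rate(16000)
        .set_channels(1)
        .set_sample_width(1)  # sample width in bytes: 1 byte = 8 bits
    )
    audio_seg.export("output.wav", format="wav")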
@@ -24,7 +24,7 @@ def rpyc(
 ):
     for p in [w2v_path, ctc_path, target_dict_path]:
         if not p.exists():
-            logging.info(f"{p} doesn't exists")
+            typer.echo(f"{p} doesn't exists")
             return
     w2vasr = Wav2Vec2ASR(str(ctc_path), str(w2v_path), str(target_dict_path))
     service = ASRService(w2vasr)
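Switching from logging.info to typer.echo makes the early-exit message visible on stdout even when logging is not configured. A minimal sketch of the pattern, with a hypothetical command and argument name:

    from pathlib import Path

    import typer

    app = typer.Typer()

    @app.command()
    def serve(model_path: Path):
        # typer.echo writes to stdout regardless of the logging setup,
        # so the user sees why the command exited early.
        if not model_path.exists():
            typer.echo(f"{model_path} doesn't exist")
            return
        typer.echo(f"loading {model_path}")

    if __name__ == "__main__":
        app()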
@@ -2,6 +2,7 @@ from pathlib import Path
 
 # from tqdm import tqdm
 import json
+from .audio import audio_wav_bytes_to_seg
 
 # from .extended_path import ExtendedPath
 # from .parallel import parallel_apply
@@ -21,6 +22,10 @@ def asr_manifest_reader(data_manifest_path: Path):
     for p in data_data:
         p["audio_path"] = data_manifest_path.parent / Path(p["audio_filepath"])
         p["text"] = p["text"].strip()
+        # import pdb; pdb.set_trace()
+        p["audio_seg"] = audio_wav_bytes_to_seg(
+            (data_manifest_path.parent / p["audio_filepath"]).read_bytes()
+        )
         yield p
 
 
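audio_wav_bytes_to_seg itself is not shown in this diff; assuming it parses in-memory wav bytes with pydub, an equivalent sketch would be:

    import io

    import pydub

    def audio_wav_bytes_to_seg(wav_bytes: bytes) -> pydub.AudioSegment:
        # Assumed equivalent of the imported helper: build an AudioSegment
        # from wav bytes without re-reading the file from disk.
        return pydub.AudioSegment.from_wav(io.BytesIO(wav_bytes))

Each manifest entry then carries a ready-to-use segment, e.g. entry["audio_seg"].duration_seconds alongside entry["text"].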
@@ -147,6 +147,43 @@ def do_tri_verbose_list():
     ] + ["hundred"]
 
 
+def default_alpha_rules(oh_is_zero, i_oh_limit):
+    o_i_vars = r"(\[?(?:A|Oh|O|I)\]?)"
+    i_oh_limit_rules = [
+        (r"\b([b-hj-np-z])\b", "\\1"),
+        # (
+        #     r"\b((?:"
+        #     + al_num_regex
+        #     + r"|^)\b\s*)(I|O)(\s*\b)(?="
+        #     + al_num_regex
+        #     + r"\s+|$)\b",
+        #     "\\1[\\2]\\3",
+        # ),
+        # (
+        #     r"\b" + o_i_vars + r"(\s+)" + o_i_vars + r"\b",
+        #     "[\\1]\\2[\\3]",
+        # ),
+        (
+            r"(\s+|^)" + o_i_vars + r"(\s+)\[?" + o_i_vars + r"\]?(\s+|$)",
+            "\\1[\\2]\\3[\\4]\\5",
+        ),
+        (
+            r"(\s+|^)\[?" + o_i_vars + r"\]?(\s+)" + o_i_vars + r"(\s+|$)",
+            "\\1[\\2]\\3[\\4]\\5",
+        ),
+    ]
+    entity_rules = (
+        [(r"\boh\b", "o")]
+        + [
+            (r"\bdouble(?: |-)(\w+|\d+)\b", "\\1 \\1"),
+            (r"\btriple(?: |-)(\w+|\d+)\b", "\\1 \\1 \\1"),
+            # (r"\b([a-zA-Z])\b", "\\1"),
+        ]
+        + (i_oh_limit_rules if i_oh_limit else [(r"\b([a-zA-Z])\b", "\\1")])
+    )
+    return entity_rules
+
+
 def default_alnum_rules(num_range, oh_is_zero, i_oh_limit):
     oh_is_zero_rules = [
         (r"\boh\b", "0"),
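Assuming the (pattern, replacement) pairs are applied in order with re.sub (the entity_replacer_keeper machinery lives elsewhere), the double/triple rules expand like this:

    import re

    rules = [
        (r"\bdouble(?: |-)(\w+|\d+)\b", "\\1 \\1"),
        (r"\btriple(?: |-)(\w+|\d+)\b", "\\1 \\1 \\1"),
    ]

    def apply_rules(text: str) -> str:
        # Run each substitution in sequence, as an ordered rule list implies.
        for pattern, repl in rules:
            text = re.sub(pattern, repl, text)
        return text

    print(apply_rules("double seven triple 2"))  # -> "seven seven 2 2 2"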
@@ -285,6 +322,34 @@ def alnum_keeper(num_range=100, oh_is_zero=False):
     return keeper
 
 
+def alpha_keeper(oh_is_zero=False):
+    entity_rules = default_alpha_rules(oh_is_zero, i_oh_limit=True)
+
+    # def strip_space(match_obj):
+    #     # char_elem = match_obj.group(1)
+    #     return match_obj.group(1).strip() + match_obj.group(2).strip()
+
+    pre_rules = [
+        (r"[ ;,.]", " "),
+        (r"[']", ""),
+        # (
+        #     r"((?:(?<=\w{2,2})|^)\s*)(?:\bI\b|\bi\b|\bOh\b|\boh\b)(\s*(?:\w{2,}|$))",
+        #     strip_space,
+        # ),
+    ]
+
+    post_rules = [
+        # (
+        #     r"((?:(?<=\w{2,2})|^)\s*)(?:\bI\b|\bi\b|\bOh\b|\boh\b)(\s*(?:\w{2,}|$))",
+        #     strip_space,
+        # )
+    ]
+    replacer, keeper = entity_replacer_keeper(
+        pre_rules=pre_rules, entity_rules=entity_rules, post_rules=post_rules
+    )
+    return keeper
+
+
 def num_keeper_orig(num_range=10, extra_rules=[]):
     num_int_map_ty = [
         (
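The pre_rules normalize the input before entity matching; applied directly they behave like this, which also explains why contractions like "I'll" lose their apostrophe in the tests below:

    import re

    text = "I'll phone; number one, two."
    text = re.sub(r"[ ;,.]", " ", text)  # separators become spaces
    text = re.sub(r"[']", "", text)      # apostrophes are dropped
    print(text)  # -> "Ill phone  number one  two "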
@@ -377,9 +442,7 @@ def vocab_corrector_gen(vocab, distance=1, method="spell"):
         # return " ".join(
         #     [spell.correction(tok) for tok in spell.split_words(inp)]
         # )
-        return " ".join(
-            [spell.correction(tok) for tok in inp.split()]
-        )
+        return " ".join([spell.correction(tok) for tok in inp.split()])
 
     elif method == "edit":
         # editdistance.eval("banana", "bahama")
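A minimal sketch of the spell path, assuming pyspellchecker~=0.6.2 (as pinned in setup.py above), where correction() always returns a string:

    from spellchecker import SpellChecker

    spell = SpellChecker()  # default English frequency list

    def correct(inp: str) -> str:
        # One correction per whitespace token, mirroring the simplified return.
        return " ".join(spell.correction(tok) for tok in inp.split())

    print(correct("speling corection"))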
@@ -87,6 +87,30 @@ def test_alnum_keeper():
     )
 
 
+def test_alpha_keeper():
+    keeper = alnum_keeper()
+    assert keeper("I One hundred n fifty-eight not 5 oh o fifty A B more") == (
+        "I One hundred n fifty-eight 5 oh o fifty A B",
+        11,
+    )
+    assert keeper(
+        "I'll phone number One hundred n fifty-eight not 5 oh o fifty A B more"
+    ) == ("One hundred n fifty-eight 5 oh o fifty A B", 10)
+    assert keeper(
+        "I'm One hundred n fifty-eight not 5 oh o fifty A B more"
+    ) == (
+        "One hundred n fifty-eight 5 oh o fifty A B",
+        10,
+    )
+
+    assert keeper(
+        "I am One hundred n fifty-eight not 5 oh o fifty A B more"
+    ) == (
+        "One hundred n fifty-eight 5 oh o fifty A B",
+        10,
+    )
+
+
 @pytest.fixture
 def random():
     rand.seed(0)