diff --git a/setup.py b/setup.py
index bc024e6..8aa3101 100644
--- a/setup.py
+++ b/setup.py
@@ -82,6 +82,8 @@ extra_requirements = {
         "google-cloud-speech~=1.3.1",
     ],
     "ui": [
+        "pyspellchecker~=0.6.2",
+        "google-cloud-texttospeech~=1.0.1",
         "rangehttpserver~=1.2.0",
     ],
     "crypto": ["cryptography~=3.4.7"],
diff --git a/src/plume/models/wav2vec2/data.py b/src/plume/models/wav2vec2/data.py
index c67da1f..c22e35f 100644
--- a/src/plume/models/wav2vec2/data.py
+++ b/src/plume/models/wav2vec2/data.py
@@ -42,6 +42,7 @@ def export_jasper(src_dataset_path: Path, dest_dataset_path: Path, unlink: bool
             pydub.AudioSegment.from_wav(wav_path)
             .set_frame_rate(16000)
             .set_channels(1)
+            .set_sample_width(1)
         )
         dest_path = dest_dataset_path / Path("wavs") / Path(wav_path.name)
         audio_seg.export(dest_path, format="wav")
diff --git a/src/plume/models/wav2vec2/serve.py b/src/plume/models/wav2vec2/serve.py
index b549904..833b685 100644
--- a/src/plume/models/wav2vec2/serve.py
+++ b/src/plume/models/wav2vec2/serve.py
@@ -24,7 +24,7 @@ def rpyc(
 ):
     for p in [w2v_path, ctc_path, target_dict_path]:
         if not p.exists():
-            logging.info(f"{p} doesn't exists")
+            typer.echo(f"{p} doesn't exist")
             return
     w2vasr = Wav2Vec2ASR(str(ctc_path), str(w2v_path), str(target_dict_path))
     service = ASRService(w2vasr)
diff --git a/src/plume/utils/manifest.py b/src/plume/utils/manifest.py
index c1c98dc..011a0e5 100644
--- a/src/plume/utils/manifest.py
+++ b/src/plume/utils/manifest.py
@@ -2,6 +2,7 @@ from pathlib import Path
 
 # from tqdm import tqdm
 import json
+from .audio import audio_wav_bytes_to_seg
 
 # from .extended_path import ExtendedPath
 # from .parallel import parallel_apply
@@ -21,6 +22,10 @@ def asr_manifest_reader(data_manifest_path: Path):
     for p in data_data:
         p["audio_path"] = data_manifest_path.parent / Path(p["audio_filepath"])
         p["text"] = p["text"].strip()
+        # import pdb; pdb.set_trace()
+        p["audio_seg"] = audio_wav_bytes_to_seg(
+            (data_manifest_path.parent / p["audio_filepath"]).read_bytes()
+        )
         yield p
 
 
diff --git a/src/plume/utils/regentity.py b/src/plume/utils/regentity.py
index 6191e15..daf487b 100644
--- a/src/plume/utils/regentity.py
+++ b/src/plume/utils/regentity.py
@@ -147,6 +147,43 @@ def do_tri_verbose_list():
     ] + ["hundred"]
 
 
+def default_alpha_rules(oh_is_zero, i_oh_limit):
+    o_i_vars = r"(\[?(?:A|Oh|O|I)\]?)"
+    i_oh_limit_rules = [
+        (r"\b([b-hj-np-z])\b", "\\1"),
+        # (
+        #     r"\b((?:"
+        #     + al_num_regex
+        #     + r"|^)\b\s*)(I|O)(\s*\b)(?="
+        #     + al_num_regex
+        #     + r"\s+|$)\b",
+        #     "\\1[\\2]\\3",
+        # ),
+        # (
+        #     r"\b" + o_i_vars + r"(\s+)" + o_i_vars + r"\b",
+        #     "[\\1]\\2[\\3]",
+        # ),
+        (
+            r"(\s+|^)" + o_i_vars + r"(\s+)\[?" + o_i_vars + r"\]?(\s+|$)",
+            "\\1[\\2]\\3[\\4]\\5",
+        ),
+        (
+            r"(\s+|^)\[?" + o_i_vars + r"\]?(\s+)" + o_i_vars + r"(\s+|$)",
+            "\\1[\\2]\\3[\\4]\\5",
+        ),
+    ]
+    entity_rules = (
+        [(r"\boh\b", "o")]
+        + [
+            (r"\bdouble(?: |-)(\w+|\d+)\b", "\\1 \\1"),
+            (r"\btriple(?: |-)(\w+|\d+)\b", "\\1 \\1 \\1"),
+            # (r"\b([a-zA-Z])\b", "\\1"),
+        ]
+        + (i_oh_limit_rules if i_oh_limit else [(r"\b([a-zA-Z])\b", "\\1")])
+    )
+    return entity_rules
+
+
 def default_alnum_rules(num_range, oh_is_zero, i_oh_limit):
     oh_is_zero_rules = [
         (r"\boh\b", "0"),
@@ -285,6 +322,34 @@ def alnum_keeper(num_range=100, oh_is_zero=False):
     return keeper
 
 
+def alpha_keeper(oh_is_zero=False):
+    entity_rules = default_alpha_rules(oh_is_zero, i_oh_limit=True)
+
+    # def strip_space(match_obj):
+    #     # char_elem = match_obj.group(1)
+    #     return match_obj.group(1).strip() + match_obj.group(2).strip()
+
+    pre_rules = [
+        (r"[ ;,.]", " "),
+        (r"[']", ""),
+        # (
+        #     r"((?:(?<=\w{2,2})|^)\s*)(?:\bI\b|\bi\b|\bOh\b|\boh\b)(\s*(?:\w{2,}|$))",
+        #     strip_space,
+        # ),
+    ]
+
+    post_rules = [
+        # (
+        #     r"((?:(?<=\w{2,2})|^)\s*)(?:\bI\b|\bi\b|\bOh\b|\boh\b)(\s*(?:\w{2,}|$))",
+        #     strip_space,
+        # )
+    ]
+    replacer, keeper = entity_replacer_keeper(
+        pre_rules=pre_rules, entity_rules=entity_rules, post_rules=post_rules
+    )
+    return keeper
+
+
 def num_keeper_orig(num_range=10, extra_rules=[]):
     num_int_map_ty = [
         (
@@ -377,9 +442,7 @@ def vocab_corrector_gen(vocab, distance=1, method="spell"):
         # return " ".join(
         #     [spell.correction(tok) for tok in spell.split_words(inp)]
         # )
-        return " ".join(
-            [spell.correction(tok) for tok in inp.split()]
-        )
+        return " ".join([spell.correction(tok) for tok in inp.split()])
 
     elif method == "edit":
         # editdistance.eval("banana", "bahama")
diff --git a/tests/plume/test_utils.py b/tests/plume/test_utils.py
index 0af41ee..186b7ed 100644
--- a/tests/plume/test_utils.py
+++ b/tests/plume/test_utils.py
@@ -87,6 +87,30 @@ def test_alnum_keeper():
     )
 
 
+def test_alpha_keeper():
+    keeper = alnum_keeper()
+    assert keeper("I One hundred n fifty-eight not 5 oh o fifty A B more") == (
+        "I One hundred n fifty-eight 5 oh o fifty A B",
+        11,
+    )
+    assert keeper(
+        "I'll phone number One hundred n fifty-eight not 5 oh o fifty A B more"
+    ) == ("One hundred n fifty-eight 5 oh o fifty A B", 10)
+    assert keeper(
+        "I'm One hundred n fifty-eight not 5 oh o fifty A B more"
+    ) == (
+        "One hundred n fifty-eight 5 oh o fifty A B",
+        10,
+    )
+
+    assert keeper(
+        "I am One hundred n fifty-eight not 5 oh o fifty A B more"
+    ) == (
+        "One hundred n fifty-eight 5 oh o fifty A B",
+        10,
+    )
+
+
 @pytest.fixture
 def random():
     rand.seed(0)