1. include additional ui dependencies
2. set sample width to 1 for wav2vec2 training data export from jasper
3. add 'audio_seg' key to asr_manifest_reader
4. add alpha rules
5. bugfixes and tests
parent 076b0d11e3
commit db51553320
setup.py | 2 ++

setup.py
@@ -82,6 +82,8 @@ extra_requirements = {
         "google-cloud-speech~=1.3.1",
     ],
     "ui": [
+        "pyspellchecker~=0.6.2",
+        "google-cloud-texttospeech~=1.0.1",
         "rangehttpserver~=1.2.0",
     ],
     "crypto": ["cryptography~=3.4.7"],
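For context, a minimal sketch of how these new "ui" extras would typically be consumed; the import guard and the install hint are illustrative assumptions, not code from this repo:

    # Hypothetical consumer of the "ui" extra: fail with a helpful hint
    # when the optional dependencies added above are missing.
    try:
        from google.cloud import texttospeech  # google-cloud-texttospeech
        from spellchecker import SpellChecker  # pyspellchecker
    except ImportError as exc:
        raise ImportError(
            "ui extras not installed; run: pip install '.[ui]'"
        ) from exc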
@@ -42,6 +42,7 @@ def export_jasper(src_dataset_path: Path, dest_dataset_path: Path, unlink: bool
         pydub.AudioSegment.from_wav(wav_path)
         .set_frame_rate(16000)
         .set_channels(1)
+        .set_sample_width(1)
     )
     dest_path = dest_dataset_path / Path("wavs") / Path(wav_path.name)
     audio_seg.export(dest_path, format="wav")
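Standalone, the amended conversion chain does the following with pydub: resample to 16 kHz, downmix to mono, and reduce samples to a 1-byte width (8-bit). File names here are placeholders:

    import pydub

    # Normalize a wav for wav2vec2 training: 16 kHz, mono, 8-bit samples.
    audio_seg = (
        pydub.AudioSegment.from_wav("input.wav")
        .set_frame_rate(16000)
        .set_channels(1)
        .set_sample_width(1)  # sample width in bytes: 1 byte = 8 bits
    )
    audio_seg.export("output.wav", format="wav")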
@@ -24,7 +24,7 @@ def rpyc(
 ):
     for p in [w2v_path, ctc_path, target_dict_path]:
         if not p.exists():
-            logging.info(f"{p} doesn't exists")
+            typer.echo(f"{p} doesn't exists")
             return
     w2vasr = Wav2Vec2ASR(str(ctc_path), str(w2v_path), str(target_dict_path))
     service = ASRService(w2vasr)
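Switching from logging.info to typer.echo makes the early-exit message visible on stdout even when logging is not configured. A minimal sketch of the pattern, with a hypothetical command and argument name:

    from pathlib import Path

    import typer

    app = typer.Typer()

    @app.command()
    def serve(model_path: Path):
        # typer.echo writes to stdout regardless of the logging setup,
        # so the user sees why the command exited early.
        if not model_path.exists():
            typer.echo(f"{model_path} doesn't exist")
            return
        typer.echo(f"loading {model_path}")

    if __name__ == "__main__":
        app()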
@@ -2,6 +2,7 @@ from pathlib import Path
 
 # from tqdm import tqdm
 import json
+from .audio import audio_wav_bytes_to_seg
 
 # from .extended_path import ExtendedPath
 # from .parallel import parallel_apply
@@ -21,6 +22,10 @@ def asr_manifest_reader(data_manifest_path: Path):
     for p in data_data:
         p["audio_path"] = data_manifest_path.parent / Path(p["audio_filepath"])
         p["text"] = p["text"].strip()
+        # import pdb; pdb.set_trace()
+        p["audio_seg"] = audio_wav_bytes_to_seg(
+            (data_manifest_path.parent / p["audio_filepath"]).read_bytes()
+        )
         yield p
 
 
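audio_wav_bytes_to_seg itself is not shown in this diff; assuming it parses in-memory wav bytes with pydub, an equivalent sketch would be:

    import io

    import pydub

    def audio_wav_bytes_to_seg(wav_bytes: bytes) -> pydub.AudioSegment:
        # Assumed equivalent of the imported helper: build an AudioSegment
        # from wav bytes without re-reading the file from disk.
        return pydub.AudioSegment.from_wav(io.BytesIO(wav_bytes))

Each manifest entry then carries a ready-to-use segment, e.g. entry["audio_seg"].duration_seconds alongside entry["text"].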
@@ -147,6 +147,43 @@ def do_tri_verbose_list():
     ] + ["hundred"]
 
 
+def default_alpha_rules(oh_is_zero, i_oh_limit):
+    o_i_vars = r"(\[?(?:A|Oh|O|I)\]?)"
+    i_oh_limit_rules = [
+        (r"\b([b-hj-np-z])\b", "\\1"),
+        # (
+        #     r"\b((?:"
+        #     + al_num_regex
+        #     + r"|^)\b\s*)(I|O)(\s*\b)(?="
+        #     + al_num_regex
+        #     + r"\s+|$)\b",
+        #     "\\1[\\2]\\3",
+        # ),
+        # (
+        #     r"\b" + o_i_vars + r"(\s+)" + o_i_vars + r"\b",
+        #     "[\\1]\\2[\\3]",
+        # ),
+        (
+            r"(\s+|^)" + o_i_vars + r"(\s+)\[?" + o_i_vars + r"\]?(\s+|$)",
+            "\\1[\\2]\\3[\\4]\\5",
+        ),
+        (
+            r"(\s+|^)\[?" + o_i_vars + r"\]?(\s+)" + o_i_vars + r"(\s+|$)",
+            "\\1[\\2]\\3[\\4]\\5",
+        ),
+    ]
+    entity_rules = (
+        [(r"\boh\b", "o")]
+        + [
+            (r"\bdouble(?: |-)(\w+|\d+)\b", "\\1 \\1"),
+            (r"\btriple(?: |-)(\w+|\d+)\b", "\\1 \\1 \\1"),
+            # (r"\b([a-zA-Z])\b", "\\1"),
+        ]
+        + (i_oh_limit_rules if i_oh_limit else [(r"\b([a-zA-Z])\b", "\\1")])
+    )
+    return entity_rules
+
+
 def default_alnum_rules(num_range, oh_is_zero, i_oh_limit):
     oh_is_zero_rules = [
         (r"\boh\b", "0"),
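Assuming the (pattern, replacement) pairs are applied in order with re.sub (the entity_replacer_keeper machinery lives elsewhere), the double/triple rules expand like this:

    import re

    rules = [
        (r"\bdouble(?: |-)(\w+|\d+)\b", "\\1 \\1"),
        (r"\btriple(?: |-)(\w+|\d+)\b", "\\1 \\1 \\1"),
    ]

    def apply_rules(text: str) -> str:
        # Run each substitution in sequence, as an ordered rule list implies.
        for pattern, repl in rules:
            text = re.sub(pattern, repl, text)
        return text

    print(apply_rules("double seven triple 2"))  # -> "seven seven 2 2 2"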
@@ -285,6 +322,34 @@ def alnum_keeper(num_range=100, oh_is_zero=False):
     return keeper
 
 
+def alpha_keeper(oh_is_zero=False):
+    entity_rules = default_alpha_rules(oh_is_zero, i_oh_limit=True)
+
+    # def strip_space(match_obj):
+    #     # char_elem = match_obj.group(1)
+    #     return match_obj.group(1).strip() + match_obj.group(2).strip()
+
+    pre_rules = [
+        (r"[ ;,.]", " "),
+        (r"[']", ""),
+        # (
+        #     r"((?:(?<=\w{2,2})|^)\s*)(?:\bI\b|\bi\b|\bOh\b|\boh\b)(\s*(?:\w{2,}|$))",
+        #     strip_space,
+        # ),
+    ]
+
+    post_rules = [
+        # (
+        #     r"((?:(?<=\w{2,2})|^)\s*)(?:\bI\b|\bi\b|\bOh\b|\boh\b)(\s*(?:\w{2,}|$))",
+        #     strip_space,
+        # )
+    ]
+    replacer, keeper = entity_replacer_keeper(
+        pre_rules=pre_rules, entity_rules=entity_rules, post_rules=post_rules
+    )
+    return keeper
+
+
 def num_keeper_orig(num_range=10, extra_rules=[]):
     num_int_map_ty = [
         (
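The pre_rules normalize the input before entity matching; applied directly they behave like this, which also explains why contractions like "I'll" lose their apostrophe in the tests below:

    import re

    text = "I'll phone; number one, two."
    text = re.sub(r"[ ;,.]", " ", text)  # separators become spaces
    text = re.sub(r"[']", "", text)      # apostrophes are dropped
    print(text)  # -> "Ill phone  number one  two "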
@@ -377,9 +442,7 @@ def vocab_corrector_gen(vocab, distance=1, method="spell"):
         # return " ".join(
         #     [spell.correction(tok) for tok in spell.split_words(inp)]
         # )
-        return " ".join(
-            [spell.correction(tok) for tok in inp.split()]
-        )
+        return " ".join([spell.correction(tok) for tok in inp.split()])
 
     elif method == "edit":
         # editdistance.eval("banana", "bahama")
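A minimal sketch of the spell path, assuming pyspellchecker~=0.6.2 (as pinned in setup.py above), where correction() always returns a string:

    from spellchecker import SpellChecker

    spell = SpellChecker()  # default English frequency list

    def correct(inp: str) -> str:
        # One correction per whitespace token, mirroring the simplified return.
        return " ".join(spell.correction(tok) for tok in inp.split())

    print(correct("speling corection"))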
@@ -87,6 +87,30 @@ def test_alnum_keeper():
     )
 
 
+def test_alpha_keeper():
+    keeper = alnum_keeper()
+    assert keeper("I One hundred n fifty-eight not 5 oh o fifty A B more") == (
+        "I One hundred n fifty-eight 5 oh o fifty A B",
+        11,
+    )
+    assert keeper(
+        "I'll phone number One hundred n fifty-eight not 5 oh o fifty A B more"
+    ) == ("One hundred n fifty-eight 5 oh o fifty A B", 10)
+    assert keeper(
+        "I'm One hundred n fifty-eight not 5 oh o fifty A B more"
+    ) == (
+        "One hundred n fifty-eight 5 oh o fifty A B",
+        10,
+    )
+
+    assert keeper(
+        "I am One hundred n fifty-eight not 5 oh o fifty A B more"
+    ) == (
+        "One hundred n fifty-eight 5 oh o fifty A B",
+        10,
+    )
+
+
 @pytest.fixture
 def random():
     rand.seed(0)