1. Fix dependency issues
2. Add a task-id option to the validation UI to respawn a previous task
3. Clean up rastrik-recyclertegra
parent e77943b2f2
commit 42647196fe
@@ -7,10 +7,16 @@
 # Table of Contents
 
+* [Prerequisites](#prerequisites)
 * [Features](#features)
 * [Installation](#installation)
 * [Usage](#usage)
 
+# Prerequisites
+```bash
+# apt install libsndfile-dev ffmpeg
+```
+
 # Features
 
 * ASR using Jasper (from [NemoToolkit](https://github.com/NVIDIA/NeMo) )
 
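The new Prerequisites block covers the system libraries the audio tooling needs: pydub shells out to ffmpeg for decoding, and librosa/soundfile link against libsndfile. A quick environment probe along these lines (a sketch only; the `soundfile` import is an assumption, pulled in transitively via librosa rather than added by this commit) can confirm both are present before running extraction:

```python
# Hypothetical environment probe, not part of the repo: checks that the
# system packages installed above are visible to the Python audio stack.
import shutil

import soundfile  # wraps libsndfile; typically installed transitively via librosa

assert shutil.which("ffmpeg") is not None, "ffmpeg not on PATH: apt install ffmpeg"
print("libsndfile version:", soundfile.__libsndfile_version__)
```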
@@ -1,8 +1,7 @@
-from rastrik.proto.callrecord_pb2 import CallRecordEvent, CallRecord
+from rastrik.proto.callrecord_pb2 import CallRecord
 import gzip
 from pydub import AudioSegment
-import json
-from .utils import ExtendedPath, asr_data_writer, strip_silence
+from .utils import ui_dump_manifest_writer, strip_silence
 
 import typer
 from itertools import chain
@@ -11,127 +10,84 @@ from pathlib import Path
 
 app = typer.Typer()
 
 
 @app.command()
 def extract_manifest(
-    call_audio_dir: Path = Path("./data/call_audio"),
-    call_meta_dir: Path = Path("./data/call_metadata"),
+    call_log_dir: Path = Path("./data/call_audio"),
     output_dir: Path = Path("./data"),
     dataset_name: str = "grassroot_pizzahut_v1",
+    caller_name: str = "grassroot",
     verbose: bool = False,
 ):
 
     call_asr_data: Path = output_dir / Path("asr_data")
     call_asr_data.mkdir(exist_ok=True, parents=True)
-    """
-    def read_event_old(log_file,audio_file):
-        call_wav = AudioSegment.from_wav(audio_file)
-        call_wav_0, call_wav_1 = call_wav.split_to_mono()
-        with gzip.open(log_file, "rb") as log_h:
-            record_data = log_h.read()
-        cr = CallRecord()
-        cr.ParseFromString(record_data)
-
-        import pdb
-        first_audio_event_timestamp = next ((i
-            for i in cr.events
-            if i.WhichOneof("event_type") == "call_event"
-            and i.call_event.WhichOneof("event_type") == "call_audio"
-        )).timestamp.ToDatetime()
-
-        speech_events = [ i
-            for i in cr.events
-            if i.WhichOneof("event_type") == "asr_result"
-        ]
-        previous_event_timestamp = first_audio_event_timestamp - first_audio_event_timestamp
-        for index,each_speech_events in enumerate(speech_events):
-            asr_final = each_speech_events.asr_result.text
-            speech_timestamp = each_speech_events.timestamp.ToDatetime()
-            actual_timestamp = speech_timestamp - first_audio_event_timestamp
-            print(previous_event_timestamp.total_seconds(),actual_timestamp.total_seconds(),asr_final)
-            start_time = previous_event_timestamp.total_seconds()*1000
-            end_time = actual_timestamp.total_seconds() * 1000
-            audio_segment = strip_silence(call_wav_1[start_time:end_time])
-            audio_segment.export(output_folder+str(index) + '.wav' ,format='wav')
-            previous_event_timestamp = actual_timestamp
-    """
 
-    def wav_pb2_generator(call_audio_dir):
-        for wav_path in call_audio_dir.glob("**/*.wav"):
+    def wav_pb2_generator(log_dir):
+        for wav_path in log_dir.glob("**/*.wav"):
             if verbose:
                 typer.echo(f"loading events for file {wav_path}")
             call_wav = AudioSegment.from_file_using_temporary_files(wav_path)
-            rel_meta_path = wav_path.with_suffix(".pb2.gz").relative_to(call_audio_dir)
-            meta_path = call_meta_dir / rel_meta_path
-            #events = ExtendedPath(meta_path).read_json()
-            yield call_wav,wav_path, meta_path
+            meta_path = wav_path.with_suffix(".pb2.gz")
+            yield call_wav, wav_path, meta_path
 
-    def read_event(call_wav,log_file):
-        #call_wav = AudioSegment.from_wav(audio_file)
+    def read_event(call_wav, log_file):
         call_wav_0, call_wav_1 = call_wav.split_to_mono()
         with gzip.open(log_file, "rb") as log_h:
             record_data = log_h.read()
         cr = CallRecord()
         cr.ParseFromString(record_data)
 
-        import pdb
-        first_audio_event_timestamp = next ((i
-            for i in cr.events
-            if i.WhichOneof("event_type") == "call_event"
-            and i.call_event.WhichOneof("event_type") == "call_audio"
-        )).timestamp.ToDatetime()
+        first_audio_event_timestamp = next(
+            (
+                i
+                for i in cr.events
+                if i.WhichOneof("event_type") == "call_event"
+                and i.call_event.WhichOneof("event_type") == "call_audio"
+            )
+        ).timestamp.ToDatetime()
 
-        speech_events = [ i
+        speech_events = [
+            i
             for i in cr.events
             if i.WhichOneof("event_type") == "speech_event"
             and i.speech_event.WhichOneof("event_type") == "asr_final"
         ]
-        previous_event_timestamp = first_audio_event_timestamp - first_audio_event_timestamp
-        for index,each_speech_events in enumerate(speech_events):
+        previous_event_timestamp = (
+            first_audio_event_timestamp - first_audio_event_timestamp
+        )
+        for index, each_speech_events in enumerate(speech_events):
             asr_final = each_speech_events.speech_event.asr_final
             speech_timestamp = each_speech_events.timestamp.ToDatetime()
             actual_timestamp = speech_timestamp - first_audio_event_timestamp
-            print(previous_event_timestamp.total_seconds(),actual_timestamp.total_seconds(),asr_final)
-            start_time = previous_event_timestamp.total_seconds()*1000
+            start_time = previous_event_timestamp.total_seconds() * 1000
             end_time = actual_timestamp.total_seconds() * 1000
             audio_segment = strip_silence(call_wav_1[start_time:end_time])
 
             code_fb = BytesIO()
             audio_segment.export(code_fb, format="wav")
             wav_data = code_fb.getvalue()
 
-            #output_audio_path = output_folder + audio_file.replace('.wav','') + '_' + str(index)
-            #audio_segment.export( output_audio_path+ '.wav' ,format='wav')
-            #manifest_file.write(json.dumps({"audio_filepath":output_audio_path , "duration": (end_time-start_time) / 1000 , "text":asr_final }) + '\n')
             previous_event_timestamp = actual_timestamp
-            duration = (end_time-start_time) / 1000
-            yield asr_final,duration,wav_data
+            duration = (end_time - start_time) / 1000
+            yield asr_final, duration, wav_data, "grassroot", audio_segment
 
 
     def generate_call_asr_data():
-        full_asr_data = []
+        full_data = []
         total_duration = 0
-        for wav,wav_path, pb2_path in wav_pb2_generator(call_audio_dir):
-            asr_data = read_event(wav,pb2_path)
+        for wav, wav_path, pb2_path in wav_pb2_generator(call_log_dir):
+            asr_data = read_event(wav, pb2_path)
             total_duration += wav.duration_seconds
-            full_asr_data.append(asr_data)
-        typer.echo(f"loaded {len(full_asr_data)} calls of duration {total_duration}s")
-        n_dps = asr_data_writer(call_asr_data, dataset_name, chain(*full_asr_data))
+            full_data.append(asr_data)
+        n_calls = len(full_data)
+        typer.echo(f"loaded {n_calls} calls of duration {total_duration}s")
+        n_dps = ui_dump_manifest_writer(call_asr_data, dataset_name, chain(*full_data))
         typer.echo(f"written {n_dps} data points")
 
 
     generate_call_asr_data()
 
 
 def main():
     app()
 
 
 if __name__ == "__main__":
     main()
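The rewritten pipeline now yields `(text, duration, wav_bytes, caller, segment)` tuples and hands them to `ui_dump_manifest_writer`, which returns the number of data points written. That writer lives in the package's `.utils` module and is not part of this diff; the stub below is only a hedged sketch of the interface implied by the call site, with the manifest fields borrowed from the deleted commented-out writer code.

```python
# Sketch of the writer interface implied by the call site; the real
# ui_dump_manifest_writer in rastrik's .utils module may differ.
import json
from pathlib import Path
from typing import Iterable, Tuple


def ui_dump_manifest_writer_sketch(
    output_dir: Path,
    dataset_name: str,
    asr_data: Iterable[Tuple[str, float, bytes, str, object]],
) -> int:
    """Write each (text, duration, wav_bytes, caller, segment) tuple and return the count."""
    dataset_dir = output_dir / dataset_name
    dataset_dir.mkdir(exist_ok=True, parents=True)
    n_dps = 0
    with (dataset_dir / "manifest.jsonl").open("w") as manifest_f:
        for text, duration, wav_bytes, caller, _segment in asr_data:
            wav_path = dataset_dir / f"{caller}_{n_dps}.wav"
            wav_path.write_bytes(wav_bytes)
            # Same manifest fields as the commented-out writer removed above.
            entry = {"audio_filepath": str(wav_path), "duration": duration, "text": text}
            manifest_f.write(json.dumps(entry) + "\n")
            n_dps += 1
    return n_dps
```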
|
|
@@ -42,7 +42,9 @@ if not hasattr(st, "mongo_connected"):
         upsert=True,
     )
 
-def set_task_fn(mf_path):
+def set_task_fn(mf_path, task_id):
+    if task_id:
+        st.task_id = task_id
     task_path = mf_path.parent / Path(f"task-{st.task_id}.lck")
     if not task_path.exists():
         print(f"creating task lock at {task_path}")
@@ -66,8 +68,8 @@ def load_ui_data(validation_ui_data_path: Path):
 
 
 @app.command()
-def main(manifest: Path):
-    st.set_task(manifest)
+def main(manifest: Path, task_id: str = ""):
+    st.set_task(manifest, task_id)
     ui_config = load_ui_data(manifest)
     asr_data = ui_config["data"]
     use_domain_asr = ui_config.get("use_domain_asr", True)
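Together, these two hunks make the task id resumable: Typer exposes the new `task_id: str = ""` parameter as a `--task-id` command-line option, and `set_task_fn` only overrides the stored id when one is actually passed, so the UI reuses the matching `task-<id>.lck` lock instead of starting a fresh task. A minimal standalone sketch of that behaviour follows; the `_State` class is a stand-in for the validation UI's real session state, which this diff does not show.

```python
# Minimal sketch of the task-id plumbing; _State is a stand-in for the
# validation UI's real session state object.
import typer

app = typer.Typer()


class _State:
    task_id = "0"  # placeholder default


st = _State()


def set_task_fn(mf_path: str, task_id: str) -> None:
    if task_id:  # empty string means "create a fresh task", as in the diff
        st.task_id = task_id
    print(f"would reuse lock file task-{st.task_id}.lck next to {mf_path}")


@app.command()
def main(manifest: str, task_id: str = ""):
    set_task_fn(manifest, task_id)


if __name__ == "__main__":
    # e.g. `python sketch.py data/manifest.json --task-id 7` respawns task 7
    app()
```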
setup.py
@@ -19,13 +19,15 @@ extra_requirements = {
         "ruamel.yaml==0.16.10",
         "pymongo==3.10.1",
         "librosa==0.7.2",
+        "numba==0.48",
         "matplotlib==3.2.1",
         "pandas==1.0.3",
         "tabulate==0.8.7",
         "natural==0.2.0",
         "num2words==0.5.10",
-        "typer[all]==0.1.1",
+        "typer[all]==0.3.1",
         "python-slugify==4.0.0",
+        "rpyc~=4.1.4",
         "lenses @ git+https://github.com/ingolemo/python-lenses.git@b2a2a9aa5b61540992d70b2cf36008d0121e8948#egg=lenses",
     ],
     "validation": [
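On the dependency side, `typer` moves to 0.3.1, `rpyc` is added, and `numba` is pinned to 0.48. The pin is most plausibly there because the already-pinned `librosa==0.7.2` imports `numba.decorators`, which later numba releases removed; that reasoning is an inference, not stated in the commit. A quick probe that the pinned audio stack still imports cleanly:

```python
# Assumes the versions pinned in setup.py are installed in the environment.
import librosa
import numba

print("numba", numba.__version__, "librosa", librosa.__version__)
```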