mirror of
https://github.com/malarinv/jasper-asr.git
synced 2026-03-09 19:02:35 +00:00
1. add a new streamlit ui to preview manifest
2. implement rpyc transcription client for files
This commit is contained in:
@@ -2,6 +2,10 @@ import os
|
||||
import logging
|
||||
import rpyc
|
||||
from functools import lru_cache
|
||||
import typer
|
||||
from pathlib import Path
|
||||
|
||||
app = typer.Typer()
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
@@ -19,3 +23,28 @@ def transcribe_gen(asr_host=ASR_HOST, asr_port=ASR_PORT):
|
||||
asr = rpyc.connect(asr_host, asr_port).root
|
||||
logger.info(f"connected to asr server successfully")
|
||||
return asr.transcribe
|
||||
|
||||
|
||||
# CLI command: transcribe one audio file with the callable returned by
# transcribe_gen() and write the resulting text next to the audio file,
# using the same name with a ".txt" suffix.
# NOTE: documented with comments rather than a docstring so that the help
# text Typer generates for this command stays exactly as it was.
@app.command()
def transcribe_file(audio_file: Path):
    from pydub import AudioSegment

    # Obtain the transcription callable first (same order as before: the ASR
    # connection is made before the audio is decoded).
    transcribe = transcribe_gen()

    # Decode the file, then convert it step by step: 1 channel,
    # sample width 2, frame rate 24000.
    segment = AudioSegment.from_file_using_temporary_files(audio_file)
    segment = segment.set_channels(1)
    segment = segment.set_sample_width(2)
    segment = segment.set_frame_rate(24000)

    output_path = audio_file.with_suffix(".txt")
    text = transcribe(segment.raw_data)
    with output_path.open("w") as out_file:
        out_file.write(text)
|
||||
|
||||
|
||||
def main():
    """Run the Typer application defined in this module."""
    app()
|
||||
|
||||
|
||||
# Only start the CLI when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
|
||||
@@ -225,6 +225,12 @@ class ExtendedPath(type(Path())):
|
||||
with self.open("r") as jf:
|
||||
return json.load(jf)
|
||||
|
||||
def read_jsonl(self):
    """Lazily yield one decoded JSON value per non-blank line of this file.

    The file is opened in text mode and consumed line by line, so it is
    never loaded into memory as a whole. Whitespace-only lines (for example
    a trailing empty line) are skipped instead of being passed to the JSON
    decoder.

    Yields:
        The object produced by ``json.loads`` for each non-blank line.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    print(f"reading jsonl from {self}")
    with self.open("r") as jf:
        for line in jf:
            # json.loads("") raises, so blank separator/trailing lines
            # must be filtered out before decoding.
            if not line.strip():
                continue
            yield json.loads(line)
|
||||
|
||||
def write_json(self, data):
|
||||
print(f"writing json to {self}")
|
||||
self.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
Reference in New Issue
Block a user