mirror of
https://github.com/malarinv/jasper-asr.git
synced 2026-03-08 10:32:35 +00:00
respect verbose flag
This commit is contained in:
@@ -59,10 +59,12 @@ def extract_data(
|
||||
code_wav = code_fb.getvalue()
|
||||
# only the first minute of audio has reliable alignment; ignore the rest
|
||||
if start_time > 60:
|
||||
if verbose:
|
||||
print(f'start time over 60 seconds of audio skipping.')
|
||||
break
|
||||
# yield the chunk only if it contains a reasonable amount of audio data
|
||||
if code_seg.duration_seconds < 0.5:
|
||||
if verbose:
|
||||
print(f'transcript chunk "{transcript}" contains no audio skipping.')
|
||||
continue
|
||||
yield transcript, code_seg.duration_seconds, code_wav
|
||||
|
||||
@@ -63,6 +63,7 @@ def extract_data(
|
||||
# print(monologue["speaker_name"])
|
||||
speaker_channel = channel_map.get(monologue["speaker_name"])
|
||||
if not speaker_channel:
|
||||
if verbose:
|
||||
print(f'unknown speaker tag {monologue["speaker_name"]} in wav:{wav_path} skipping.')
|
||||
continue
|
||||
try:
|
||||
@@ -79,6 +80,7 @@ def extract_data(
|
||||
.collect()(monologue)[-1]
|
||||
)
|
||||
except IndexError:
|
||||
if verbose:
|
||||
print(f'error when loading timestamp events in wav:{wav_path} skipping.')
|
||||
continue
|
||||
|
||||
@@ -92,6 +94,7 @@ def extract_data(
|
||||
text_clean = re.sub(r"\[.*\]", "", text)
|
||||
# yield the chunk only if it contains a reasonable amount of audio data
|
||||
if tscript_wav_seg.duration_seconds < 0.5:
|
||||
if verbose:
|
||||
print(f'transcript chunk "{text_clean}" contains no audio in {wav_path} skipping.')
|
||||
continue
|
||||
yield text_clean, tscript_wav_seg.duration_seconds, tscript_wav
|
||||
@@ -113,6 +116,7 @@ def extract_data(
|
||||
.collect()(monologue)[-1]
|
||||
)
|
||||
except IndexError:
|
||||
if verbose:
|
||||
print(f'error when loading timestamp events in wav:{wav_path} skipping.')
|
||||
continue
|
||||
|
||||
@@ -125,6 +129,7 @@ def extract_data(
|
||||
text = "".join(lens["elements"].Each()["value"].collect()(monologue))
|
||||
text_clean = re.sub(r"\[.*\]", "", text)
|
||||
if tscript_wav_seg.duration_seconds < 0.5:
|
||||
if verbose:
|
||||
print(f'transcript chunk "{text_clean}" contains no audio in {wav_path} skipping.')
|
||||
continue
|
||||
yield text_clean, tscript_wav_seg.duration_seconds, tscript_wav
|
||||
|
||||
Reference in New Issue
Block a user