diff --git a/jasper/data/process.py b/jasper/data/process.py
index 000e843..7a53030 100644
--- a/jasper/data/process.py
+++ b/jasper/data/process.py
@@ -45,20 +45,42 @@ def split_data(dataset_path: Path, test_size: float = 0.1):
 
 @app.command()
 def validate_data(dataset_path: Path):
+    """Validate the train and test manifests found under ``dataset_path``.
+
+    For each manifest, every line is parsed as JSON, its ``duration`` is added
+    to a running total, and its ``audio_filepath`` (resolved relative to the
+    manifest's directory) is checked for existence. Per-line failures are
+    printed with their line index; a summary per manifest follows.
+    """
+    from natural.date import compress
+    from datetime import timedelta
+
     for mf_type in ["train_manifest.json", "test_manifest.json"]:
         data_file = dataset_path / Path(mf_type)
         print(f"validating {data_file}.")
         with Path(data_file).open("r") as pf:
             pnr_jsonl = pf.readlines()
+        duration = 0
+        errors = 0
         for (i, s) in enumerate(pnr_jsonl):
             try:
                 d = json.loads(s)
+                duration += d["duration"]
                 audio_file = data_file.parent / Path(d["audio_filepath"])
                 if not audio_file.exists():
                     raise OSError(f"File {audio_file} not found")
-            except BaseException as e:
+            except Exception as e:
+                # Exception rather than BaseException, so Ctrl-C still aborts the run.
+                errors += 1
                 print(f'failed on {i} with "{e}"')
-        print(f"no errors found. seems like a valid {mf_type}.")
+        if errors:
+            print(f"{errors} errors found in {mf_type}.")
+        else:
+            # compress() already appends unit suffixes, so no extra "sec" is needed.
+            duration_str = compress(timedelta(seconds=duration), pad=" ")
+            print(
+                f"no errors found. seems like a valid {mf_type}. contains {duration_str} of audio"
+            )
 
 
 def main():
diff --git a/setup.py b/setup.py
index 801831c..38f253e 100644
--- a/setup.py
+++ b/setup.py
@@ -37,7 +37,8 @@ extra_requirements = {
         "matplotlib==3.2.1",
         "pydub~=0.24.0",
         "streamlit==0.58.0",
-        "stringcase==1.2.0"
+        "natural==0.2.0",
+        "stringcase==1.2.0",
     ]
     # "train": [
     #     "torchaudio==0.5.0",