diff --git a/README.md b/README.md
index 9aa7754..d95808d 100644
--- a/README.md
+++ b/README.md
@@ -7,10 +7,16 @@
 
 # Table of Contents
 
+* [Prerequisites](#prerequisites)
 * [Features](#features)
 * [Installation](#installation)
 * [Usage](#usage)
 
+# Prerequisites
+```bash
+# apt install libsndfile-dev ffmpeg
+```
+
 # Features
 
 * ASR using Jasper (from [NemoToolkit](https://github.com/NVIDIA/NeMo) )
diff --git a/jasper/data/rastrik_recycler.py b/jasper/data/rastrik_recycler.py
index e051551..6093b18 100644
--- a/jasper/data/rastrik_recycler.py
+++ b/jasper/data/rastrik_recycler.py
@@ -1,8 +1,7 @@
-from rastrik.proto.callrecord_pb2 import CallRecordEvent, CallRecord
+from rastrik.proto.callrecord_pb2 import CallRecord
 import gzip
 from pydub import AudioSegment
-import json
-from .utils import ExtendedPath, asr_data_writer, strip_silence
+from .utils import ui_dump_manifest_writer, strip_silence
 import typer
 from itertools import chain
 
@@ -11,127 +10,84 @@ from pathlib import Path
 
 app = typer.Typer()
 
+
 @app.command()
 def extract_manifest(
-    call_audio_dir: Path = Path("./data/call_audio"),
-    call_meta_dir: Path = Path("./data/call_metadata"),
+    call_log_dir: Path = Path("./data/call_audio"),
     output_dir: Path = Path("./data"),
     dataset_name: str = "grassroot_pizzahut_v1",
+    caller_name: str = "grassroot",
     verbose: bool = False,
 ):
-
-
     call_asr_data: Path = output_dir / Path("asr_data")
     call_asr_data.mkdir(exist_ok=True, parents=True)
 
-    """
-    def read_event_old(log_file,audio_file):
-        call_wav = AudioSegment.from_wav(audio_file)
-        call_wav_0, call_wav_1 = call_wav.split_to_mono()
-        with gzip.open(log_file, "rb") as log_h:
-            record_data = log_h.read()
-        cr = CallRecord()
-        cr.ParseFromString(record_data)
-        import pdb
-        first_audio_event_timestamp = next ((i
-                for i in cr.events
-                if i.WhichOneof("event_type") == "call_event"
-                and i.call_event.WhichOneof("event_type") == "call_audio"
-                )).timestamp.ToDatetime()
-
-        speech_events = [ i
-                for i in cr.events
-                if i.WhichOneof("event_type") == "asr_result"
-            ]
-        previous_event_timestamp = first_audio_event_timestamp - first_audio_event_timestamp
-        for index,each_speech_events in enumerate(speech_events):
-            asr_final = each_speech_events.asr_result.text
-            speech_timestamp = each_speech_events.timestamp.ToDatetime()
-            actual_timestamp = speech_timestamp - first_audio_event_timestamp
-            print(previous_event_timestamp.total_seconds(),actual_timestamp.total_seconds(),asr_final)
-            start_time = previous_event_timestamp.total_seconds()*1000
-            end_time = actual_timestamp.total_seconds() * 1000
-            audio_segment = strip_silence(call_wav_1[start_time:end_time])
-            audio_segment.export(output_folder+str(index) + '.wav' ,format='wav')
-            previous_event_timestamp = actual_timestamp
-    """
-
-    def wav_pb2_generator(call_audio_dir):
-        for wav_path in call_audio_dir.glob("**/*.wav"):
+    def wav_pb2_generator(log_dir):
+        for wav_path in log_dir.glob("**/*.wav"):
             if verbose:
                 typer.echo(f"loading events for file {wav_path}")
             call_wav = AudioSegment.from_file_using_temporary_files(wav_path)
-            rel_meta_path = wav_path.with_suffix(".pb2.gz").relative_to(call_audio_dir)
-            meta_path = call_meta_dir / rel_meta_path
-            #events = ExtendedPath(meta_path).read_json()
-            yield call_wav,wav_path, meta_path
+            meta_path = wav_path.with_suffix(".pb2.gz")
+            yield call_wav, wav_path, meta_path
 
-    def read_event(call_wav,log_file):
-        #call_wav = AudioSegment.from_wav(audio_file)
+    def read_event(call_wav, log_file):
         call_wav_0, call_wav_1 = call_wav.split_to_mono()
         with gzip.open(log_file, "rb") as log_h:
             record_data = log_h.read()
         cr = CallRecord()
         cr.ParseFromString(record_data)
-        import pdb
-        first_audio_event_timestamp = next ((i
-                for i in cr.events
+        first_audio_event_timestamp = next(
+            (
+                i
+                for i in cr.events
                 if i.WhichOneof("event_type") == "call_event"
                 and i.call_event.WhichOneof("event_type") == "call_audio"
-                )).timestamp.ToDatetime()
+            )
+        ).timestamp.ToDatetime()
 
-        speech_events = [ i
+        speech_events = [
+            i
             for i in cr.events
             if i.WhichOneof("event_type") == "speech_event"
            and i.speech_event.WhichOneof("event_type") == "asr_final"
         ]
-        previous_event_timestamp = first_audio_event_timestamp - first_audio_event_timestamp
-        for index,each_speech_events in enumerate(speech_events):
+        previous_event_timestamp = (
+            first_audio_event_timestamp - first_audio_event_timestamp
+        )
+        for index, each_speech_events in enumerate(speech_events):
             asr_final = each_speech_events.speech_event.asr_final
             speech_timestamp = each_speech_events.timestamp.ToDatetime()
             actual_timestamp = speech_timestamp - first_audio_event_timestamp
-            print(previous_event_timestamp.total_seconds(),actual_timestamp.total_seconds(),asr_final)
-            start_time = previous_event_timestamp.total_seconds()*1000
+            start_time = previous_event_timestamp.total_seconds() * 1000
             end_time = actual_timestamp.total_seconds() * 1000
             audio_segment = strip_silence(call_wav_1[start_time:end_time])
             code_fb = BytesIO()
             audio_segment.export(code_fb, format="wav")
             wav_data = code_fb.getvalue()
-
-            #output_audio_path = output_folder + audio_file.replace('.wav','') + '_' + str(index)
-            #audio_segment.export( output_audio_path+ '.wav' ,format='wav')
-            #manifest_file.write(json.dumps({"audio_filepath":output_audio_path , "duration": (end_time-start_time) / 1000 , "text":asr_final }) + '\n')
             previous_event_timestamp = actual_timestamp
-            duration = (end_time-start_time) / 1000
-            yield asr_final,duration,wav_data
-
+            duration = (end_time - start_time) / 1000
+            yield asr_final, duration, wav_data, "grassroot", audio_segment
 
     def generate_call_asr_data():
-        full_asr_data = []
+        full_data = []
         total_duration = 0
-        for wav,wav_path, pb2_path in wav_pb2_generator(call_audio_dir):
-            asr_data = read_event(wav,pb2_path)
+        for wav, wav_path, pb2_path in wav_pb2_generator(call_log_dir):
+            asr_data = read_event(wav, pb2_path)
             total_duration += wav.duration_seconds
-            full_asr_data.append(asr_data)
-
-        typer.echo(f"loaded {len(full_asr_data)} calls of duration {total_duration}s")
-        n_dps = asr_data_writer(call_asr_data, dataset_name, chain(*full_asr_data))
+            full_data.append(asr_data)
+        n_calls = len(full_data)
+        typer.echo(f"loaded {n_calls} calls of duration {total_duration}s")
+        n_dps = ui_dump_manifest_writer(call_asr_data, dataset_name, chain(*full_data))
         typer.echo(f"written {n_dps} data points")
 
-
     generate_call_asr_data()
 
+
 def main():
     app()
 
 
 if __name__ == "__main__":
     main()
-
-
-
-
-
-
diff --git a/jasper/data/validation/ui.py b/jasper/data/validation/ui.py
index 3915aeb..00f2e5c 100644
--- a/jasper/data/validation/ui.py
+++ b/jasper/data/validation/ui.py
@@ -42,7 +42,9 @@ if not hasattr(st, "mongo_connected"):
             upsert=True,
         )
 
-    def set_task_fn(mf_path):
+    def set_task_fn(mf_path, task_id):
+        if task_id:
+            st.task_id = task_id
         task_path = mf_path.parent / Path(f"task-{st.task_id}.lck")
         if not task_path.exists():
             print(f"creating task lock at {task_path}")
@@ -66,8 +68,8 @@ def load_ui_data(validation_ui_data_path: Path):
 
 
 @app.command()
-def main(manifest: Path):
-    st.set_task(manifest)
+def main(manifest: Path, task_id: str = ""):
+    st.set_task(manifest, task_id)
     ui_config = load_ui_data(manifest)
     asr_data = ui_config["data"]
     use_domain_asr = ui_config.get("use_domain_asr", True)
diff --git a/setup.py b/setup.py
index 5b70dcc..eb23848 100644
--- a/setup.py
+++ b/setup.py
@@ -19,13 +19,15 @@ extra_requirements = {
         "ruamel.yaml==0.16.10",
         "pymongo==3.10.1",
         "librosa==0.7.2",
+        "numba==0.48",
         "matplotlib==3.2.1",
         "pandas==1.0.3",
         "tabulate==0.8.7",
         "natural==0.2.0",
         "num2words==0.5.10",
-        "typer[all]==0.1.1",
+        "typer[all]==0.3.1",
         "python-slugify==4.0.0",
+        "rpyc~=4.1.4",
         "lenses @ git+https://github.com/ingolemo/python-lenses.git@b2a2a9aa5b61540992d70b2cf36008d0121e8948#egg=lenses",
     ],
     "validation": [