mirror of
https://github.com/malarinv/plume-asr.git
synced 2026-03-08 04:12:35 +00:00
massive refactor/rename to plume
This commit is contained in:
486
plume/utils/__init__.py
Normal file
486
plume/utils/__init__.py
Normal file
@@ -0,0 +1,486 @@
|
||||
import io
|
||||
import os
|
||||
import re
|
||||
import json
|
||||
import wave
|
||||
import logging
|
||||
from pathlib import Path
|
||||
from functools import partial
|
||||
from uuid import uuid4
|
||||
from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor
|
||||
import subprocess
|
||||
import shutil
|
||||
from urllib.parse import urlsplit
|
||||
# from .lazy_loader import LazyLoader
|
||||
from .lazy_import import lazy_callable, lazy_module
|
||||
|
||||
# from ruamel.yaml import YAML
|
||||
# import boto3
|
||||
import typer
|
||||
# import pymongo
|
||||
# from slugify import slugify
|
||||
# import pydub
|
||||
# import matplotlib.pyplot as plt
|
||||
# import librosa
|
||||
# import librosa.display as audio_display
|
||||
# from natural.date import compress
|
||||
# from num2words import num2words
|
||||
from tqdm import tqdm
|
||||
from datetime import timedelta
|
||||
|
||||
# from .transcribe import triton_transcribe_grpc_gen
|
||||
# from .eval import app as eval_app
|
||||
from .tts import app as tts_app
|
||||
from .transcribe import app as transcribe_app
|
||||
from .align import app as align_app
|
||||
|
||||
boto3 = lazy_module('boto3')
|
||||
pymongo = lazy_module('pymongo')
|
||||
pydub = lazy_module('pydub')
|
||||
audio_display = lazy_module('librosa.display')
|
||||
plt = lazy_module('matplotlib.pyplot')
|
||||
librosa = lazy_module('librosa')
|
||||
YAML = lazy_callable('ruamel.yaml.YAML')
|
||||
num2words = lazy_callable('num2words.num2words')
|
||||
slugify = lazy_callable('slugify.slugify')
|
||||
compress = lazy_callable('natural.date.compress')
|
||||
|
||||
app = typer.Typer()
|
||||
app.add_typer(tts_app, name="tts")
|
||||
app.add_typer(align_app, name="align")
|
||||
app.add_typer(transcribe_app, name="transcribe")
|
||||
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
|
||||
)
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def manifest_str(path, dur, text):
    """Serialize one ASR manifest record as a JSON line (newline-terminated).

    Duration is rounded to one decimal place, matching the rest of the
    manifest tooling in this module.
    """
    record = {"audio_filepath": path, "duration": round(dur, 1), "text": text}
    return json.dumps(record) + "\n"
|
||||
|
||||
|
||||
def duration_str(seconds):
    """Render *seconds* as a compact human-readable duration string."""
    delta = timedelta(seconds=seconds)
    # `compress` is the lazily-imported natural.date.compress helper.
    return compress(delta, pad=" ")
|
||||
|
||||
|
||||
def replace_digit_symbol(w2v_out):
    """Rewrite spelled-out digit words ("zero".."nine") in *w2v_out* as numerals.

    The input is lowercased first so the word substitutions match.
    """
    out = w2v_out.lower()
    for digit in range(10):
        out = re.sub(num2words(digit), str(digit), out)
    return out
|
||||
|
||||
|
||||
def discard_except_digits(inp):
    """Return *inp* with every character outside '0'-'9' removed."""
    return "".join(ch for ch in inp if "0" <= ch <= "9")
|
||||
|
||||
|
||||
def digits_to_chars(text):
    """Spell out each ASCII digit in *text* as a word followed by a space.

    Non-digit characters pass through untouched; the result is lowercased.
    """
    pieces = []
    for ch in text:
        if "0" <= ch <= "9":
            pieces.append(num2words(ch) + " ")
        else:
            pieces.append(ch)
    return "".join(pieces).lower()
|
||||
|
||||
|
||||
def replace_redundant_spaces_with(text, sub):
    """Collapse every run of consecutive spaces in *text* into one *sub*."""
    runs_of_spaces = re.compile(" +")
    return runs_of_spaces.sub(sub, text)
|
||||
|
||||
|
||||
def space_out(text):
    """Insert a single space between every character of *text*."""
    return " ".join(text)
|
||||
|
||||
|
||||
def wav_bytes(audio_bytes, frame_rate=24000):
    """Wrap raw 16-bit mono PCM *audio_bytes* in a WAV container.

    Returns the complete WAV file as bytes; *frame_rate* sets the sample
    rate written into the header (default 24 kHz).
    """
    container = io.BytesIO()
    writer = wave.open(container, mode="w")
    try:
        writer.setnchannels(1)       # mono
        writer.setframerate(frame_rate)
        writer.setsampwidth(2)       # 16-bit samples
        writer.writeframesraw(audio_bytes)
    finally:
        # Closing finalizes the RIFF header sizes.
        writer.close()
    return container.getvalue()
|
||||
|
||||
|
||||
def tscript_uuid_fname(transcript):
    """Build a unique file stem: '<uuid4>_<slug>' with the slug capped at 8 chars."""
    return "{}_{}".format(uuid4(), slugify(transcript, max_length=8))
|
||||
|
||||
|
||||
def run_shell(cmd_str, work_dir="."):
    """Run *cmd_str* through the shell in *work_dir*, streaming output live.

    stdout and stderr are merged and printed line by line as the child
    produces them.

    Returns the child's exit code.  The original version never waited on
    the process (leaving a zombie until GC) and discarded the exit status,
    so failures went unnoticed by callers; returning ``p.wait()`` fixes
    both while remaining backward-compatible (previous return was None,
    which no caller could have used).
    """
    cwd_path = Path(work_dir).absolute()
    p = subprocess.Popen(
        cmd_str,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # interleave stderr into the same stream
        shell=True,
        cwd=cwd_path,
    )
    for line in p.stdout:
        # Strip the trailing newline; decode defensively so one bad byte
        # in command output cannot kill the whole run.
        print(line.replace(b"\n", b"").decode("utf-8", errors="replace"))
    return p.wait()  # reap the child and surface its exit status
|
||||
|
||||
|
||||
def upload_s3(dataset_path, s3_path):
    """Sync the local *dataset_path* directory to *s3_path* using the AWS CLI."""
    sync_cmd = f"aws s3 sync {dataset_path} {s3_path}"
    run_shell(sync_cmd)
|
||||
|
||||
|
||||
def get_download_path(s3_uri, output_path):
    """Map an s3:// URI onto a local path under *output_path*.

    Mirrors the object key as a relative path (leading '/' stripped) and
    creates the parent directories so the file can be written immediately.
    """
    object_key = urlsplit(s3_uri).path[1:]
    target = output_path / Path(object_key)
    target.parent.mkdir(exist_ok=True, parents=True)
    return target
|
||||
|
||||
|
||||
def s3_downloader():
    """Create a download closure sharing one boto3 S3 client across calls."""
    client = boto3.client("s3")

    def download_s3(s3_uri, download_path):
        """Fetch *s3_uri* into *download_path* unless it already exists."""
        parts = urlsplit(s3_uri)
        download_path.parent.mkdir(exist_ok=True, parents=True)
        if download_path.exists():
            return  # already on disk; skip the network round-trip
        print(f"downloading {s3_uri} to {download_path}")
        # Object key is the URI path without the leading '/'.
        client.download_file(parts.netloc, parts.path[1:], str(download_path))

    return download_s3
|
||||
|
||||
|
||||
def asr_data_writer(dataset_dir, asr_data_source, verbose=False):
    """Materialize an ASR dataset from (transcript, duration, wav_bytes) triples.

    Writes each clip under ``dataset_dir/wavs`` with a UUID-slug name and
    appends a JSON line per clip to ``dataset_dir/manifest.json``.

    Returns the number of datapoints written.
    """
    (dataset_dir / Path("wavs")).mkdir(parents=True, exist_ok=True)
    asr_manifest = dataset_dir / Path("manifest.json")
    num_datapoints = 0
    with asr_manifest.open("w") as mf:
        print(f"writing manifest to {asr_manifest}")
        for transcript, audio_dur, wav_data in asr_data_source:
            stem = tscript_uuid_fname(transcript)
            audio_file = dataset_dir / Path("wavs") / Path(stem).with_suffix(".wav")
            audio_file.write_bytes(wav_data)
            # Manifest entries use paths relative to the dataset root.
            rel_data_path = audio_file.relative_to(dataset_dir)
            mf.write(manifest_str(str(rel_data_path), audio_dur, transcript))
            if verbose:
                print(f"writing '{transcript}' of duration {audio_dur}")
            num_datapoints += 1
    return num_datapoints
|
||||
|
||||
|
||||
def ui_data_generator(dataset_dir, asr_data_source, verbose=False):
    """Write wav files and build UI-dump records (with waveform plots) in parallel.

    Consumes (transcript, duration, wav_bytes, caller_name, audio_segment)
    tuples from *asr_data_source*, writes each wav under ``dataset_dir/wavs``
    immediately, and defers the per-item record construction (including the
    waveform PNG under ``dataset_dir/wav_plots``) to ``parallel_apply``.

    Returns ``(ui_data, num_datapoints)`` where ui_data is a list of dicts.
    NOTE(review): *verbose* is accepted but never used here — confirm intent.
    """
    (dataset_dir / Path("wavs")).mkdir(parents=True, exist_ok=True)
    (dataset_dir / Path("wav_plots")).mkdir(parents=True, exist_ok=True)

    def data_fn(
        transcript,
        audio_dur,
        wav_data,
        caller_name,
        aud_seg,
        fname,
        audio_file,
        num_datapoints,
        rel_data_path,
    ):
        # Build one UI record; renders the waveform plot on first use only.
        png_path = Path(fname).with_suffix(".png")
        rel_plot_path = Path("wav_plots") / png_path
        wav_plot_path = dataset_dir / rel_plot_path
        if not wav_plot_path.exists():
            plot_seg(wav_plot_path.absolute(), audio_file)
        return {
            "audio_path": str(rel_data_path),
            "duration": round(audio_dur, 1),
            "text": transcript,
            "real_idx": num_datapoints,
            "caller": caller_name,
            "utterance_id": fname,
            "plot_path": str(rel_plot_path),
        }

    num_datapoints = 0
    data_funcs = []
    for transcript, audio_dur, wav_data, caller_name, aud_seg in asr_data_source:
        fname = str(uuid4()) + "_" + slugify(transcript, max_length=8)
        audio_file = (
            dataset_dir / Path("wavs") / Path(fname).with_suffix(".wav")
        ).absolute()
        audio_file.write_bytes(wav_data)
        # audio_path = str(audio_file)
        rel_data_path = audio_file.relative_to(dataset_dir.absolute())
        # Bind loop values positionally via partial so each deferred call
        # sees this iteration's values, not the last iteration's.
        data_funcs.append(
            partial(
                data_fn,
                transcript,
                audio_dur,
                wav_data,
                caller_name,
                aud_seg,
                fname,
                audio_file,
                num_datapoints,
                rel_data_path,
            )
        )
        num_datapoints += 1
    # Run the deferred record builders (plot rendering dominates the cost).
    ui_data = parallel_apply(lambda x: x(), data_funcs)
    return ui_data, num_datapoints
|
||||
|
||||
|
||||
def ui_dump_manifest_writer(dataset_dir, asr_data_source, verbose=False):
    """Generate UI-dump records, then write both manifest.json and ui_dump.json.

    Delegates dataset materialization to ``ui_data_generator`` and derives
    the ASR manifest lines from the returned records.

    Returns the number of datapoints written.
    """
    dump_data, num_datapoints = ui_data_generator(
        dataset_dir, asr_data_source, verbose=verbose
    )

    asr_manifest = dataset_dir / Path("manifest.json")
    with asr_manifest.open("w") as mf:
        print(f"writing manifest to {asr_manifest}")
        for d in dump_data:
            # Re-use the fields computed by ui_data_generator for the manifest.
            rel_data_path = d["audio_path"]
            audio_dur = d["duration"]
            transcript = d["text"]
            manifest = manifest_str(str(rel_data_path), audio_dur, transcript)
            mf.write(manifest)

    # Full records (plots, caller ids, indices) go to the UI dump file.
    ui_dump_file = dataset_dir / Path("ui_dump.json")
    ExtendedPath(ui_dump_file).write_json({"data": dump_data})
    return num_datapoints
|
||||
|
||||
|
||||
def asr_manifest_reader(data_manifest_path: Path):
    """Yield manifest records from a JSON-lines ASR manifest.

    Each yielded dict additionally carries ``audio_path`` — the wav path
    resolved against the manifest's directory — and a stripped ``text``.
    """
    print(f"reading manifest from {data_manifest_path}")
    with data_manifest_path.open("r") as pf:
        records = [json.loads(line) for line in pf.readlines()]
        for record in records:
            record["audio_path"] = data_manifest_path.parent / Path(
                record["audio_filepath"]
            )
            record["text"] = record["text"].strip()
            yield record
|
||||
|
||||
|
||||
def asr_manifest_writer(asr_manifest_path: Path, manifest_str_source):
    """Write an ASR manifest file from an iterable of manifest dicts.

    Each dict must provide 'audio_filepath', 'duration' and 'text'.
    """
    with asr_manifest_path.open("w") as mf:
        print(f"opening {asr_manifest_path} for writing manifest")
        for entry in manifest_str_source:
            line = manifest_str(
                entry["audio_filepath"], entry["duration"], entry["text"]
            )
            mf.write(line)
|
||||
|
||||
|
||||
def asr_test_writer(out_file_path: Path, source):
    """Write a call-flow test script plus a companion '.result.json'.

    For every record in *source* a PLAY/PAUSE command stanza is emitted to
    *out_file_path*; all records are also collected into
    ``<out_file_path>.result.json`` for later comparison.
    """

    def to_command(entry, idx):
        """Render one test stanza for *entry* (idx kept for alternate formats)."""
        wav = entry["audio_filepath"]
        # dur = entry["duration"]
        # return f"SAY {idx}\nPAUSE 3\nPLAY {wav}\nPAUSE 3\n\n"
        return f"PAUSE 2\nPLAY {wav}\nPAUSE 60\n\n"

    res_file = out_file_path.with_suffix(".result.json")
    with out_file_path.open("w") as of:
        print(f"opening {out_file_path} for writing test")
        results = []
        for idx, record in enumerate(source):
            results.append(record)
            of.write(to_command(record, idx))
        of.write("DO_HANGUP\n")
        ExtendedPath(res_file).write_json(results)
|
||||
|
||||
|
||||
def batch(iterable, n=1):
    """Split a sequence into consecutive chunks of length *n* (last may be short).

    *iterable* must support len() and slicing (list, str, tuple, ...).
    """
    total = len(iterable)
    chunks = []
    for start in range(0, total, n):
        chunks.append(iterable[start : start + n])
    return chunks
|
||||
|
||||
|
||||
class ExtendedPath(type(Path())):
    """Path subclass with JSON / JSONL / YAML read-write convenience helpers.

    Subclasses ``type(Path())`` (PosixPath or WindowsPath) so instances work
    everywhere a plain Path does. Write helpers create parent directories.
    """

    def read_json(self):
        """Load and return the JSON document stored at this path."""
        print(f"reading json from {self}")
        with self.open("r") as jf:
            return json.load(jf)

    def read_yaml(self):
        """Load and return the YAML document at this path (safe pure loader)."""
        yaml = YAML(typ="safe", pure=True)
        print(f"reading yaml from {self}")
        with self.open("r") as yf:
            return yaml.load(yf)

    def read_jsonl(self):
        """Yield one parsed JSON object per line of the file."""
        print(f"reading jsonl from {self}")
        with self.open("r") as jf:
            for line in jf.readlines():
                yield json.loads(line)

    def write_json(self, data):
        """Serialize *data* as pretty-printed JSON, creating parent dirs."""
        print(f"writing json to {self}")
        self.parent.mkdir(parents=True, exist_ok=True)
        with self.open("w") as jf:
            json.dump(data, jf, indent=2)

    def write_yaml(self, data):
        """Serialize *data* as YAML (round-trip default loader)."""
        yaml = YAML()
        print(f"writing yaml to {self}")
        with self.open("w") as yf:
            yaml.dump(data, yf)

    def write_jsonl(self, data):
        """Write each item of *data* as one compact JSON line, creating parent dirs."""
        print(f"writing jsonl to {self}")
        self.parent.mkdir(parents=True, exist_ok=True)
        with self.open("w") as jf:
            for item in data:
                jf.write(json.dumps(item) + "\n")
|
||||
|
||||
|
||||
def get_mongo_coll(uri):
    """Resolve a mongodb:// URI naming a database and collection to a handle."""
    parsed = pymongo.uri_parser.parse_uri(uri)
    client = pymongo.MongoClient(uri)
    return client[parsed["database"]][parsed["collection"]]
|
||||
|
||||
|
||||
def get_mongo_conn(host="", port=27017, db="db", col="collection"):
    """Return a collection handle for the given host/port/db/collection.

    When *host* is empty, falls back to $MONGO_HOST and then 'localhost'.
    """
    if host:
        mongo_host = host
    else:
        mongo_host = os.environ.get("MONGO_HOST", "localhost")
    mongo_uri = f"mongodb://{mongo_host}:{port}/"
    return pymongo.MongoClient(mongo_uri)[db][col]
|
||||
|
||||
|
||||
def strip_silence(sound):
    """Trim leading and trailing silence from a pydub AudioSegment."""
    from pydub.silence import detect_leading_silence

    leading = detect_leading_silence(sound)
    # Trailing silence is the leading silence of the reversed clip.
    trailing = detect_leading_silence(sound.reverse())
    return sound[leading : len(sound) - trailing]
|
||||
|
||||
|
||||
def plot_seg(wav_plot_path, audio_path):
    """Render the waveform of *audio_path* to *wav_plot_path* as a 50-dpi PNG."""
    figure = plt.Figure()
    axis = figure.add_subplot()
    samples, sample_rate = librosa.load(str(audio_path))
    audio_display.waveplot(y=samples, sr=sample_rate, ax=axis)
    with wav_plot_path.open("wb") as plot_file:
        figure.set_tight_layout(True)
        figure.savefig(plot_file, format="png", dpi=50)
|
||||
|
||||
|
||||
def parallel_apply(fn, iterable, workers=8, pool="thread"):
    """Apply *fn* to every item of *iterable* concurrently, preserving order.

    Parameters
    ----------
    fn : callable applied to each item.
    iterable : sized iterable (len() is needed for the progress bar).
    workers : max worker count for the executor.
    pool : "thread" for I/O-bound work, "process" for CPU-bound work.

    Returns the list of results in input order. Raises Exception for an
    unknown *pool* value (kept as plain Exception for caller compatibility).

    The original duplicated the entire map/tqdm body for each pool type;
    selecting the executor class via a lookup removes that duplication
    without changing behavior.
    """
    pool_classes = {"thread": ThreadPoolExecutor, "process": ProcessPoolExecutor}
    if pool not in pool_classes:
        raise Exception(f"unsupported pool type - {pool}")
    with pool_classes[pool](max_workers=workers) as exe:
        print(f"parallelly applying {fn}")
        return [
            res
            for res in tqdm(
                exe.map(fn, iterable), position=0, leave=True, total=len(iterable)
            )
        ]
|
||||
|
||||
|
||||
def generate_filter_map(src_dataset_path, dest_dataset_path, data_file):
    """Build the named dataset-filter generators used by the filter CLI.

    Each value in the returned dict is a zero-arg generator factory that
    streams records from *data_file* (JSONL manifest), copies/transcodes the
    referenced audio from *src_dataset_path* into *dest_dataset_path*, and
    yields the (possibly transformed) manifest records to keep.
    """
    # Thresholds shared by the duration-based filters.
    min_nums = 3                  # minimum digit-ish characters in the text
    max_duration = 1 * 60 * 60    # 1 hour, in seconds
    skip_duration = 1 * 60 * 60   # 1 hour, in seconds

    def filtered_max_dur():
        # Keep samples with >= min_nums non-space chars until 1h is collected.
        wav_duration = 0
        for s in ExtendedPath(data_file).read_jsonl():
            nums = re.sub(" ", "", s["text"])
            if len(nums) >= min_nums:
                wav_duration += s["duration"]
                shutil.copy(
                    src_dataset_path / Path(s["audio_filepath"]),
                    dest_dataset_path / Path(s["audio_filepath"]),
                )
                yield s
            if wav_duration > max_duration:
                break
        typer.echo(f"filtered only {duration_str(wav_duration)} of audio")

    def filtered_skip_dur():
        # Complement of filtered_max_dur: skip the first hour of qualifying
        # audio, then keep the rest.
        wav_duration = 0
        for s in ExtendedPath(data_file).read_jsonl():
            nums = re.sub(" ", "", s["text"])
            if len(nums) >= min_nums:
                wav_duration += s["duration"]
            if wav_duration <= skip_duration:
                continue
            # NOTE(review): this elif re-tests the same condition as the
            # accumulation branch above — presumably intentional to drop
            # short-text samples after the skip window; confirm.
            elif len(nums) >= min_nums:
                yield s
                shutil.copy(
                    src_dataset_path / Path(s["audio_filepath"]),
                    dest_dataset_path / Path(s["audio_filepath"]),
                )
        typer.echo(f"skipped {duration_str(skip_duration)} of audio")

    def filtered_blanks():
        # Keep samples whose text is non-blank after removing spaces.
        # NOTE(review): blank_count actually counts the *kept* (non-blank)
        # samples, while the echo message says "blank samples" — verify.
        blank_count = 0
        for s in ExtendedPath(data_file).read_jsonl():
            nums = re.sub(" ", "", s["text"])
            if nums != "":
                blank_count += 1
                shutil.copy(
                    src_dataset_path / Path(s["audio_filepath"]),
                    dest_dataset_path / Path(s["audio_filepath"]),
                )
                yield s
        typer.echo(f"filtered {blank_count} blank samples")

    def filtered_transform_digits():
        # Rewrite text: digit words -> numerals -> keep digits -> spell out.
        count = 0
        for s in ExtendedPath(data_file).read_jsonl():
            count += 1
            digit_text = replace_digit_symbol(s["text"])
            only_digits = discard_except_digits(digit_text)
            char_text = digits_to_chars(only_digits)
            shutil.copy(
                src_dataset_path / Path(s["audio_filepath"]),
                dest_dataset_path / Path(s["audio_filepath"]),
            )
            s["text"] = char_text
            yield s
        typer.echo(f"transformed {count} samples")

    def filtered_extract_chars():
        # Normalize text to uppercase letters/apostrophes, digits spelled out.
        count = 0
        for s in ExtendedPath(data_file).read_jsonl():
            count += 1
            no_digits = digits_to_chars(s["text"]).upper()
            only_chars = re.sub("[^A-Z'\b]", " ", no_digits)
            filter_text = replace_redundant_spaces_with(only_chars, " ").strip()
            shutil.copy(
                src_dataset_path / Path(s["audio_filepath"]),
                dest_dataset_path / Path(s["audio_filepath"]),
            )
            s["text"] = filter_text
            yield s
        typer.echo(f"transformed {count} samples")

    def filtered_resample():
        # Transcode audio to mono / 8-bit samples / 24 kHz wav.
        count = 0
        for s in ExtendedPath(data_file).read_jsonl():
            count += 1
            src_aud = pydub.AudioSegment.from_file(
                src_dataset_path / Path(s["audio_filepath"])
            )
            dst_aud = src_aud.set_channels(1).set_sample_width(1).set_frame_rate(24000)
            dst_aud.export(dest_dataset_path / Path(s["audio_filepath"]), format="wav")
            yield s
        typer.echo(f"transformed {count} samples")

    # CLI filter-name -> generator factory.
    filter_kind_map = {
        "max_dur_1hr_min3num": filtered_max_dur,
        "skip_dur_1hr_min3num": filtered_skip_dur,
        "blanks": filtered_blanks,
        "transform_digits": filtered_transform_digits,
        "extract_chars": filtered_extract_chars,
        "resample_ulaw24kmono": filtered_resample,
    }
    return filter_kind_map
|
||||
117
plume/utils/align.py
Normal file
117
plume/utils/align.py
Normal file
@@ -0,0 +1,117 @@
|
||||
from pathlib import Path
|
||||
from .tts import GoogleTTS
|
||||
# from IPython import display
|
||||
import requests
|
||||
import io
|
||||
import typer
|
||||
|
||||
from plume.utils import lazy_module
|
||||
|
||||
display = lazy_module('IPython.display')
|
||||
pydub = lazy_module('pydub')
|
||||
|
||||
app = typer.Typer()
|
||||
|
||||
# Start gentle with following command
|
||||
# docker run --rm -d --name gentle_service -p 8765:8765/tcp lowerquality/gentle
|
||||
|
||||
|
||||
def gentle_aligner(service_uri, wav_data, utter_text):
    """POST audio + transcript to a gentle forced-alignment service.

    *wav_data* (raw wav bytes) is transcoded to mp3 before upload; the
    request is synchronous (``async=false``).

    Returns ``(wav_seg, alignment)`` — the decoded pydub AudioSegment and
    the service's JSON response.
    """
    # service_uri= "http://52.41.161.36:8765/transcriptions"
    wav_f = io.BytesIO(wav_data)
    wav_seg = pydub.AudioSegment.from_file(wav_f)

    # gentle accepts mp3; transcode in memory and rewind for upload.
    mp3_f = io.BytesIO()
    wav_seg.export(mp3_f, format="mp3")
    mp3_f.seek(0)
    params = (("async", "false"),)
    files = {
        "audio": ("audio.mp3", mp3_f),
        "transcript": ("words.txt", io.BytesIO(utter_text.encode("utf-8"))),
    }

    response = requests.post(service_uri, params=params, files=files)
    print(f"Time duration of audio {wav_seg.duration_seconds}")
    print(f"Time taken to align: {response.elapsed}s")
    return wav_seg, response.json()
|
||||
|
||||
|
||||
def gentle_align_iter(service_uri, wav_data, utter_text):
    """Yield (word, AudioSegment) pairs for each aligned span from gentle."""
    wav_seg, response = gentle_aligner(service_uri, wav_data, utter_text)
    for span in response:
        start_ms = int(span["start"] * 1000)
        end_ms = int(span["end"] * 1000)
        yield span["word"], wav_seg[start_ms:end_ms]
|
||||
|
||||
|
||||
def tts_jupyter(service_uri="http://localhost:8765/transcriptions"):
    """Jupyter demo: synthesize a sentence with Google TTS and display each
    word-aligned audio segment.

    Parameters
    ----------
    service_uri : gentle alignment service endpoint (new, defaulted — the
        original called ``gentle_align_iter(wav_data, utter_text)`` without
        the required *service_uri* argument, which always raised TypeError).
    """
    google_voices = GoogleTTS.voice_list()
    gtts = GoogleTTS()
    # Pick the first US-English voice from the catalog.
    us_voice = [v for v in google_voices if v["language"] == "en-US"][0]
    utter_text = (
        "I would like to align the audio segments based on word level timestamps"
    )
    wav_data = gtts.text_to_speech(text=utter_text, params=us_voice)
    for word, seg in gentle_align_iter(service_uri, wav_data, utter_text):
        print(word)
        display.display(seg)
|
||||
|
||||
|
||||
@app.command()
def cut(audio_path: Path, transcript_path: Path, out_dir: Path = "/tmp"):
    """Cut the first 15 minutes of an audio file and extract matching words.

    Writes ``audio.mp3`` (first 15 min of *audio_path*) and ``words.txt``
    (concatenated text elements of the transcript JSON up to the 15-minute
    mark) into *out_dir*, for feeding to the gentle aligner.
    """
    from . import ExtendedPath
    import datetime
    import re

    aud_seg = pydub.AudioSegment.from_file(audio_path)
    # pydub slices are in milliseconds: keep the first 15 minutes.
    aud_seg[: 15 * 60 * 1000].export(out_dir / Path("audio.mp3"), format="mp3")
    tscript_json = ExtendedPath(transcript_path).read_json()

    def time_to_msecs(time_str):
        # Parse "HH:MM:SS,mmm" (SRT-style) into milliseconds since 0.
        return (
            datetime.datetime.strptime(time_str, "%H:%M:%S,%f")
            - datetime.datetime(1900, 1, 1)
        ).total_seconds() * 1000

    tscript_words = []
    broken = False
    for m in tscript_json["monologues"]:
        # tscript_words.append("|")
        for e in m["elements"]:
            if e["type"] == "text":
                text = e["value"]
                # Drop bracketed/parenthesized annotations (e.g. [noise]).
                text = re.sub(r"\[.*\]", "", text)
                text = re.sub(r"\(.*\)", "", text)
                tscript_words.append(text)
            # Stop collecting once an element is timestamped past 15 minutes;
            # the flag breaks out of both loops.
            if "timestamp" in e and time_to_msecs(e["timestamp"]) >= 15 * 60 * 1000:
                broken = True
                break
        if broken:
            break
    (out_dir / Path("words.txt")).write_text("".join(tscript_words))
|
||||
|
||||
|
||||
@app.command()
def gentle_preview(
    audio_path: Path,
    transcript_path: Path,
    service_uri="http://101.53.142.218:8765/transcriptions",
    gent_preview_dir="../gentle_preview",
):
    """Align an audio file against a transcript and write the artifacts the
    gentle preview UI expects (a.wav + status.json)."""
    from . import ExtendedPath

    audio_bytes = audio_path.read_bytes()
    transcript_text = transcript_path.read_text()
    audio, alignment = gentle_aligner(service_uri, audio_bytes, transcript_text)
    audio.export(gent_preview_dir / Path("a.wav"), format="wav")
    alignment["status"] = "OK"
    ExtendedPath(gent_preview_dir / Path("status.json")).write_json(alignment)
|
||||
|
||||
|
||||
def main():
    """Console entry point: dispatch to the typer CLI app."""
    app()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
28
plume/utils/audio.py
Normal file
28
plume/utils/audio.py
Normal file
@@ -0,0 +1,28 @@
|
||||
from scipy.signal import lfilter, butter
|
||||
from scipy.io.wavfile import read, write
|
||||
from numpy import array, int16
|
||||
import sys
|
||||
|
||||
|
||||
def butter_params(low_freq, high_freq, fs, order=5):
    """Design a Butterworth band-pass filter.

    Frequencies are in Hz; *fs* is the sample rate. Returns the (b, a)
    transfer-function coefficients from scipy.signal.butter.
    """
    nyquist = 0.5 * fs
    normalized_band = [low_freq / nyquist, high_freq / nyquist]
    return butter(order, normalized_band, btype="band")
|
||||
|
||||
|
||||
def butter_bandpass_filter(data, low_freq, high_freq, fs, order=5):
    """Band-pass filter *data* between *low_freq* and *high_freq* Hz at rate *fs*."""
    coeff_b, coeff_a = butter_params(low_freq, high_freq, fs, order=order)
    return lfilter(coeff_b, coeff_a, data)
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # CLI: band-pass filter the wav given as argv[1] between 300 Hz and
    # 4 kHz and write the result next to it as "<name>_moded.wav".
    fs, audio = read(sys.argv[1])
    # Removed a leftover `import pdb; pdb.set_trace()` debugger breakpoint
    # that halted the script on every run.
    low_freq = 300.0
    high_freq = 4000.0
    filtered_signal = butter_bandpass_filter(audio, low_freq, high_freq, fs, order=6)
    fname = sys.argv[1].split(".wav")[0] + "_moded.wav"
    write(fname, fs, array(filtered_signal, dtype=int16))
|
||||
737
plume/utils/lazy_import.py
Normal file
737
plume/utils/lazy_import.py
Normal file
@@ -0,0 +1,737 @@
|
||||
# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
|
||||
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
|
||||
#
|
||||
# lazy_import --- https://github.com/mnmelo/lazy_import
|
||||
# Copyright (C) 2017-2018 Manuel Nuno Melo
|
||||
#
|
||||
# This file is part of lazy_import.
|
||||
#
|
||||
# lazy_import is free software: you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation, either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# lazy_import is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with lazy_import. If not, see <http://www.gnu.org/licenses/>.
|
||||
#
|
||||
# lazy_import was based on code from the importing module from the PEAK
|
||||
# package (see <http://peak.telecommunity.com/DevCenter/Importing>). The PEAK
|
||||
# package is released under the following license, reproduced here:
|
||||
#
|
||||
# Copyright (C) 1996-2004 by Phillip J. Eby and Tyler C. Sarna.
|
||||
# All rights reserved. This software may be used under the same terms
|
||||
# as Zope or Python. THERE ARE ABSOLUTELY NO WARRANTIES OF ANY KIND.
|
||||
# Code quality varies between modules, from "beta" to "experimental
|
||||
# pre-alpha". :)
|
||||
#
|
||||
# Code pertaining to lazy loading from PEAK importing was included in
|
||||
# lazy_import, modified in a number of ways. These are detailed in the
|
||||
# CHANGELOG file of lazy_import. Changes mainly involved Python 3
|
||||
# compatibility, extension to allow customizable behavior, and added
|
||||
# functionality (lazy importing of callable objects).
|
||||
#
|
||||
|
||||
"""
|
||||
Lazy module loading
|
||||
===================
|
||||
Functions and classes for lazy module loading that also delay import errors.
|
||||
Heavily borrowed from the `importing`_ module.
|
||||
.. _`importing`: http://peak.telecommunity.com/DevCenter/Importing
|
||||
Files and directories
|
||||
---------------------
|
||||
.. autofunction:: module
|
||||
.. autofunction:: callable
|
||||
"""
|
||||
|
||||
__all__ = [
|
||||
"lazy_module",
|
||||
"lazy_callable",
|
||||
"lazy_function",
|
||||
"lazy_class",
|
||||
"LazyModule",
|
||||
"LazyCallable",
|
||||
"module_basename",
|
||||
"_MSG",
|
||||
"_MSG_CALLABLE",
|
||||
]
|
||||
|
||||
from types import ModuleType
|
||||
import sys
|
||||
|
||||
try:
|
||||
from importlib._bootstrap import _ImportLockContext
|
||||
except ImportError:
|
||||
# Python 2 doesn't have the context manager. Roll it ourselves (copied from
|
||||
# Python 3's importlib/_bootstrap.py)
|
||||
import imp
|
||||
|
||||
class _ImportLockContext:
|
||||
"""Context manager for the import lock."""
|
||||
|
||||
def __enter__(self):
|
||||
imp.acquire_lock()
|
||||
|
||||
def __exit__(self, exc_type, exc_value, exc_traceback):
|
||||
imp.release_lock()
|
||||
|
||||
|
||||
# Adding a __spec__ doesn't really help. I'll leave the code here in case
|
||||
# future python implementations start relying on it.
|
||||
# try:
|
||||
# from importlib.machinery import ModuleSpec
|
||||
# except ImportError:
|
||||
# ModuleSpec = None
|
||||
|
||||
import six
|
||||
from six import raise_from
|
||||
from six.moves import reload_module
|
||||
|
||||
# It is sometime useful to have access to the version number of a library.
|
||||
# This is usually done through the __version__ special attribute.
|
||||
# To make sure the version number is consistent between setup.py and the
|
||||
# library, we read the version number from the file called VERSION that stays
|
||||
# in the module directory.
|
||||
import os
|
||||
|
||||
# VERSION_FILE = os.path.join(os.path.dirname(__file__), "VERSION")
|
||||
# with open(VERSION_FILE) as infile:
|
||||
# __version__ = infile.read().strip()
|
||||
|
||||
# Logging
|
||||
import logging
|
||||
|
||||
# adding a TRACE level for stack debugging
|
||||
_LAZY_TRACE = 1
|
||||
logging.addLevelName(1, "LAZY_TRACE")
|
||||
logging.basicConfig(level=logging.WARNING)
|
||||
# Logs a formatted stack (takes no message or args/kwargs)
|
||||
# Logs a formatted stack (takes no message or args/kwargs)
def _lazy_trace(self):
    """Log the current call stack at the custom LAZY_TRACE level.

    Bound onto logging.Logger below; emits nothing unless the logger is
    enabled for level 1 (LAZY_TRACE), so it is free in normal operation.
    """
    if self.isEnabledFor(_LAZY_TRACE):
        import traceback

        self._log(_LAZY_TRACE, " ### STACK TRACE ###", ())
        # Skip the two innermost frames (this helper and its caller).
        for line in traceback.format_stack(sys._getframe(2)):
            for subline in line.split("\n"):
                self._log(_LAZY_TRACE, subline.rstrip(), ())
|
||||
|
||||
|
||||
logging.Logger.lazy_trace = _lazy_trace
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
################################
|
||||
# Module/function registration #
|
||||
################################
|
||||
|
||||
#### Lazy classes ####
|
||||
|
||||
|
||||
class LazyModule(ModuleType):
    """Class for lazily-loaded modules that triggers proper loading on access.
    Instantiation should be made from a subclass of :class:`LazyModule`, with
    one subclass per instantiated module. Regular attribute set/access can then
    be recovered by setting the subclass's :meth:`__getattribute__` and
    :meth:`__setattribute__` to those of :class:`types.ModuleType`.
    """

    # peak.util.imports sets __slots__ to (), but it seems pointless because
    # the base ModuleType doesn't itself set __slots__.
    def __getattribute__(self, attr):
        # Any attribute access may trigger the real import (via _load_module),
        # except the short-circuit cases handled below.
        logger.debug(
            "Getting attr {} of LazyModule instance of {}".format(
                attr, super(LazyModule, self).__getattribute__("__name__")
            )
        )
        logger.lazy_trace()
        # IPython tries to be too clever and constantly inspects, asking for
        # modules' attrs, which causes premature module loading and unesthetic
        # internal errors if the lazily-loaded module doesn't exist.
        if (
            run_from_ipython()
            and (attr.startswith(("__", "_ipython")) or attr == "_repr_mimebundle_")
            and module_basename(_caller_name()) in ("inspect", "IPython")
        ):
            logger.debug(
                "Ignoring request for {}, deemed from IPython's "
                "inspection.".format(
                    super(LazyModule, self).__getattribute__("__name__"), attr
                )
            )
            raise AttributeError
        if not attr in ("__name__", "__class__", "__spec__"):
            # __name__ and __class__ yield their values from the LazyModule;
            # __spec__ causes an AttributeError. Maybe in the future it will be
            # necessary to return an actual ModuleSpec object, but it works as
            # it is without that now.

            # If it's an already-loaded submodule, we return it without
            # triggering a full loading
            try:
                return sys.modules[self.__name__ + "." + attr]
            except KeyError:
                pass
            # Check if it's one of the lazy callables
            try:
                _callable = type(self)._lazy_import_callables[attr]
                logger.debug("Returning lazy-callable '{}'.".format(attr))
                return _callable
            except (AttributeError, KeyError) as err:
                # Not a registered lazy callable: perform the real import now.
                logger.debug(
                    "Proceeding to load module {}, "
                    "from requested value {}".format(
                        super(LazyModule, self).__getattribute__("__name__"), attr
                    )
                )
                _load_module(self)
        logger.debug(
            "Returning value '{}'.".format(
                super(LazyModule, self).__getattribute__(attr)
            )
        )
        return super(LazyModule, self).__getattribute__(attr)

    def __setattr__(self, attr, value):
        # Setting any attribute also forces the real module to load first,
        # so the assignment lands on the fully-imported module object.
        logger.debug(
            "Setting attr {} to value {}, in LazyModule instance "
            "of {}".format(
                attr, value, super(LazyModule, self).__getattribute__("__name__")
            )
        )
        _load_module(self)
        return super(LazyModule, self).__setattr__(attr, value)
|
||||
|
||||
|
||||
class LazyCallable(object):
    """Class for lazily-loaded callables that triggers module loading on access
    """

    def __init__(self, *args):
        # Expected args: (module_or_lazy_module, callable_name).
        if len(args) != 2:
            # Maybe the user tried to base a class off this lazy callable?
            try:
                logger.debug(
                    "Got wrong number of args when init'ing "
                    "LazyCallable. args is '{}'".format(args)
                )
                base = args[1][0]
                if isinstance(base, LazyCallable) and len(args) == 3:
                    raise NotImplementedError(
                        "It seems you are trying to use "
                        "a lazy callable as a class "
                        "base. This is not supported."
                    )
            except (IndexError, TypeError):
                raise_from(
                    TypeError(
                        "LazyCallable takes exactly 2 arguments: "
                        "a module/lazy module object and the name of "
                        "a callable to be lazily loaded."
                    ),
                    None,
                )
        self.module, self.cname = args
        self.modclass = type(self.module)
        # Resolved target; stays None until first call succeeds.
        self.callable = None
        # Need to save these, since the module-loading gets rid of them
        self.error_msgs = self.modclass._lazy_import_error_msgs
        self.error_strings = self.modclass._lazy_import_error_strings

    def __call__(self, *args, **kwargs):
        """Resolve the real callable on first use, then delegate to it."""
        # No need to go through all the reloading more than once.
        if self.callable:
            return self.callable(*args, **kwargs)
        # Deregister ourselves so module loading exposes the real attribute.
        try:
            del self.modclass._lazy_import_callables[self.cname]
        except (AttributeError, KeyError):
            pass
        try:
            self.callable = getattr(self.module, self.cname)
        except AttributeError:
            # Module loaded but the named callable isn't there: report with
            # the message template captured at construction time.
            msg = self.error_msgs["msg_callable"]
            raise_from(
                AttributeError(msg.format(callable=self.cname, **self.error_strings)),
                None,
            )
        except ImportError as err:
            # Import failed. We reset the dict and re-raise the ImportError.
            try:
                self.modclass._lazy_import_callables[self.cname] = self
            except AttributeError:
                self.modclass._lazy_import_callables = {self.cname: self}
            raise_from(err, None)
        else:
            return self.callable(*args, **kwargs)
|
||||
|
||||
|
||||
### Functions ###
|
||||
|
||||
|
||||
def lazy_module(modname, error_strings=None, lazy_mod_class=LazyModule, level="leaf"):
    """Lazily import *modname*, returning a hollow module object.

    The hollow module is registered in `sys.modules`, so later plain
    ``import modname`` statements also yield the lazy object. The actual
    import work (and any resulting ``ImportError``) is deferred until an
    attribute of the module is first accessed.

    Parameters
    ----------
    modname : str
        Dotted name of the module to import lazily.
    error_strings : dict, optional
        Strings used to build the error message shown if loading eventually
        fails. Key 'msg' is the message template (default
        :attr:`lazy_import._MSG`), formatted with the remaining keys
        ('module', 'caller', 'install_name'). Missing keys get smart
        defaults; the dict is filled in place.
    lazy_mod_class : type, optional
        Class instantiated for the hollow module. Must be a subclass of
        :class:`LazyModule` (the default).
    level : {'leaf', 'base'}, optional
        Which reference to return: the deepest submodule of *modname*
        ('leaf', the lazy analogue of ``from aaa.bbb import ccc``) or its
        top-level package ('base', analogous to ``import aaa.bbb.ccc``).
        Every level is registered in `sys.modules` regardless.

    Returns
    -------
    module
        An instance of *lazy_mod_class* standing in for *modname* (or its
        base package). It mutates into the real module upon first
        attribute access.

    Raises
    ------
    ValueError
        If *level* is neither 'leaf' nor 'base'.

    Examples
    --------
    >>> import lazy_import, sys
    >>> np = lazy_import.lazy_module("numpy")
    >>> np
    Lazily-loaded module numpy
    >>> np is sys.modules['numpy']
    True
    >>> np.pi  # first attribute access performs the real import
    3.141592653589793

    See Also
    --------
    :func:`lazy_callable`
    :class:`LazyModule`
    """
    err_strings = {} if error_strings is None else error_strings
    _set_default_errornames(modname, err_strings)

    leaf = _lazy_module(modname, err_strings, lazy_mod_class)
    if level == "leaf":
        return leaf
    if level == "base":
        return sys.modules[module_basename(modname)]
    raise ValueError("Parameter 'level' must be one of ('base', 'leaf')")
|
||||
|
||||
|
||||
def _lazy_module(modname, error_strings, lazy_mod_class):
    """Create (or reuse) lazy module entries for *modname* and its parents.

    Walks from the leaf module up to the base package. For every level not
    already present in ``sys.modules`` a fresh throwaway subclass of
    *lazy_mod_class* is created (so error strings and the callable/submodule
    registries are per-module state) and a hollow instance is registered.
    Each child is also attached as an attribute of its parent. Returns the
    leaf entry from ``sys.modules``.
    """
    with _ImportLockContext():
        fullmodname = modname
        fullsubmodname = None
        # ensure parent module/package is in sys.modules
        # and parent.modname=module, as soon as the parent is imported
        while modname:
            try:
                mod = sys.modules[modname]
                # We reached a (base) module that's already loaded. Let's stop
                # the cycle. Can't use 'break' because we still want to go
                # through the fullsubmodname check below.
                modname = ""
            except KeyError:
                err_s = error_strings.copy()
                err_s.setdefault("module", modname)

                # One ad-hoc subclass per module level so class attributes
                # (messages, registries) are not shared between modules.
                class _LazyModule(lazy_mod_class):
                    _lazy_import_error_msgs = {"msg": err_s.pop("msg")}
                    try:
                        _lazy_import_error_msgs["msg_callable"] = err_s.pop(
                            "msg_callable"
                        )
                    except KeyError:
                        pass
                    _lazy_import_error_strings = err_s
                    _lazy_import_callables = {}
                    _lazy_import_submodules = {}

                    def __repr__(self):
                        return "Lazily-loaded module {}".format(self.__name__)

                # A bit of cosmetic, to make AttributeErrors read more natural
                _LazyModule.__name__ = "module"
                # Actual module instantiation
                mod = sys.modules[modname] = _LazyModule(modname)
                # No need for __spec__. Maybe in the future.
                # if ModuleSpec:
                #     ModuleType.__setattr__(mod, '__spec__',
                #                            ModuleSpec(modname, None))
            if fullsubmodname:
                # Wire the previously-visited child onto this (parent) module,
                # bypassing any lazy __setattr__ via the ModuleType slot.
                submod = sys.modules[fullsubmodname]
                ModuleType.__setattr__(mod, submodname, submod)
                _LazyModule._lazy_import_submodules[submodname] = submod
            fullsubmodname = modname
            modname, _, submodname = modname.rpartition(".")
        return sys.modules[fullmodname]
|
||||
|
||||
|
||||
def lazy_callable(modname, *names, **kwargs):
    """Performs lazy importing of one or more callables.

    :func:`lazy_callable` creates functions that are thin wrappers that pass
    any and all arguments straight to the target module's callables. These can
    be functions or classes. The full loading of that module is only actually
    triggered when the returned lazy function itself is called. This lazy
    import of the target module uses the same mechanism as
    :func:`lazy_module`.

    If, however, the target module has already been fully imported prior
    to invocation of :func:`lazy_callable`, then the target callables
    themselves are returned and no lazy imports are made.

    :func:`lazy_function` and :func:`lazy_class` are aliases of
    :func:`lazy_callable`.

    Parameters
    ----------
    modname : str
        The base module from where to import the callable(s) in *names*,
        or a full 'module_name.callable_name' string.
    names : str (optional)
        The callable name(s) to import from the module specified by *modname*.
        If left empty, *modname* is assumed to also include the callable name
        to import.
    error_strings : dict, optional
        A dictionary of strings to use when reporting loading errors (either a
        missing module, or a missing callable name in the loaded module).
        *error_strings* follows the same usage as described under
        :func:`lazy_module`, with the exceptions that 1) a further key,
        'msg_callable', can be supplied to be used as the error when a module
        is successfully loaded but the target callable can't be found therein
        (defaulting to :attr:`lazy_import._MSG_CALLABLE`); 2) a key 'callable'
        is always added with the callable name being loaded.
    lazy_mod_class : type, optional
        See definition under :func:`lazy_module`.
    lazy_call_class : type, optional
        Analogously to *lazy_mod_class*, allows setting a custom class to
        handle lazy callables, other than the default :class:`LazyCallable`.

    Returns
    -------
    wrapper function or tuple of wrapper functions
        If *names* is passed, returns a tuple of wrapper functions, one for
        each element in *names*.
        If only *modname* is passed it is assumed to be a full
        'module_name.callable_name' string, in which case the wrapper for the
        imported callable is returned directly, and not in a tuple.

    Notes
    -----
    Unlike :func:`lazy_module`, which returns a lazy module that eventually
    mutates into the fully-functional version, :func:`lazy_callable` only
    returns thin wrappers that never change. The wrapper never truly becomes
    the object under the module's namespace, even after successful loading of
    the module in *modname*. This is fine for most practical use cases, but
    breaks code that relies on the usage of the returned objects other than
    calling them — e.g. a lazily imported class can be instantiated through
    the wrapper, but cannot be used as a base class.

    Examples
    --------
    >>> import lazy_import, sys
    >>> fn = lazy_import.lazy_callable("numpy.arange")
    >>> sys.modules['numpy']
    Lazily-loaded module numpy
    >>> fn(10)
    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

    See Also
    --------
    :func:`lazy_module`
    :class:`LazyCallable`
    :class:`LazyModule`
    """
    if not names:
        # A single 'module_name.callable_name' string was passed:
        # split the callable name off the module path.
        modname, _, name = modname.rpartition(".")
    lazy_mod_class = _setdef(kwargs, "lazy_mod_class", LazyModule)
    lazy_call_class = _setdef(kwargs, "lazy_call_class", LazyCallable)
    error_strings = _setdef(kwargs, "error_strings", {})
    _set_default_errornames(modname, error_strings, call=True)

    if not names:
        # Single-callable form: return the wrapper directly, not in a tuple.
        return _lazy_callable(
            modname, name, error_strings.copy(), lazy_mod_class, lazy_call_class
        )
    return tuple(
        _lazy_callable(
            modname, cname, error_strings.copy(), lazy_mod_class, lazy_call_class
        )
        for cname in names
    )
|
||||
|
||||
|
||||
# Convenience aliases: lazily importing a function or a class goes through
# the same wrapper machinery.
lazy_function = lazy_class = lazy_callable
|
||||
|
||||
|
||||
def _lazy_callable(modname, cname, error_strings, lazy_mod_class, lazy_call_class):
    """Return a lazy wrapper for ``modname.cname`` (or the real attribute).

    If the module is still a lazy stub, at most one wrapper per callable
    name is registered on its class and the registered wrapper is returned.
    If the module has already been fully imported, plain ``getattr``
    returns the real callable and no wrapper is created.
    """
    # We could do most of this in the LazyCallable __init__, but here we can
    # pre-check whether to actually be lazy or not.
    module = _lazy_module(modname, error_strings, lazy_mod_class)
    modclass = type(module)
    if issubclass(modclass, LazyModule) and hasattr(modclass, "_lazy_import_callables"):
        # Still lazy: register (at most) one wrapper per callable name.
        modclass._lazy_import_callables.setdefault(
            cname, lazy_call_class(module, cname)
        )
    return getattr(module, cname)
|
||||
|
||||
|
||||
#######################
|
||||
# Real module loading #
|
||||
#######################
|
||||
|
||||
|
||||
def _load_module(module):
    """Ensures that a module, and its parents, are properly loaded.

    Strips the lazy behavior from *module*'s class, asks Python to perform
    the real import, and on failure restores the lazy state and re-raises
    a formatted ``ImportError``. Loading the parent first may recursively
    load this module as a side effect, in which case this call bails early.
    """
    modclass = type(module)
    # We only take care of our own LazyModule instances
    if not issubclass(modclass, LazyModule):
        raise TypeError("Passed module is not a LazyModule instance.")
    with _ImportLockContext():
        parent, _, modname = module.__name__.rpartition(".")
        logger.debug("loading module {}".format(modname))
        # We first identify whether this is a loadable LazyModule, then we
        # strip as much of lazy_import behavior as possible (keeping it cached,
        # in case loading fails and we need to reset the lazy state).
        if not hasattr(modclass, "_lazy_import_error_msgs"):
            # Already loaded (no _lazy_import_error_msgs attr). Not reloading.
            return
        # First, ensure the parent is loaded (using recursion; *very* unlikely
        # we'll ever hit a stack limit in this case).
        # _LOADING doubles as a marker: if the parent's import loads us, the
        # attribute disappears (via _clean_lazymodule) and we detect it below.
        modclass._LOADING = True
        try:
            if parent:
                logger.debug("first loading parent module {}".format(parent))
                setattr(sys.modules[parent], modname, module)
            if not hasattr(modclass, "_LOADING"):
                logger.debug("Module {} already loaded by the parent".format(modname))
                # We've been loaded by the parent. Let's bail.
                return
            cached_data = _clean_lazymodule(module)
            try:
                # Get Python to do the real import!
                reload_module(module)
            # NOTE(review): bare except is deliberate here — any failure
            # must restore the lazy state before re-raising.
            except:
                # Loading failed. We reset our lazy state.
                logger.debug("Failed to load module {}. Resetting...".format(modname))
                _reset_lazymodule(module, cached_data)
                raise
            else:
                # Successful load
                logger.debug("Successfully loaded module {}".format(modname))
                delattr(modclass, "_LOADING")
                _reset_lazy_submod_refs(module)

        except (AttributeError, ImportError) as err:
            logger.debug(
                "Failed to load {}.\n{}: {}".format(
                    modname, err.__class__.__name__, err
                )
            )
            # NOTE(review): lazy_trace is not a stdlib logging method —
            # presumably attached to this logger elsewhere; verify.
            logger.lazy_trace()
            # Under Python 3 reloading our dummy LazyModule instances causes an
            # AttributeError if the module can't be found. Would be preferrable
            # if we could always rely on an ImportError. As it is we vet the
            # AttributeError as thoroughly as possible.
            if (six.PY3 and isinstance(err, AttributeError)) and not err.args[
                0
            ] == "'NoneType' object has no attribute 'name'":
                # Not the AttributeError we were looking for.
                raise
            msg = modclass._lazy_import_error_msgs["msg"]
            raise_from(
                ImportError(msg.format(**modclass._lazy_import_error_strings)), None
            )
|
||||
|
||||
|
||||
##############################
|
||||
# Helper functions/constants #
|
||||
##############################
|
||||
|
||||
# Default error message when a lazily imported module ultimately fails to load.
_MSG = (
    "{caller} attempted to use a functionality that requires module "
    "{module}, but it couldn't be loaded. Please install {install_name} "
    "and retry."
)

# Default error message when the module loads but lacks the requested callable.
_MSG_CALLABLE = (
    "{caller} attempted to use a functionality that requires "
    "{callable}, of module {module}, but it couldn't be found in that "
    "module. Please install a version of {install_name} that has "
    "{module}.{callable} and retry."
)

# Class attributes stripped from a LazyModule subclass before the real import
# (cached by _clean_lazymodule, restored on failure by _reset_lazymodule).
_CLS_ATTRS = (
    "_lazy_import_error_strings",
    "_lazy_import_error_msgs",
    "_lazy_import_callables",
    "_lazy_import_submodules",
    "__repr__",
)

# Names of class-level dicts whose entries are module attributes that must be
# detached before loading and re-attached afterwards.
_DELETION_DICT = ("_lazy_import_submodules",)
|
||||
|
||||
|
||||
def _setdef(argdict, name, defaultvalue):
|
||||
"""Like dict.setdefault but sets the default value also if None is present.
|
||||
"""
|
||||
if not name in argdict or argdict[name] is None:
|
||||
argdict[name] = defaultvalue
|
||||
return argdict[name]
|
||||
|
||||
|
||||
def module_basename(modname):
    """Return the top-level package name of a dotted module path."""
    return modname.split(".", 1)[0]
|
||||
|
||||
|
||||
def _set_default_errornames(modname, error_strings, call=False):
    """Fill *error_strings* in place with default error-reporting values.

    Defaults: 'caller' (importing namespace), 'install_name' (base package
    of *modname*) and the 'msg' template; with *call=True* also the
    'msg_callable' template used by lazy callables.
    """
    # The 'module' key is deliberately not defaulted here because it will
    # change for parents of lazily imported submodules.
    defaults = [
        ("caller", _caller_name(3, default="Python")),
        ("install_name", module_basename(modname)),
        ("msg", _MSG),
    ]
    if call:
        defaults.append(("msg_callable", _MSG_CALLABLE))
    for key, value in defaults:
        error_strings.setdefault(key, value)
|
||||
|
||||
|
||||
def _caller_name(depth=2, default=""):
|
||||
"""Returns the name of the calling namespace.
|
||||
"""
|
||||
# the presence of sys._getframe might be implementation-dependent.
|
||||
# It isn't that serious if we can't get the caller's name.
|
||||
try:
|
||||
return sys._getframe(depth).f_globals["__name__"]
|
||||
except AttributeError:
|
||||
return default
|
||||
|
||||
|
||||
def _clean_lazymodule(module):
    """Removes all lazy behavior from a module's class, for loading.

    Also removes all module attributes listed under the module's class deletion
    dictionaries. Deletion dictionaries are class attributes with names
    specified in `_DELETION_DICT`.

    Parameters
    ----------
    module: LazyModule

    Returns
    -------
    dict
        A dictionary of deleted class attributes, that can be used to reset the
        lazy state using :func:`_reset_lazymodule`.
    """
    modclass = type(module)
    _clean_lazy_submod_refs(module)

    # Restore stock ModuleType attribute access so the real import machinery
    # can set attributes without triggering the lazy-loading hooks.
    modclass.__getattribute__ = ModuleType.__getattribute__
    modclass.__setattr__ = ModuleType.__setattr__
    cls_attrs = {}
    for cls_attr in _CLS_ATTRS:
        try:
            # Cache each lazy class attribute before deleting it, so a failed
            # load can restore the lazy state from the returned dict.
            cls_attrs[cls_attr] = getattr(modclass, cls_attr)
            delattr(modclass, cls_attr)
        except AttributeError:
            pass
    return cls_attrs
|
||||
|
||||
|
||||
def _clean_lazy_submod_refs(module):
    """Detach lazily-attached submodule attributes from *module* itself.

    Iterates the class-level deletion dicts named in `_DELETION_DICT` and
    deletes each listed attribute from the module instance.
    """
    modclass = type(module)
    for deldict in _DELETION_DICT:
        try:
            delnames = getattr(modclass, deldict)
        except AttributeError:
            continue
        for delname in delnames:
            try:
                # Bypass the module's (lazy) __delattr__ via the base slot.
                super(LazyModule, module).__delattr__(delname)
            except AttributeError:
                # Maybe raise a warning?
                pass
|
||||
|
||||
|
||||
def _reset_lazymodule(module, cls_attrs):
    """Resets a module's lazy state from cached data.

    Undoes :func:`_clean_lazymodule`: removes the stock ModuleType attribute
    hooks (re-exposing the lazy ones defined on the class hierarchy),
    restores the cached class attributes and re-attaches lazy submodules.
    """
    modclass = type(module)
    # Deleting the overrides makes attribute access fall back to the lazy
    # hooks inherited from the LazyModule base class.
    del modclass.__getattribute__
    del modclass.__setattr__
    try:
        del modclass._LOADING
    except AttributeError:
        pass
    for cls_attr in _CLS_ATTRS:
        try:
            setattr(modclass, cls_attr, cls_attrs[cls_attr])
        except KeyError:
            pass
    _reset_lazy_submod_refs(module)
|
||||
|
||||
|
||||
def _reset_lazy_submod_refs(module):
    """Re-attach submodules recorded in the class deletion dicts to *module*.

    Counterpart of :func:`_clean_lazy_submod_refs`, used after both
    successful and failed loads.
    """
    modclass = type(module)
    for deldict in _DELETION_DICT:
        try:
            resetnames = getattr(modclass, deldict)
        except AttributeError:
            continue
        for name, submod in resetnames.items():
            # Bypass any lazy __setattr__ via the base ModuleType slot.
            super(LazyModule, module).__setattr__(name, submod)
|
||||
|
||||
|
||||
def run_from_ipython():
    """Return True when executing inside an IPython interactive shell."""
    # Taken from https://stackoverflow.com/questions/5376837
    # IPython injects __IPYTHON__ into the builtin namespace; outside of
    # IPython the bare name lookup raises NameError.
    try:
        __IPYTHON__  # noqa: F821 -- only defined under IPython
    except NameError:
        return False
    return True
|
||||
46
plume/utils/lazy_loader.py
Normal file
46
plume/utils/lazy_loader.py
Normal file
@@ -0,0 +1,46 @@
|
||||
# Code copied from https://github.com/tensorflow/tensorflow/blob/master/tensorflow/python/util/lazy_loader.py
|
||||
"""A LazyLoader class."""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
|
||||
import importlib
|
||||
import types
|
||||
|
||||
|
||||
class LazyLoader(types.ModuleType):
    """Module stand-in that defers the real import until first use.

    Keeps large, not-always-needed dependencies (e.g. `contrib`, `ffmpeg`)
    out of the import path until an attribute is actually accessed.
    (Adapted from tensorflow/python/util/lazy_loader.py.)
    """

    # The lint error here is incorrect.
    def __init__(
        self, local_name, parent_module_globals, name
    ):  # pylint: disable=super-on-old-class
        # Remember under which name, and into which namespace, the real
        # module should be published once it is loaded.
        self._local_name = local_name
        self._parent_module_globals = parent_module_globals
        super(LazyLoader, self).__init__(name)

    def _load(self):
        """Import the target module and swap it into the parent namespace."""
        target = importlib.import_module(self.__name__)
        self._parent_module_globals[self._local_name] = target
        # Copy the real module's namespace onto this object so callers who
        # kept a reference to the loader get fast lookups afterwards
        # (__getattr__ only fires on misses).
        self.__dict__.update(target.__dict__)
        return target

    def __getattr__(self, item):
        return getattr(self._load(), item)

    def __dir__(self):
        return dir(self._load())
|
||||
31
plume/utils/serve.py
Normal file
31
plume/utils/serve.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from plume.utils import lazy_module
import typer

# rpyc is only needed when a server/client actually runs, so defer its
# import until first attribute access.
rpyc = lazy_module('rpyc')

app = typer.Typer()
|
||||
|
||||
|
||||
class ASRService(rpyc.Service):
    """RPyC service exposing speech-to-text transcription over the network.

    NOTE(review): subclassing ``rpyc.Service`` forces the lazy ``rpyc``
    module to fully load when this class definition executes, i.e. at
    module import time — confirm that is intended.
    """

    def __init__(self, asr_recognizer):
        # asr_recognizer: any object exposing `transcribe(bytes)`.
        self.asr = asr_recognizer

    def on_connect(self, conn):
        # code that runs when a connection is created
        # (to init the service, if needed)
        pass

    def on_disconnect(self, conn):
        # code that runs after the connection has already closed
        # (to finalize the service, if needed)
        pass

    def exposed_transcribe(self, utterance: bytes):  # this is an exposed method
        # NOTE(review): despite the variable name, this presumably holds the
        # transcript returned by the recognizer — verify against the backend.
        speech_audio = self.asr.transcribe(utterance)
        return speech_audio

    def exposed_transcribe_cb(
        self, utterance: bytes, respond
    ):  # this is an exposed method
        # Callback-style variant: the result is delivered via `respond`
        # instead of the return value.
        speech_audio = self.asr.transcribe(utterance)
        respond(speech_audio)
|
||||
184
plume/utils/transcribe.py
Normal file
184
plume/utils/transcribe.py
Normal file
@@ -0,0 +1,184 @@
|
||||
import os
|
||||
import logging
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from functools import lru_cache
|
||||
|
||||
import typer
|
||||
# import rpyc
|
||||
|
||||
# from tqdm import tqdm
|
||||
# from pydub import AudioSegment
|
||||
# from pydub.silence import split_on_silence
|
||||
from plume.utils import lazy_module, lazy_callable
|
||||
|
||||
# Heavy third-party deps are resolved lazily so importing this module stays cheap.
rpyc = lazy_module('rpyc')
AudioSegment = lazy_callable('pydub.AudioSegment')
split_on_silence = lazy_callable('pydub.silence.split_on_silence')

app = typer.Typer()

logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


# rpyc ASR server endpoint. The host historically came only from the
# misnamed "JASR_RPYC_HOST" variable (while the port already used
# "ASR_RPYC_PORT"); prefer the consistent ASR_RPYC_HOST name but keep the
# legacy one as a fallback so existing deployments keep working.
ASR_RPYC_HOST = os.environ.get(
    "ASR_RPYC_HOST", os.environ.get("JASR_RPYC_HOST", "localhost")
)
ASR_RPYC_PORT = int(os.environ.get("ASR_RPYC_PORT", "8044"))

# Triton inference server: ASR model name and gRPC endpoint.
TRITON_ASR_MODEL = os.environ.get("TRITON_ASR_MODEL", "slu_wav2vec2")

TRITON_GRPC_ASR_HOST = os.environ.get("TRITON_GRPC_ASR_HOST", "localhost")
TRITON_GRPC_ASR_PORT = int(os.environ.get("TRITON_GRPC_ASR_PORT", "8001"))
|
||||
|
||||
|
||||
@lru_cache()
def transcribe_rpyc_gen(asr_host=ASR_RPYC_HOST, asr_port=ASR_RPYC_PORT):
    """Connect to the rpyc ASR server and return ``(transcribe, audio_prep)``.

    The result is cached per (host, port) via ``lru_cache``, so repeated
    calls reuse the same rpyc connection.

    Returns
    -------
    tuple
        ``transcribe`` — the remote transcription callable, and
        ``audio_prep`` — a function normalizing a pydub segment to
        16 kHz mono 16-bit, the format the ASR expects.

    Raises
    ------
    ConnectionError
        If the server refuses the connection (wrong ASR_RPYC_HOST/PORT).
    """
    logger.info(f"connecting to asr server at {asr_host}:{asr_port}")
    try:
        asr = rpyc.connect(asr_host, asr_port).root
        logger.info("connected to asr server successfully")
    except ConnectionRefusedError as err:
        # Previously raised a generic Exception naming the stale
        # "JASPER_ASR_RPYC_HOST" variable; report the current env-var names
        # and keep the original cause chained.
        raise ConnectionError(
            f"could not connect to ASR rpyc server at {asr_host}:{asr_port}; "
            "check the ASR_RPYC_HOST/ASR_RPYC_PORT environment variables"
        ) from err

    def audio_prep(aud_seg):
        # 16 kHz, mono, 16-bit samples: the input format the ASR expects.
        asr_seg = aud_seg.set_channels(1).set_sample_width(2).set_frame_rate(16000)
        return asr_seg

    return asr.transcribe, audio_prep
|
||||
|
||||
|
||||
def triton_transcribe_grpc_gen(
    asr_host=TRITON_GRPC_ASR_HOST,
    asr_port=TRITON_GRPC_ASR_PORT,
    asr_model=TRITON_ASR_MODEL,
    method="chunked",
    chunk_msec=5000,
    sil_msec=500,
    # overlap=False,
    sep=" ",
):
    """Build a ``(transcriber, audio_prep)`` pair backed by a Triton gRPC server.

    *method* selects how the audio is fed to the server: "whole" sends the
    entire segment at once, "chunked" splits it into fixed *chunk_msec*
    pieces, and "silence" first splits on silences and then chunks each
    piece. Partial transcripts are joined with *sep*.

    Raises an Exception for an unsupported *method*.
    """
    from tritonclient.utils import np_to_triton_dtype
    import tritonclient.grpc as grpcclient
    import numpy as np

    sup_meth = ["chunked", "silence", "whole"]
    if method not in sup_meth:
        meths = "|".join(sup_meth)
        raise Exception(f"unsupported method {method}. pick one of {meths}")

    client = grpcclient.InferenceServerClient(f"{asr_host}:{asr_port}")

    def transcriber(aud_seg):
        # Ship the segment as a WAV byte blob in the model's INPUT_AUDIO
        # tensor and read the transcript back from OUTPUT_TEXT.
        af = BytesIO()
        aud_seg.export(af, format="wav")
        input_audio_bytes = af.getvalue()
        input_audio_data = np.array([input_audio_bytes])
        inputs = [
            grpcclient.InferInput(
                "INPUT_AUDIO",
                input_audio_data.shape,
                np_to_triton_dtype(input_audio_data.dtype),
            )
        ]
        inputs[0].set_data_from_numpy(input_audio_data)
        outputs = [grpcclient.InferRequestedOutput("OUTPUT_TEXT")]
        response = client.infer(asr_model, inputs, request_id=str(1), outputs=outputs)
        transcript = response.as_numpy("OUTPUT_TEXT")[0]
        return transcript.decode("utf-8")

    def chunked_transcriber(aud_seg):
        # Split the segment into pieces no longer than chunk_msec
        # (optionally along silences first), transcribe each piece, and
        # join the partial transcripts with `sep`.
        if method == "silence":
            sil_chunks = split_on_silence(
                aud_seg,
                min_silence_len=sil_msec,
                silence_thresh=-50,
                keep_silence=500,
            )
            chunks = [sc for c in sil_chunks for sc in c[::chunk_msec]]
        else:
            chunks = aud_seg[::chunk_msec]
        # if overlap:
        #     chunks = [
        #         aud_seg[start, end]
        #         for start, end in range(0, int(aud_seg.duration_seconds * 1000, 1000))
        #     ]
        #     pass
        transcript_list = []
        sil_pad = AudioSegment.silent(duration=sil_msec)
        for seg in chunks:
            # Pad each chunk with silence on both sides — presumably to help
            # the model's endpointing at chunk boundaries; TODO confirm.
            t_seg = sil_pad + seg + sil_pad
            c_transcript = transcriber(t_seg)
            transcript_list.append(c_transcript)
        transcript = sep.join(transcript_list)
        return transcript

    def audio_prep(aud_seg):
        # Normalize to 16 kHz mono 16-bit, the input format the ASR expects.
        asr_seg = aud_seg.set_channels(1).set_sample_width(2).set_frame_rate(16000)
        return asr_seg

    whole_transcriber = transcriber if method == "whole" else chunked_transcriber
    return whole_transcriber, audio_prep
|
||||
|
||||
|
||||
@app.command()
def file(audio_file: Path, write_file: bool = False, chunked: bool = True):
    """Transcribe AUDIO_FILE via the Triton ASR server and echo the text.

    With --write-file, also writes the transcript next to the audio as a
    .txt file. --no-chunked sends the whole file in a single request.
    """
    from pydub import AudioSegment

    aseg = AudioSegment.from_file(audio_file)
    # Honor the previously-ignored `chunked` flag; the default (chunked)
    # behavior is unchanged.
    transcriber, prep = triton_transcribe_grpc_gen(
        method="chunked" if chunked else "whole"
    )
    transcription = transcriber(prep(aseg))

    typer.echo(transcription)
    if write_file:
        tscript_file_path = audio_file.with_suffix(".txt")
        with open(tscript_file_path, "w") as tf:
            tf.write(transcription)
|
||||
|
||||
|
||||
@app.command()
def benchmark(audio_file: Path):
    """Time rpyc vs Triton gRPC transcription of AUDIO_FILE and print results."""
    from pydub import AudioSegment

    def timeinfo(transcriber, aud_seg, number=100, repeat=10):
        # Shared timing helper — this closure used to be duplicated
        # verbatim for each backend.
        from timeit import Timer

        timer = Timer(lambda: transcriber(aud_seg))
        time_taken = timer.repeat(repeat, number=number)
        best = min(time_taken) * 1000 / number
        print(f"{number} loops, best of {repeat}: {best:.3f} msec per loop")

    file_seg = AudioSegment.from_file(audio_file)

    # Backend 1: rpyc ASR server.
    transcriber, audio_prep = transcribe_rpyc_gen()
    timeinfo(transcriber, audio_prep(file_seg))

    import time

    time.sleep(5)  # brief pause between the two backend runs

    # Backend 2: Triton gRPC server.
    transcriber, audio_prep = triton_transcribe_grpc_gen()
    timeinfo(transcriber, audio_prep(file_seg))
|
||||
|
||||
|
||||
def main():
    # Entry point: dispatch to this module's typer CLI.
    app()


if __name__ == "__main__":
    main()
|
||||
92
plume/utils/tts.py
Normal file
92
plume/utils/tts.py
Normal file
@@ -0,0 +1,92 @@
|
||||
from logging import getLogger
|
||||
from plume.utils import lazy_module
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import typer
|
||||
|
||||
# from google.cloud import texttospeech
|
||||
# Defer loading the heavy Google Cloud TTS client until first use.
texttospeech = lazy_module('google.cloud.texttospeech')

LOGGER = getLogger("googletts")

app = typer.Typer()
|
||||
|
||||
|
||||
class GoogleTTS(object):
    """Thin wrapper around the Google Cloud Text-to-Speech client.

    The ``params`` dicts accepted by the synthesis methods carry the keys
    produced by :meth:`voice_list`: at least "language", "name" and
    "sample_rate".
    """

    def __init__(self):
        # Requires Google Cloud credentials configured in the environment.
        self.client = texttospeech.TextToSpeechClient()

    def _synthesize(self, tts_input, params: dict) -> bytes:
        """Shared synthesis path for text and SSML inputs (LINEAR16 audio).

        text_to_speech/ssml_to_speech used to duplicate this body verbatim,
        differing only in the SynthesisInput keyword.
        """
        voice = texttospeech.types.VoiceSelectionParams(
            language_code=params["language"], name=params["name"]
        )
        audio_config = texttospeech.types.AudioConfig(
            audio_encoding=texttospeech.enums.AudioEncoding.LINEAR16,
            sample_rate_hertz=params["sample_rate"],
        )
        response = self.client.synthesize_speech(tts_input, voice, audio_config)
        return response.audio_content

    def text_to_speech(self, text: str, params: dict) -> bytes:
        """Synthesize plain *text*; return LINEAR16 audio bytes."""
        return self._synthesize(texttospeech.types.SynthesisInput(text=text), params)

    def ssml_to_speech(self, text: str, params: dict) -> bytes:
        """Synthesize SSML markup *text*; return LINEAR16 audio bytes."""
        return self._synthesize(texttospeech.types.SynthesisInput(ssml=text), params)

    @classmethod
    def voice_list(cls):
        """Lists the available voices (English-capable only) as param dicts."""

        client = cls().client

        # Performs the list voices request
        voices = client.list_voices()
        results = []
        for voice in voices.voices:
            supported_eng_langs = [
                lang for lang in voice.language_codes if lang[:2] == "en"
            ]
            # Skip voices without any English language support.
            if len(supported_eng_langs) > 0:
                lang = ",".join(supported_eng_langs)
            else:
                continue

            ssml_gender = texttospeech.enums.SsmlVoiceGender(voice.ssml_gender)
            results.append(
                {
                    "name": voice.name,
                    "language": lang,
                    "gender": ssml_gender.name,
                    "engine": "wavenet" if "Wav" in voice.name else "standard",
                    "sample_rate": voice.natural_sample_rate_hertz,
                }
            )
        return results
|
||||
|
||||
|
||||
@app.command()
def generate_audio_file(
    text, dest_path: Path = Path("./tts_audio.wav"), voice="en-US-Wavenet-D"
):
    """Synthesize *text* with the given Google TTS voice into *dest_path*.

    Fixes: the default `dest_path` was a plain str, so calling this
    function directly (outside typer's conversion) crashed on
    `dest_path.open`; an unknown voice raised a bare IndexError.
    """
    tts = GoogleTTS()
    matches = [v for v in tts.voice_list() if v["name"] == voice]
    if not matches:
        raise ValueError(f"unknown voice {voice!r}; see GoogleTTS.voice_list()")
    wav_data = tts.text_to_speech(text, matches[0])
    with dest_path.open("wb") as wf:
        wf.write(wav_data)
|
||||
|
||||
|
||||
def main():
    # Entry point: dispatch to this module's typer CLI.
    app()


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user