diff --git a/README.md b/README.md new file mode 100644 index 0000000..a66811c --- /dev/null +++ b/README.md @@ -0,0 +1,18 @@ +### Setup +Run `. env/bin/activate` to activate the virtualenv. + +### Data Generation +* update `OUTPUT_NAME` in *speech_samplegen.py*; the dataset folder is created with that name +* `python speech_samplegen.py` generates variants of audio samples + +### Data Preprocessing +* `python speech_data.py` creates the training-testing data from the generated samples. +* run `fix_csv(OUTPUT_NAME)` to create the fixed index of the generated dataset +* `generate_sppas_trans(OUTPUT_NAME)` creates the SPPAS transcription (wav+txt) data +* `$ (SPPAS_DIR)/bin/annotation.py -l eng -e csv --ipus --tok --phon --align --align -w ./outputs/OUTPUT_NAME/` creates the phoneme alignment csv files for all variants. +* `create_seg_phonpair_tfrecords(OUTPUT_NAME)` creates the tfrecords files + with the phoneme-level pairs of right/wrong stresses + +### Training +* `python speech_model.py` trains the model with the generated training data. +* `train_siamese(OUTPUT_NAME)` trains the siamese model with the generated dataset. diff --git a/speech_samplegen.py b/speech_samplegen.py index 2b6faa4..2ff5849 100644 --- a/speech_samplegen.py +++ b/speech_samplegen.py @@ -216,6 +216,9 @@ def generate_audio_for_text_list(text_list): closer() def generate_audio_for_stories(): + ''' + Generates the audio sample variants for the list of words in the stories + ''' # story_file = './inputs/all_stories_hs.json' story_file = './inputs/all_stories.json' stories_data = json.load(open(story_file)) @@ -225,6 +228,10 @@ def generate_audio_for_stories(): generate_audio_for_text_list(text_list) def generate_test_audio_for_stories(): + ''' + Picks a list of words from the wordlist that are not in story words + and generates the variants + ''' story_file = './inputs/all_stories_hs.json' # story_file = './inputs/all_stories.json' stories_data = json.load(open(story_file))