From f449105b79a9367ed51325b718308df92963bd8b Mon Sep 17 00:00:00 2001 From: Malar Kannan Date: Mon, 23 Sep 2019 16:26:54 +0530 Subject: [PATCH] 1. updated requirements 2. spm params explicit 3. gitignore and script --- .gitignore | 2 ++ requirements.txt | 4 ++-- spm_codec.py | 6 +++--- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 4afec03..acc83b2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,8 @@ .env env/ jupyter.json +run*/ +filelists/ # Created by https://www.gitignore.io/api/macos # Edit at https://www.gitignore.io/?templates=macos diff --git a/requirements.txt b/requirements.txt index 11eccea..49e76b5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ matplotlib==2.1.0 -tensorflow -numpy==1.13.3 +tensorflow==1.1.0 +numpy==1.17.1 inflect==0.2.5 librosa==0.6.0 scipy==1.0.0 diff --git a/spm_codec.py b/spm_codec.py index c677643..92cdb9f 100644 --- a/spm_codec.py +++ b/spm_codec.py @@ -25,7 +25,7 @@ def _create_sentencepiece_corpus(): def _create_sentencepiece_vocab(vocab_size=SPM_VOCAB_SIZE): - train_params = "--input={} --model_prefix={} --vocab_size={}".format( + train_params = "--input={} --model_type=unigram --character_coverage=1.0 --model_prefix={} --vocab_size={}".format( SPM_CORPUS_FILE, SPM_MODEL_PREFIX, vocab_size ) spm.SentencePieceTrainer.Train(train_params) @@ -56,8 +56,8 @@ def _interactive_test(): def main(): - # _create_sentencepiece_corpus() - # _create_sentencepiece_vocab() + _create_sentencepiece_corpus() + _create_sentencepiece_vocab() _interactive_test()