diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..4afec03 --- /dev/null +++ b/.gitignore @@ -0,0 +1,145 @@ +.env +env/ +jupyter.json +# Created by https://www.gitignore.io/api/macos +# Edit at https://www.gitignore.io/?templates=macos + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +# End of https://www.gitignore.io/api/macos + +# Created by https://www.gitignore.io/api/python +# Edit at https://www.gitignore.io/?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
+# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# End of https://www.gitignore.io/api/python
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..1b79c49
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,9 @@
+.PHONY: clean clean-test clean-pyc clean-build docs help notebook
+.DEFAULT_GOAL := help
+
+# common.mk defines the help target that .DEFAULT_GOAL refers to.
+include common.mk
+
+# NOTE: an unset JUPYTER_TOKEN expands to an empty token here.
+notebook: ## start Jupyter Lab listening on all interfaces
+	jupyter lab --ip=0.0.0.0 --no-browser --NotebookApp.token='${JUPYTER_TOKEN}'
diff --git a/common.mk b/common.mk
new file mode 100644
index 0000000..b08d35c
--- /dev/null
+++ b/common.mk
@@ -0,0 +1,29 @@
+# Python snippet that opens the file named by argv[1] in the default web browser.
+define BROWSER_PYSCRIPT
+import os, webbrowser, sys
+
+try:
+	from urllib import pathname2url
+except ImportError:
+	from urllib.request import pathname2url
+
+webbrowser.open("file://" + pathname2url(os.path.abspath(sys.argv[1])))
+endef
+export BROWSER_PYSCRIPT
+
+# Python filter: for each "target: ... ## text" line on stdin, print the target and its text.
+define PRINT_HELP_PYSCRIPT
+import re, sys
+
+for line in sys.stdin:
+	match = re.match(r'^([a-zA-Z_-]+):.*?## (.*)$$', line)
+	if match:
+		target, help_text = match.groups()
+		print("%-20s %s" % (target, help_text))
+endef
+export PRINT_HELP_PYSCRIPT
+
+BROWSER := python -c "$$BROWSER_PYSCRIPT"
+
+help: ## make TARGET forwards the TARGET to sub packages
+	@cat $(MAKEFILE_LIST) | python -c "$$PRINT_HELP_PYSCRIPT"
diff --git a/data_utils.py b/data_utils.py index fdfd287..40fdbad 100644 --- a/data_utils.py +++ b/data_utils.py @@ -5,8 +5,8 @@ import torch.utils.data import layers from utils import load_wav_to_torch, load_filepaths_and_text -from text import 
text_to_sequence - +# from text import text_to_sequence +from spm_codec import text_to_sequence class TextMelLoader(torch.utils.data.Dataset): """ diff --git a/hparams.py b/hparams.py index 8886f18..9a34a14 100644 --- a/hparams.py +++ b/hparams.py @@ -37,7 +37,7 @@ def create_hparams(hparams_string=None, verbose=False): filter_length=1024, hop_length=256, win_length=1024, - n_mel_channels=80, + n_mel_channels=40, mel_fmin=0.0, mel_fmax=8000.0, diff --git a/spm.vocab b/spm.vocab new file mode 100644 index 0000000..d87c182 --- /dev/null +++ b/spm.vocab @@ -0,0 +1,1000 @@ + 0 + 0 + 0 +▁the -3.2269 +s -3.3112 +, -3.37118 +. -3.76142 +▁ -3.78779 +▁of -3.89831 +ed -4.0102 +e -4.06402 +▁to -4.14157 +▁a -4.17751 +▁and -4.22342 +t -4.29623 +▁in -4.3537 +d -4.45794 +ing -4.57117 +r -4.72009 +▁was -4.73585 +c -4.80185 +o -4.80931 +y -4.88917 +u -4.89129 +p -4.89754 +m -4.90647 +er -4.90897 +a -4.93154 +n -4.93472 +▁that -4.99489 +i -5.01984 +ly -5.11416 +▁he -5.12218 +ar -5.13987 +f -5.2516 +b -5.26537 +g -5.27135 +in -5.2807 +▁for -5.30204 +al -5.34872 +l -5.35116 +h -5.36385 +- -5.3807 +▁his -5.40727 +▁s -5.41852 +or -5.43655 +▁The -5.44021 +▁with -5.49879 +w -5.50201 +▁f -5.52449 +on -5.54868 +▁be -5.55379 +▁p -5.57072 +▁on -5.57961 +▁had -5.5882 +re -5.61023 +k -5.61119 +it -5.65853 +' -5.67248 +▁as -5.68787 +▁by -5.69654 +▁were -5.70924 +ent -5.71958 +en -5.74092 +le -5.74923 +▁b -5.75421 +ic -5.78284 +▁which -5.78521 +▁at -5.8029 +at -5.82394 +▁re -5.82438 +th -5.84507 +es -5.86566 +▁it -5.87634 +ur -5.88183 +▁not -5.89042 +ro -5.90769 +▁de -5.91404 +▁Oswald -5.93273 +▁c -5.94149 +ir -5.95969 +an -5.96982 +st -6.03457 +ion -6.05129 +▁quote -6.06199 +▁from -6.08164 +▁or -6.11151 +▁m -6.11224 +▁is -6.12348 +ation -6.13483 +ch -6.18872 +▁who -6.20358 +▁A -6.22244 +▁g -6.25243 +▁M -6.28746 +id -6.29069 +▁this -6.29457 +se -6.29522 +▁I -6.30087 +ate -6.30457 +▁con -6.30485 +ment -6.3156 +▁President -6.31665 +▁an -6.3193 +▁have -6.32328 +▁t -6.32578 +ve -6.32959 +▁B -6.33182 
+▁been -6.33186 +▁st -6.34622 +is -6.34836 +▁one -6.36381 +ce -6.37974 +▁but -6.38082 +ol -6.39285 +us -6.39457 +; -6.41785 +▁no -6.4218 +▁S -6.4237 +▁him -6.44975 +▁r -6.47751 +ive -6.47981 +il -6.49676 +▁so -6.50785 +▁C -6.50969 +x -6.51424 +▁all -6.52671 +▁He -6.54276 +▁h -6.54814 +un -6.54828 +ty -6.551 +ad -6.55239 +▁P -6.56037 +as -6.56097 +ne -6.5692 +ter -6.57893 +el -6.58754 +ra -6.59546 +ow -6.59782 +▁pro -6.60383 +▁co -6.63912 +om -6.64797 +▁other -6.65219 +▁d -6.66462 +li -6.67502 +▁" -6.68649 +ell -6.70026 +▁end -6.71271 +▁time -6.72054 +▁e -6.73107 +▁ex -6.73297 +▁dis -6.74143 +▁un -6.75034 +vi -6.75321 +able -6.75469 +▁car -6.76784 +▁their -6.77042 +am -6.77886 +▁D -6.77956 +la -6.78124 +ance -6.78294 +ac -6.79274 +te -6.79689 +▁In -6.81034 +▁Mr -6.81125 +▁man -6.81176 +▁W -6.81274 +ge -6.82046 +ist -6.82407 +▁are -6.82569 +ant -6.82778 +▁two -6.83582 +▁would -6.83764 +ld -6.84218 +▁when -6.85013 +ul -6.85128 +ers -6.85318 +▁out -6.86182 +ig -6.86465 +ted -6.86868 +▁more -6.8747 +▁prison -6.88635 +▁after -6.89251 +tion -6.90396 +ity -6.91095 +est -6.91691 +end -6.92241 +ated -6.92622 +im -6.92841 +▁her -6.93364 +▁they -6.93782 +▁T -6.94884 +age -6.9514 +▁F -6.95413 +ther -6.95718 +ard -6.9574 +ial -6.95773 +ap -6.95872 +▁any -6.96094 +und -6.96163 +▁sp -6.96939 +ight -6.97052 +nce -6.97521 +ward -6.99349 +▁me -6.99369 +▁It -7.0206 +ill -7.02302 +ure -7.02682 +▁some -7.03892 +▁sh -7.04391 +j -7.04398 +▁twenty -7.044 +ies -7.0466 +op -7.05156 +ous -7.05216 +um -7.05547 +▁into -7.06793 +▁E -7.07196 +and -7.10502 +ok -7.11168 +ie -7.11453 +▁do -7.11472 +▁lo -7.11637 +et -7.12785 +ot -7.13089 +▁mo -7.14042 +▁L -7.14198 +▁H -7.1511 +▁made -7.15354 +mp -7.15458 +man -7.15568 +▁G -7.15724 +ry -7.15797 +▁about -7.16577 +up -7.16795 +▁there -7.16853 +▁bo -7.17481 +▁could -7.17504 +for -7.18094 +ag -7.18243 +ine -7.1992 +▁nineteen -7.21304 +▁en -7.21608 +▁has -7.22473 +ast -7.22516 +▁great -7.22609 +▁first -7.23266 +▁J -7.23326 +▁she -7.23586 +all -7.23687 
+▁sixty -7.24026 +▁only -7.24878 +▁up -7.25136 +▁we -7.2526 +▁these -7.25557 +▁did -7.25874 +▁part -7.27392 +▁Newgate -7.27985 +▁found -7.27989 +▁Commission -7.28677 +▁pre -7.28858 +ary -7.29124 +ver -7.29486 +▁its -7.29697 +one -7.29813 +▁under -7.30474 +ail -7.30672 +▁Secret -7.31142 +ke -7.31166 +ions -7.31671 +▁On -7.32158 +▁li -7.32693 +▁three -7.3505 +▁Service -7.35143 +ough -7.35509 +ish -7.35513 +▁work -7.35515 +▁you -7.37037 +▁go -7.37159 +▁also -7.37422 +▁than -7.37806 +ful -7.38143 +▁Dallas -7.38924 +three -7.39827 +▁eighteen -7.4009 +▁before -7.40095 +out -7.40157 +▁comp -7.40769 +▁said -7.40882 +▁imp -7.41191 +ll -7.41424 +▁day -7.41742 +: -7.42055 +▁will -7.42069 +▁St -7.42861 +▁upon -7.42933 +▁person -7.4327 +▁where -7.4382 +▁over -7.44757 +ious -7.44836 +▁hand -7.45512 +▁la -7.45808 +▁prisoners -7.4677 +▁should -7.46944 +act -7.47775 +tri -7.48463 +▁them -7.48625 +▁O -7.48821 +▁This -7.49059 +▁plan -7.50048 +▁again -7.51647 +▁tra -7.51697 +▁police -7.52525 +▁hundred -7.52968 +▁through -7.53859 +ven -7.54204 +▁case -7.54359 +▁most -7.54429 +he -7.54604 +z -7.55251 +▁di -7.55267 +less -7.56383 +▁such -7.56596 +ress -7.57488 +▁rec -7.57908 +▁same -7.58457 +led -7.58778 +▁place -7.60813 +▁four -7.61045 +▁long -7.61777 +▁many -7.62528 +ness -7.62579 +▁still -7.62604 +ting -7.62731 +▁follow -7.63233 +▁six -7.63583 +▁might -7.63695 +" -7.64658 +▁even -7.6641 +ose -7.66424 +▁per -7.66681 +▁down -7.66742 +▁Re -7.67434 +▁act -7.68012 +▁rifle -7.69834 +▁left -7.69836 +▁qu -7.70375 +ever -7.70386 +▁very -7.70923 +▁cha -7.71387 +ual -7.71857 +▁those -7.71965 +▁Co -7.71999 +two -7.72414 +▁jail -7.72497 +▁state -7.72945 +ach -7.73578 +▁years -7.7379 +▁five -7.73826 +▁own -7.7418 +▁R -7.7527 +▁cr -7.75959 +▁No -7.76341 +ock -7.77106 +▁visit -7.78049 +▁name -7.78055 +▁if -7.78221 +▁shot -7.78233 +▁sta -7.78912 +port -7.78938 +▁exp -7.78953 +▁assassination -7.79232 +▁November -7.7976 +▁testified -7.7976 +▁ho -7.79845 +ments -7.81365 +▁al -7.81465 +▁back -7.81518 
+▁ten -7.81689 +▁employ -7.82106 +way -7.83033 +▁much -7.8331 +▁life -7.83316 +▁But -7.83346 +▁fact -7.83912 +ical -7.84299 +▁second -7.84509 +▁may -7.85151 +ving -7.85702 +▁office -7.85723 +▁because -7.85732 +▁appear -7.85732 +▁side -7.86101 +▁FBI -7.86349 +▁Lee -7.86473 +▁form -7.86606 +▁vi -7.86864 +ize -7.86914 +▁Kennedy -7.8697 +▁saw -7.8701 +▁can -7.87029 +over -7.87172 +▁taken -7.87407 +▁return -7.88857 +▁seven -7.88875 +▁new -7.88894 +," -7.89452 +▁came -7.89513 +ton -7.89993 +▁As -7.90322 +side -7.90504 +▁however -7.90781 +ory -7.91109 +▁Street -7.92084 +lu -7.92315 +▁right -7.92545 +▁public -7.92741 +▁general -7.92741 +▁what -7.92741 +▁himself -7.92834 +▁eight -7.93149 +hip -7.93679 +▁men -7.93968 +▁An -7.94687 +▁attempt -7.95417 +▁must -7.9545 +▁du -7.95452 +▁while -7.96097 +▁room -7.96811 +▁wall -7.97492 +▁Ha -7.97965 +▁murder -7.98166 +▁present -7.98891 +▁There -7.99528 +▁building -7.9957 +▁point -7.99571 +▁report -7.99946 +▁between -8.00993 +▁When -8.01 +riv -8.01126 +▁thousand -8.01712 +▁pounds -8.01713 +▁house -8.01731 +▁At -8.01751 +▁well -8.01875 +▁last -8.02294 +▁now -8.02355 +▁city -8.02601 +▁certain -8.03167 +▁people -8.03167 +▁way -8.0371 +▁Ro -8.04069 +▁floor -8.04646 +▁large -8.04646 +▁my -8.0499 +▁number -8.05389 +▁thirty -8.05389 +▁protect -8.0539 +▁like -8.05651 +lthough -8.06159 +▁law -8.06172 +▁New -8.06898 +▁turn -8.06913 +▁every -8.0846 +▁U -8.08507 +▁sentence -8.09207 +▁never -8.09238 +▁near -8.09273 +▁went -8.09328 +▁-- -8.09362 +▁use -8.09966 +▁Babylon -8.09988 +▁convict -8.09988 +▁another -8.09993 +▁sc -8.10314 +▁whole -8.10629 +▁agents -8.10779 +▁inter -8.10964 +five -8.1196 +▁execution -8.12369 +▁information -8.12369 +▁say -8.12464 +▁o -8.12488 +▁wife -8.13175 +▁good -8.13196 +▁show -8.13606 +▁our -8.13666 +▁want -8.14026 +ian -8.14231 +▁His -8.14446 +▁evidence -8.14808 +cut -8.15297 +." 
-8.15384 +▁pass -8.15532 +▁mis -8.16167 +ative -8.16358 +▁Marina -8.16468 +▁death -8.16502 +ities -8.17646 +▁old -8.17666 +▁close -8.18158 +▁high -8.18159 +▁print -8.18159 +▁make -8.1816 +▁She -8.19711 +▁arrest -8.19875 +▁consider -8.19876 +▁dur -8.20014 +serv -8.20468 +que -8.20706 +▁used -8.21447 +▁crime -8.21627 +▁secur -8.22509 +▁passed -8.2271 +▁system -8.23397 +▁animal -8.23401 +▁provide -8.23405 +▁body -8.23423 +▁For -8.24112 +▁order -8.24402 +▁once -8.24514 +▁look -8.24568 +four -8.24834 +▁seem -8.25946 +▁fifty -8.26126 +▁crowd -8.26127 +▁see -8.26341 +▁each -8.264 +▁obtain -8.2705 +▁letter -8.27051 +▁get -8.27103 +▁head -8.27848 +▁remain -8.27985 +▁both -8.28244 +▁brough -8.28936 +ition -8.29911 +▁full -8.30377 +▁set -8.30425 +▁just -8.3082 +▁window -8.30842 +▁whe -8.30875 +▁night -8.30904 +▁come -8.3105 +▁war -8.31289 +▁mov -8.31409 +▁motorcade -8.31813 +▁House -8.31819 +▁few -8.3193 +▁cap -8.31999 +▁known -8.32497 +▁read -8.32735 +▁Soviet -8.32793 +▁effect -8.32793 +▁little -8.32794 +▁nine -8.32851 +▁asked -8.32918 +most -8.33603 +▁open -8.33816 +▁feet -8.34815 +loo -8.35677 +▁character -8.35794 +▁front -8.35795 +day -8.3598 +▁know -8.37125 +▁take -8.37127 +▁gra -8.3738 +▁door -8.37387 +▁John -8.37845 +▁several -8.3785 +▁later -8.38251 +▁line -8.38537 +▁thought -8.38887 +▁direct -8.38887 +▁charge -8.38894 +▁paper -8.38905 +range -8.39195 +▁enter -8.40802 +▁establish -8.41003 +▁year -8.41667 +V -8.42078 +▁Unit -8.42082 +▁small -8.42092 +▁bank -8.42093 +▁relat -8.42514 +▁twelve -8.43165 +▁became -8.43166 +ible -8.43217 +▁fired -8.43969 +▁along -8.4427 +▁purpose -8.45375 +▁condition -8.45378 +▁called -8.45397 +ncy -8.46225 +▁until -8.46511 +self -8.47034 +▁concern -8.47636 +▁officers -8.47696 +▁supp -8.48782 +▁morning -8.48785 +▁After -8.48792 +▁given -8.48799 +▁seen -8.49903 +▁Depository -8.49948 +▁immediate -8.49948 +▁period -8.49948 +▁writ -8.49949 +▁complete -8.49973 +▁Cuba -8.51126 +part -8.52267 +▁Texas -8.52314 +▁question -8.52317 +▁organ -8.5233 
+▁local -8.52331 +▁power -8.52357 +▁yard -8.524 +S -8.52477 +▁leav -8.53536 +▁Orleans -8.54739 +▁friend -8.54739 +C -8.5495 +▁agencies -8.55973 +▁occasion -8.55973 +▁require -8.55973 +▁escape -8.55974 +▁clear -8.56002 +▁assist -8.56061 +rvey -8.56495 +▁hour -8.56626 +▁fifteen -8.57223 +▁danger -8.57224 +▁watch -8.57224 +▁continue -8.57224 +▁among -8.57225 +▁further -8.57229 +▁reason -8.57234 +▁approximate -8.58489 +▁particular -8.58489 +▁photograph -8.58489 +▁difficult -8.58489 +lready -8.5849 +▁believe -8.58491 +▁Union -8.58492 +▁half -8.5851 +▁means -8.58538 +▁live -8.59093 +▁Cha -8.59241 +▁practice -8.59771 +▁strong -8.59772 +▁member -8.59772 +▁carried -8.59772 +▁kept -8.59772 +▁route -8.59785 +▁criminal -8.6107 +▁next -8.6107 +▁view -8.61086 +▁week -8.61094 +▁since -8.61117 +▁country -8.61136 +▁William -8.62386 +▁husband -8.62386 +▁received -8.62386 +▁Paine -8.62391 +▁change -8.62393 +▁money -8.62399 +▁short -8.62427 +▁home -8.62441 +▁interest -8.6259 +▁often -8.62985 +▁effort -8.63719 +▁While -8.63719 +▁kind -8.63722 +▁gave -8.63734 +▁Agent -8.63738 +▁thus -8.63801 +▁condemned -8.6507 +▁possible -8.6507 +▁probably -8.65071 +▁Court -8.65081 +▁indicate -8.65085 +chool -8.65088 +▁find -8.65209 +▁instruct -8.6644 +▁observ -8.66441 +▁having -8.66443 +▁tried -8.66576 +▁government -8.67829 +▁investigat -8.67829 +▁proper -8.67874 +▁trial -8.67875 +▁free -8.6831 +▁debtors -8.69238 +▁necessary -8.69238 +▁London -8.69238 +▁Vice -8.69238 +▁food -8.69239 +▁committed -8.69299 +▁possess -8.70666 +▁walk -8.70667 +▁result -8.70668 +▁class -8.7067 +▁regard -8.70676 +▁sign -8.70691 +ift -8.71706 +▁testimony -8.72115 +▁picture -8.72116 +▁official -8.72116 +▁except -8.72116 +▁latter -8.72123 +▁trans -8.722 +▁amount -8.73586 +▁keep -8.73587 +▁prepar -8.73605 +▁caus -8.73614 +▁express -8.73724 +▁suggest -8.75079 +▁capital -8.75081 +▁matter -8.75086 +▁together -8.7509 +scription -8.76594 +▁business -8.76594 +▁develop -8.76594 +▁themselves -8.76594 +▁always -8.76599 +▁court -8.76635 
+▁End -8.76639 +▁lead -8.76666 +▁put -8.76958 +▁female -8.78132 +▁contain -8.78133 +▁detail -8.78149 +▁record -8.78151 +▁Building -8.79695 +▁accord -8.79695 +▁shooting -8.79695 +ably -8.79849 +▁request -8.81284 +▁Marine -8.81289 +▁trip -8.81345 +elect -8.81398 +▁advance -8.82895 +▁various -8.82895 +▁Walker -8.82895 +▁service -8.82895 +▁corner -8.82896 +▁Elm -8.82915 +mission -8.82961 +▁Department -8.84534 +▁Federal -8.84535 +▁described -8.84536 +▁Lord -8.84562 +▁operat -8.84566 +▁scene -8.84572 +A -8.85619 +▁witness -8.86201 +▁fraud -8.86201 +▁water -8.86205 +▁either -8.86208 +▁mark -8.8625 +▁south -8.86298 +▁count -8.86316 +▁nothing -8.86331 +▁rule -8.86454 +▁hope -8.86928 +ification -8.87896 +▁determine -8.87896 +▁process -8.879 +▁street -8.87901 +▁inspectors -8.87903 +▁labor -8.87906 +▁step -8.88099 +▁Governor -8.8962 +▁individual -8.8962 +▁similar -8.8962 +▁bullet -8.8962 +▁lunch -8.8962 +▁course -8.89627 +▁least -8.89662 +▁kill -8.89676 +▁yet -8.89752 +▁pay -8.89779 +nswer -8.89857 +▁England -8.91374 +▁activities -8.91374 +▁type -8.91384 +▁fire -8.92232 +▁admitted -8.9316 +▁introduc -8.9316 +▁White -8.9316 +▁position -8.9316 +▁doubt -8.9316 +▁attention -8.93161 +▁third -8.93162 +▁differ -8.93508 +▁important -8.94978 +▁punishment -8.94978 +▁seventeen -8.9498 +▁perform -8.94981 +▁Some -8.95034 +▁subject -8.9683 +▁Russia -8.96831 +▁prevent -8.9684 +▁support -8.96861 +clude -8.97415 +▁involv -8.98717 +▁suffer -8.98717 +▁According -8.98717 +▁Tippit -8.98717 +▁frequent -8.98717 +▁identified -8.98717 +▁respect -8.98717 +▁material -8.98717 +▁wound -8.98717 +▁above -8.98718 +▁think -8.98718 +▁committee -8.98719 +▁weapon -8.98719 +▁safe -8.98727 +▁paid -8.98762 +▁better -8.98785 +field -8.9883 +▁Frazier -9.0064 +▁account -9.0064 +▁interview -9.0064 +▁family -9.0064 +▁threat -9.00641 +▁brother -9.00647 +ified -9.00657 +▁post -9.00731 +▁idea -9.00778 +▁issue -9.01131 +▁conclud -9.02013 +▁Assassination -9.02601 +▁identification -9.02601 +▁sufficient -9.02601 +▁cloth 
-9.02601 +▁common -9.02601 +▁addition -9.02601 +itude -9.02606 +▁block -9.02606 +▁draw -9.02608 +▁attend -9.02639 +▁October -9.04601 +▁previous -9.04601 +▁join -9.04601 +▁actually -9.04602 +▁learn -9.04604 +▁therefore -9.04611 +▁authorities -9.06642 +▁Robert -9.06642 +▁problem -9.06642 +▁making -9.06642 +▁evil -9.06649 +D -9.08592 +P -9.08604 +▁Committee -9.08725 +▁accept -9.08725 +▁success -9.08725 +▁Baker -9.08725 +▁Hosty -9.08725 +▁transfer -9.08732 +▁father -9.08751 +▁justice -9.08774 +▁discovered -9.10853 +▁represent -9.10853 +▁world -9.10853 +ability -9.10854 +▁simpl -9.10877 +▁PRS -9.13026 +▁governor -9.13026 +▁object -9.13027 +▁become -9.13028 +▁arrived -9.13028 +▁claim -9.1303 +▁Chief -9.15249 +▁Connally -9.15249 +▁function -9.15249 +▁ordinary -9.15249 +▁defect -9.15249 +▁expect -9.15252 +▁group -9.15253 +▁judge -9.17521 +▁maintain -9.17521 +▁recognize -9.17521 +▁woman -9.17522 +▁search -9.17523 +▁basis -9.17532 +ference -9.19395 +▁August -9.19847 +▁responsibility -9.19847 +▁measures -9.19847 +▁discuss -9.19848 +▁offense -9.19848 +▁Wakefield -9.22228 +▁conduct -9.22228 +▁jacket -9.22228 +▁purchase -9.22228 +▁radio -9.22228 +▁review -9.22228 +▁specific -9.22228 +▁fourteen -9.22231 +▁communicat -9.24667 +▁sometimes -9.24667 +▁remove -9.24667 +▁Houston -9.24667 +▁Truly -9.24667 +▁speak -9.24667 +▁address -9.24668 +▁throw -9.24669 +▁assassin -9.26947 +▁Government -9.27167 +▁Lawson -9.27167 +▁experience -9.27167 +▁legislat -9.27167 +▁sheriff -9.27167 +▁favor -9.27167 +▁separate -9.27167 +▁black -9.27167 +▁wrote -9.27167 +▁opinion -9.27168 +▁job -9.27168 +▁prior -9.27168 +▁national -9.27169 +▁private -9.2717 +▁eleven -9.2717 +scribe -9.29728 +▁Congress -9.29731 +▁arrangements -9.29731 +▁construct -9.29731 +▁enforcement -9.29731 +▁length -9.29731 +▁newspaper -9.29731 +▁subsequent -9.29731 +▁value -9.29731 +▁distinct -9.29731 +▁behind -9.29732 +▁detect -9.29739 +▁discharge -9.32363 +▁permitted -9.32363 +▁clerk -9.32363 +▁Washington -9.35065 +▁authority -9.35065 
+▁children -9.35065 +▁revolver -9.35065 +▁Bureau -9.35065 +▁Turner -9.35066 +▁attack -9.35066 +▁method -9.35066 +▁limited -9.35066 +▁access -9.35066 +▁modern -9.35066 +▁produce -9.35067 +▁Trade -9.35081 +▁Communist -9.37843 +▁declared -9.37843 +▁victim -9.37843 +▁chief -9.37843 +▁knew -9.37844 +▁remark -9.37844 +▁increase -9.37844 +▁oppos -9.37844 +▁beyond -9.37845 +I -9.38437 +▁approach -9.407 +▁occupied -9.407 +▁occurred -9.407 +▁principle -9.407 +▁remember -9.407 +▁gallows -9.407 +▁regular -9.407 +▁transportation -9.407 +▁April -9.40701 +▁situation -9.40701 +▁women -9.40701 +▁confess -9.40702 +▁regulat -9.40708 +▁commit -9.43509 +▁Constitution -9.43641 +▁Office -9.43641 +▁blanket -9.43641 +▁elevator -9.43641 +▁check -9.43642 +▁Section -9.43642 +T -9.5075 +R -9.54092 +W -9.57537 +M -9.85718 +B -9.85726 +L -10.0075 +F -10.251 +O -10.3992 +G -10.5736 +J -10.7847 +E -11.0526 +( -11.4186 +K -11.4187 +N -11.4188 +H -11.4189 +q -11.419 +U -11.4191 +v -11.4191
diff --git a/spm_codec.py b/spm_codec.py
new file mode 100644
index 0000000..c677643
--- /dev/null
+++ b/spm_codec.py
@@ -0,0 +1,115 @@
+"""SentencePiece text codec used in place of ``text.text_to_sequence``.
+
+Run as a script for an interactive encode/decode round trip. Pass
+``--train`` to rebuild the corpus file and the SentencePiece model first.
+"""
+import argparse
+
+from utils import load_filepaths_and_text
+
+# from text import text_to_sequence, sequence_to_text
+
+from hparams import create_hparams
+import sentencepiece as spm
+from text import symbols
+
+
+SPM_CORPUS_FILE = "filelists/text_corpus.txt"
+SPM_MODEL_PREFIX = "spm"
+SPM_VOCAB_SIZE = 1000
+hparams = create_hparams()
+
+
+def _create_sentencepiece_corpus():
+    """Write every training and validation text entry to SPM_CORPUS_FILE."""
+
+    def get_text_list(text_file):
+        # Element 1 of each loaded row is the text written to the corpus.
+        return [i[1] + "\n" for i in load_filepaths_and_text(text_file)]
+
+    full_text_list = get_text_list(hparams.training_files) + get_text_list(
+        hparams.validation_files
+    )
+    # Write UTF-8 explicitly instead of relying on the locale's default
+    # encoding; the SentencePiece trainer reads the corpus as UTF-8.
+    with open(SPM_CORPUS_FILE, "w", encoding="utf-8") as fd:
+        fd.writelines(full_text_list)
+
+
+def _create_sentencepiece_vocab(vocab_size=SPM_VOCAB_SIZE):
+    """Train a SentencePiece model of ``vocab_size`` pieces on SPM_CORPUS_FILE.
+
+    Output file names are derived from SPM_MODEL_PREFIX by the trainer.
+    """
+    train_params = "--input={} --model_prefix={} --vocab_size={}".format(
+        SPM_CORPUS_FILE, SPM_MODEL_PREFIX, vocab_size
+    )
+    spm.SentencePieceTrainer.Train(train_params)
+
+
+def _spm_text_codecs():
+    """Return the ``(text_to_sequence, sequence_to_text)`` function pair.
+
+    The model file is loaded on first use rather than at import time, so
+    this module can be imported (and ``--train`` can run) before the
+    SentencePiece model file has been created.
+    """
+    sp = spm.SentencePieceProcessor()
+    state = {"loaded": False}
+
+    def processor():
+        if not state["loaded"]:
+            sp.Load("{}.model".format(SPM_MODEL_PREFIX))
+            state["loaded"] = True
+        return sp
+
+    def ttseq(text, cleaners=None):
+        # ``cleaners`` is accepted only so the call signature matches the
+        # ``text.text_to_sequence`` import this replaces; it is not used.
+        return processor().EncodeAsIds(text)
+
+    def seqtt(sequence):
+        return processor().DecodeIds(sequence)
+
+    return ttseq, seqtt
+
+
+text_to_sequence, sequence_to_text = _spm_text_codecs()
+
+
+def _interactive_test():
+    """Echo each prompt after an encode/decode round trip.
+
+    Stops on "q", "quit", or end of input.
+    """
+    prompt = "Hello world; how are you, doing ?"
+    while prompt not in ["q", "quit"]:
+        oup = sequence_to_text(text_to_sequence(prompt, hparams.text_cleaners))
+        print('==> ',oup)
+        try:
+            prompt = input("> ")
+        except EOFError:
+            # Closed stdin (Ctrl-D, piped input): leave the loop quietly.
+            break
+
+
+def main(argv=None):
+    """Optionally retrain the model, then start the interactive check.
+
+    argv: argument list to parse; None means ``sys.argv[1:]``.
+    """
+    parser = argparse.ArgumentParser(description="SentencePiece codec utility")
+    parser.add_argument(
+        "--train",
+        action="store_true",
+        help="rebuild the text corpus and the SentencePiece model first",
+    )
+    args = parser.parse_args(argv)
+    if args.train:
+        _create_sentencepiece_corpus()
+        _create_sentencepiece_vocab()
+    _interactive_test()
+
+
+if __name__ == "__main__":
+    main()