From e0c7e0e9280b47a8a9cc81f8bc11f41cc7f6a495 Mon Sep 17 00:00:00 2001 From: rafaelvalle Date: Mon, 26 Nov 2018 16:41:21 -0800 Subject: [PATCH] adding changes to text --- text/__init__.py | 2 -- text/symbols.py | 7 ++++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/text/__init__.py b/text/__init__.py index 2720c55..02ecf0e 100644 --- a/text/__init__.py +++ b/text/__init__.py @@ -37,8 +37,6 @@ def text_to_sequence(text, cleaner_names): sequence += _arpabet_to_sequence(m.group(2)) text = m.group(3) - # Append EOS token - sequence.append(_symbol_to_id['~']) return sequence diff --git a/text/symbols.py b/text/symbols.py index 7212f92..1be47bf 100644 --- a/text/symbols.py +++ b/text/symbols.py @@ -7,11 +7,12 @@ The default is a set of ASCII characters that works well for English or text tha from text import cmudict _pad = '_' -_eos = '~' -_characters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz!\'(),-.:;? ' +_punctuation = '!\'(),.:;? ' +_special = '-' +_letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz' # Prepend "@" to ARPAbet symbols to ensure uniqueness (some are the same as uppercase letters): _arpabet = ['@' + s for s in cmudict.valid_symbols] # Export all symbols: -symbols = [_pad, _eos] + list(_characters) + _arpabet +symbols = [_pad] + list(_special) + list(_punctuation) + list(_letters) + _arpabet