fix: (issue #32124) Exception raised when running transformers/examples/flax/language-modeling/t5_tokenizer_model.py. (#32157)

fix: Exception raised when running transformers/examples/flax/language-modeling/t5_tokenizer_model.py.
This commit is contained in:
Shaopeng Fu 2024-08-03 19:24:11 +03:00 committed by GitHub
parent c1aa0edb48
commit 7c31d05b59
No known key found for this signature in database
GPG key ID: B5690EEEBB952194

View file

@@ -47,14 +47,14 @@ class SentencePieceUnigramTokenizer(BaseTokenizer):
tokenizer.pre_tokenizer = pre_tokenizers.Sequence(
[
pre_tokenizers.Metaspace(
replacement=replacement, add_prefix_space="always" if add_prefix_space else "never"
replacement=replacement, prepend_scheme="always" if add_prefix_space else "never"
),
pre_tokenizers.Digits(individual_digits=True),
pre_tokenizers.Punctuation(),
]
)
tokenizer.decoder = decoders.Metaspace(
replacement=replacement, add_prefix_space="always" if add_prefix_space else "never"
replacement=replacement, prepend_scheme="always" if add_prefix_space else "never"
)
tokenizer.post_processor = TemplateProcessing(