diff --git a/src/transformers/models/llama/tokenization_llama.py b/src/transformers/models/llama/tokenization_llama.py index 14c6a3dcd..2f8997274 100644 --- a/src/transformers/models/llama/tokenization_llama.py +++ b/src/transformers/models/llama/tokenization_llama.py @@ -243,7 +243,7 @@ class LlamaTokenizer(PreTrainedTokenizer): return vocab # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.tokenize - def tokenize(self, text: "TextInput", add_special_tokens=False, **kwargs) -> List[str]: + def tokenize(self, text: "TextInput", **kwargs) -> List[str]: """ Converts a string to a list of tokens. If `self.legacy` is set to `False`, a prefix token is added unless the first token is special. @@ -255,7 +255,7 @@ class LlamaTokenizer(PreTrainedTokenizer): if self.add_prefix_space: text = SPIECE_UNDERLINE + text - tokens = super().tokenize(text, add_special_tokens=add_special_tokens, **kwargs) + tokens = super().tokenize(text, **kwargs) if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens: tokens = tokens[1:] diff --git a/src/transformers/models/seamless_m4t/tokenization_seamless_m4t.py b/src/transformers/models/seamless_m4t/tokenization_seamless_m4t.py index afefd6feb..99dd1f095 100644 --- a/src/transformers/models/seamless_m4t/tokenization_seamless_m4t.py +++ b/src/transformers/models/seamless_m4t/tokenization_seamless_m4t.py @@ -447,7 +447,7 @@ class SeamlessM4TTokenizer(PreTrainedTokenizer): return tokenizer # Copied from transformers.models.t5.tokenization_t5.T5Tokenizer.tokenize - def tokenize(self, text: "TextInput", add_special_tokens=False, **kwargs) -> List[str]: + def tokenize(self, text: "TextInput", **kwargs) -> List[str]: """ Converts a string to a list of tokens. If `self.legacy` is set to `False`, a prefix token is added unless the first token is special. @@ -459,7 +459,7 @@ class SeamlessM4TTokenizer(PreTrainedTokenizer): if self.add_prefix_space: text = SPIECE_UNDERLINE + text - tokens = super().tokenize(text, add_special_tokens=add_special_tokens, **kwargs) + tokens = super().tokenize(text, **kwargs) if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens: tokens = tokens[1:] diff --git a/src/transformers/models/t5/tokenization_t5.py b/src/transformers/models/t5/tokenization_t5.py index 8d3202985..fba83ae92 100644 --- a/src/transformers/models/t5/tokenization_t5.py +++ b/src/transformers/models/t5/tokenization_t5.py @@ -377,7 +377,7 @@ class T5Tokenizer(PreTrainedTokenizer): self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs) self.sp_model.Load(self.vocab_file) - def tokenize(self, text: "TextInput", add_special_tokens=False, **kwargs) -> List[str]: + def tokenize(self, text: "TextInput", **kwargs) -> List[str]: """ Converts a string to a list of tokens. If `self.legacy` is set to `False`, a prefix token is added unless the first token is special. @@ -389,7 +389,7 @@ class T5Tokenizer(PreTrainedTokenizer): if self.add_prefix_space: text = SPIECE_UNDERLINE + text - tokens = super().tokenize(text, add_special_tokens=add_special_tokens, **kwargs) + tokens = super().tokenize(text, **kwargs) if len(tokens) > 1 and tokens[0] == SPIECE_UNDERLINE and tokens[1] in self.all_special_tokens: tokens = tokens[1:]