mirror of
https://github.com/saymrwulf/transformers.git
synced 2026-05-14 20:58:08 +00:00
Processor: prevent duplicated tokens
When using text-only LLMs, the chat template is expected to take care of adding the required special tokens, such as BOS (beginning-of-sequence). Hence, tokenization of the templated prompt must not add special tokens again; otherwise they would be duplicated. The same contract should be honored for multimodal processors.
This commit is contained in:
parent
b5f327f350
commit
c4cbed8081
1 changed file with 1 addition and 0 deletions
|
|
@@ -1246,6 +1246,7 @@ class ProcessorMixin(PushToHubMixin):
|
|||
text=prompt,
|
||||
images=images if images else None,
|
||||
videos=videos if videos else None,
|
||||
add_special_tokens=False,
|
||||
**kwargs,
|
||||
)
|
||||
if return_dict:
|
||||
|
|
|
|||
Loading…
Reference in a new issue