mirror of
https://github.com/saymrwulf/transformers.git
synced 2026-05-14 20:58:08 +00:00
Add method to retrieve used chat template (#32032)
encapsulate chat template logic
This commit is contained in:
parent
605f3245dc
commit
7405c1c77e
1 changed file with 72 additions and 48 deletions
|
|
@ -1772,54 +1772,7 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||
if tokenizer_kwargs is None:
|
||||
tokenizer_kwargs = {}
|
||||
|
||||
using_default_template = False
|
||||
|
||||
# First, handle the cases when the model has a dict of multiple templates
|
||||
if isinstance(self.chat_template, dict) or (
|
||||
self.chat_template is None and isinstance(self.default_chat_template, dict)
|
||||
):
|
||||
if self.chat_template is not None:
|
||||
template_dict = self.chat_template
|
||||
using_default_dict = False
|
||||
else:
|
||||
template_dict = self.default_chat_template
|
||||
using_default_dict = True
|
||||
if chat_template is not None and chat_template in template_dict:
|
||||
# The user can pass the name of a template to the chat template argument instead of an entire template
|
||||
chat_template = template_dict[chat_template]
|
||||
if using_default_dict:
|
||||
using_default_template = True
|
||||
elif chat_template is None:
|
||||
if tools is not None and "tool_use" in template_dict:
|
||||
chat_template = template_dict["tool_use"]
|
||||
elif "default" in template_dict:
|
||||
chat_template = template_dict["default"]
|
||||
else:
|
||||
raise ValueError(
|
||||
"This model has multiple chat templates with no default specified! Please either pass a chat "
|
||||
"template or the name of the template you wish to use to the `chat_template` argument. Available "
|
||||
f"template names are {sorted(template_dict.keys())}."
|
||||
)
|
||||
if using_default_dict:
|
||||
using_default_template = True
|
||||
|
||||
elif chat_template is None:
|
||||
# These are the cases when the model has a single template
|
||||
# priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template
|
||||
if self.chat_template is not None:
|
||||
chat_template = self.chat_template
|
||||
else:
|
||||
chat_template = self.default_chat_template
|
||||
using_default_template = True
|
||||
|
||||
if using_default_template:
|
||||
logger.warning_once(
|
||||
"No chat template is set for this tokenizer, falling back to a default class-level template. This is "
|
||||
"very error-prone, because models are often trained with templates different from the class default! "
|
||||
"Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
|
||||
"point any code depending on them will stop working. We recommend setting a valid chat template before "
|
||||
"then to ensure that this model continues working without issues."
|
||||
)
|
||||
chat_template = self.get_chat_template(chat_template, tools)
|
||||
|
||||
if return_assistant_tokens_mask and not re.search(r"\{\%-?\s*generation\s*-?\%\}", chat_template):
|
||||
logger.warning_once(
|
||||
|
|
@ -2012,6 +1965,77 @@ class PreTrainedTokenizerBase(SpecialTokensMixin, PushToHubMixin):
|
|||
jinja_env.globals["raise_exception"] = raise_exception
|
||||
return jinja_env.from_string(chat_template)
|
||||
|
||||
def get_chat_template(self, chat_template: Optional[str] = None, tools: Optional[List[Dict]] = None) -> str:
    """
    Retrieve the chat template string used for tokenizing chat messages. This template is used
    internally by the `apply_chat_template` method and can also be used externally to retrieve the model's chat
    template for better generation tracking.

    Resolution priority is: the `chat_template` argument > `tokenizer.chat_template` >
    `tokenizer.default_chat_template`. When the tokenizer holds a dict of named templates, the argument may be
    the name of one of its entries; any other non-`None` value is returned unchanged as a literal template.

    Args:
        chat_template (`str`, *optional*):
            A Jinja template or the name of a template to use for this conversion.
            It is usually not necessary to pass anything to this argument,
            as the model's template will be used by default.
        tools (`List[Dict]`, *optional*):
            A list of tools (callable functions) that will be accessible to the model. If the template does not
            support function calling, this argument will have no effect. Each tool should be passed as a JSON Schema,
            giving the name, description and argument types for the tool. See our
            [chat templating guide](https://huggingface.co/docs/transformers/main/en/chat_templating#automated-function-conversion-for-tool-use)
            for more information.

    Returns:
        `str`: The chat template string.

    Raises:
        `ValueError`: If the tokenizer has a dict of templates, no `chat_template` was passed, and the dict
            contains neither a usable `"tool_use"` entry (only considered when `tools` is given) nor a
            `"default"` entry.
    """
    tokenizer_template = self.chat_template
    # The class-level default is only consulted when the tokenizer has no template of its own. It is read
    # exactly once here so the attribute is not evaluated again further down.
    falls_back_to_class_default = tokenizer_template is None
    candidates = self.default_chat_template if falls_back_to_class_default else tokenizer_template

    # Becomes True only when the returned template was actually taken from the class-level default.
    resolved_from_default = False

    if isinstance(candidates, dict):
        # The model has a dict of multiple named templates
        if chat_template is None:
            if tools is not None and "tool_use" in candidates:
                chat_template = candidates["tool_use"]
            elif "default" in candidates:
                chat_template = candidates["default"]
            else:
                raise ValueError(
                    "This model has multiple chat templates with no default specified! Please either pass a chat "
                    "template or the name of the template you wish to use to the `chat_template` argument. Available "
                    f"template names are {sorted(candidates.keys())}."
                )
            resolved_from_default = falls_back_to_class_default
        elif chat_template in candidates:
            # The user can pass the name of a template to the chat template argument instead of an entire template
            chat_template = candidates[chat_template]
            resolved_from_default = falls_back_to_class_default
        # Otherwise the argument is a full template supplied by the caller and is returned untouched.
    elif chat_template is None:
        # The model has a single template
        # priority: `chat_template` argument > `tokenizer.chat_template` > `tokenizer.default_chat_template`
        chat_template = candidates
        resolved_from_default = falls_back_to_class_default

    if resolved_from_default:
        logger.warning_once(
            "No chat template is set for this tokenizer, falling back to a default class-level template. This is "
            "very error-prone, because models are often trained with templates different from the class default! "
            "Default chat templates are a legacy feature and will be removed in Transformers v4.43, at which "
            "point any code depending on them will stop working. We recommend setting a valid chat template before "
            "then to ensure that this model continues working without issues."
        )

    return chat_template
|
||||
|
||||
@property
|
||||
def default_chat_template(self):
|
||||
"""
|
||||
|
|
|
|||
Loading…
Reference in a new issue