mirror of
https://github.com/saymrwulf/transformers.git
synced 2026-05-14 20:58:08 +00:00
temporary solution to handle saving file from dduf format
This commit is contained in:
parent
734a186fd2
commit
48833071c0
1 changed file with 10 additions and 5 deletions
|
|
@ -95,6 +95,7 @@ class T5TokenizerFast(PreTrainedTokenizerFast):
|
|||
add_prefix_space=None,
|
||||
**kwargs,
|
||||
):
|
||||
self.dduf_entries = kwargs.get("dduf_entries", None)
|
||||
# Add extra_ids to the special token list
|
||||
if additional_special_tokens is not None:
|
||||
extra_tokens = [x for x in additional_special_tokens if "<extra_id_" in str(x)]
|
||||
|
|
@ -132,7 +133,9 @@ class T5TokenizerFast(PreTrainedTokenizerFast):
|
|||
|
||||
@property
|
||||
def can_save_slow_tokenizer(self) -> bool:
|
||||
return os.path.isfile(self.vocab_file) if self.vocab_file else False
|
||||
# TODO: update this. Putting it to True for now
|
||||
# return os.path.isfile(self.vocab_file) if self.vocab_file else False
|
||||
return True
|
||||
|
||||
@staticmethod
|
||||
def _eventually_correct_t5_max_length(pretrained_model_name_or_path, max_model_length, init_max_model_length):
|
||||
|
|
@ -173,10 +176,12 @@ class T5TokenizerFast(PreTrainedTokenizerFast):
|
|||
if os.path.abspath(self.vocab_file) != os.path.abspath(out_vocab_file) and os.path.isfile(self.vocab_file):
|
||||
copyfile(self.vocab_file, out_vocab_file)
|
||||
logger.info(f"Copy vocab file to {out_vocab_file}")
|
||||
elif not os.path.isfile(self.vocab_file):
|
||||
with open(out_vocab_file, "wb") as fi:
|
||||
content_spiece_model = self.sp_model.serialized_model_proto()
|
||||
fi.write(content_spiece_model)
|
||||
# copyfile doesn't work with binary content, e.g. when we load the file from an archive
|
||||
elif not os.path.isfile(self.vocab_file):
|
||||
with self.dduf_entries[self.vocab_file].as_mmap() as mm:
|
||||
with open(out_vocab_file, "wb") as out_file:
|
||||
out_file.write(mm)
|
||||
logger.info(f"Copy vocab file to {out_vocab_file}")
|
||||
|
||||
return (out_vocab_file,)
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue