mirror of
https://github.com/saymrwulf/transformers.git
synced 2026-05-14 20:58:08 +00:00
In group_texts function, drop last block if smaller than block_size (#17908)
This commit is contained in:
parent
f71895a633
commit
bfcd5743ee
2 changed files with 2 additions and 0 deletions
|
|
@@ -141,6 +141,7 @@ Now you need a second preprocessing function to capture text truncated from any
|
|||
>>> def group_texts(examples):
|
||||
... concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
|
||||
... total_length = len(concatenated_examples[list(examples.keys())[0]])
|
||||
... total_length = (total_length // block_size) * block_size
|
||||
... result = {
|
||||
... k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
|
||||
... for k, t in concatenated_examples.items()
|
||||
|
|
|
|||
|
|
@@ -141,6 +141,7 @@ Ahora necesitas una segunda función de preprocesamiento para capturar el texto
|
|||
>>> def group_texts(examples):
|
||||
... concatenated_examples = {k: sum(examples[k], []) for k in examples.keys()}
|
||||
... total_length = len(concatenated_examples[list(examples.keys())[0]])
|
||||
... total_length = (total_length // block_size) * block_size
|
||||
... result = {
|
||||
... k: [t[i : i + block_size] for i in range(0, total_length, block_size)]
|
||||
... for k, t in concatenated_examples.items()
|
||||
|
|
|
|||
Loading…
Reference in a new issue