mirror of
https://github.com/saymrwulf/transformers.git
synced 2026-05-14 20:58:08 +00:00
it's Friday night, let cross finger
This commit is contained in:
parent
b1db4f22b6
commit
f8c98d6173
1 changed files with 50 additions and 14 deletions
|
|
@ -19,15 +19,41 @@ Processor class for Kosmos2_5.
|
|||
from typing import List, Optional, Union
|
||||
|
||||
from ...image_processing_utils import BatchFeature
|
||||
from ...processing_utils import ProcessorMixin
|
||||
from ...tokenization_utils_base import PaddingStrategy, TextInput, TruncationStrategy
|
||||
from ...utils import TensorType, is_torch_available
|
||||
from ...image_utils import ImageInput
|
||||
from ...processing_utils import ImagesKwargs, ProcessingKwargs, ProcessorMixin, TextKwargs, Unpack
|
||||
from ...tokenization_utils_base import TextInput
|
||||
from ...utils import is_torch_available
|
||||
|
||||
|
||||
if is_torch_available():
|
||||
import torch
|
||||
|
||||
|
||||
class Kosmos2_5ImagesKwargs(ImagesKwargs, total=False):
    """
    Image keyword arguments accepted by the Kosmos2_5 processor.

    Extends `ImagesKwargs` with two optional, Kosmos2_5-specific entries. Because the class is declared with
    `total=False`, callers may omit either key.
    """

    # NOTE(review): presumably an upper bound on the number of patches extracted per image -- confirm against
    # the Kosmos2_5 image processor.
    max_patches: Optional[int]

    # NOTE(review): presumably the number of image placeholder tokens inserted into the text prompt -- confirm
    # against `Kosmos2_5Processor.__call__`.
    num_image_tokens: Optional[int]
|
||||
|
||||
|
||||
class Kosmos2_5ProcessorKwargs(ProcessingKwargs, total=False):
    """
    Keyword-argument schema for the Kosmos2_5 processor.

    Groups the accepted keyword arguments by modality (`text_kwargs`, `images_kwargs`) and declares, in
    `_defaults`, the value used for each argument when the caller does not supply one.
    """

    text_kwargs: TextKwargs
    images_kwargs: Kosmos2_5ImagesKwargs

    # Per-modality fallback values. Key names and values must stay in sync with the arguments read back in
    # the processor's `__call__`.
    _defaults = {
        # Tokenizer-side defaults.
        "text_kwargs": {
            "padding": True,
            "truncation": True,
            "max_length": None,
            "stride": 0,
            "pad_to_multiple_of": None,
            "return_attention_mask": None,
            "return_tensors": "pt",
        },
        # Image-processor-side defaults.
        "images_kwargs": {
            "max_patches": 4096,
            "num_image_tokens": 2048,
        },
    }
|
||||
|
||||
|
||||
class Kosmos2_5Processor(ProcessorMixin):
|
||||
r"""
|
||||
Constructs a Kosmos2_5 processor which wraps a PreTrainedTokenizerFast and Kosmos2_5 image processor into a single
|
||||
|
|
@ -58,18 +84,11 @@ class Kosmos2_5Processor(ProcessorMixin):
|
|||
|
||||
def __call__(
|
||||
self,
|
||||
images=None,
|
||||
images: ImageInput = None,
|
||||
text: Union[TextInput, List[TextInput]] = None,
|
||||
padding: Union[bool, str, PaddingStrategy] = True,
|
||||
truncation: Union[bool, str, TruncationStrategy] = True,
|
||||
max_length: Optional[int] = None,
|
||||
max_patches: Optional[int] = 4096,
|
||||
num_image_tokens: Optional[int] = 2048,
|
||||
stride: int = 0,
|
||||
pad_to_multiple_of: Optional[int] = None,
|
||||
return_attention_mask: Optional[bool] = None,
|
||||
return_tensors: Optional[Union[str, TensorType]] = "pt",
|
||||
**kwargs,
|
||||
audio=None,
|
||||
videos=None,
|
||||
**kwargs: Unpack[Kosmos2_5ProcessorKwargs],
|
||||
) -> BatchFeature:
|
||||
"""
|
||||
This method uses [`Kosmos2_5ImageProcessor.preprocess`] method to prepare image(s) for the model, and
|
||||
|
|
@ -85,6 +104,23 @@ class Kosmos2_5Processor(ProcessorMixin):
|
|||
if images is None:
|
||||
raise ValueError("Kosmos2_5Processor requires images to be passed.")
|
||||
|
||||
output_kwargs = self._merge_kwargs(
|
||||
Kosmos2_5ProcessorKwargs,
|
||||
tokenizer_init_kwargs=self.tokenizer.init_kwargs,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
max_patches = output_kwargs["images_kwargs"].setdefault("max_patches", None)
|
||||
num_image_tokens = output_kwargs["images_kwargs"].setdefault("num_image_tokens", None)
|
||||
|
||||
padding = output_kwargs["text_kwargs"].setdefault("padding", None)
|
||||
truncation = output_kwargs["text_kwargs"].setdefault("truncation", None)
|
||||
max_length = output_kwargs["text_kwargs"].setdefault("max_length", None)
|
||||
stride = output_kwargs["text_kwargs"].setdefault("stride", None)
|
||||
pad_to_multiple_of = output_kwargs["text_kwargs"].setdefault("pad_to_multiple_of", None)
|
||||
return_attention_mask = output_kwargs["text_kwargs"].setdefault("return_attention_mask", None)
|
||||
return_tensors = output_kwargs["text_kwargs"].setdefault("return_tensors", None)
|
||||
|
||||
encoding = BatchFeature()
|
||||
|
||||
if images is not None:
|
||||
|
|
|
|||
Loading…
Reference in a new issue