From e162cebfa3e9ca707d0d226a55b17cbe2c7ba719 Mon Sep 17 00:00:00 2001 From: Bibhabasu Mohapatra <68384968+bibhabasumohapatra@users.noreply.github.com> Date: Fri, 7 Oct 2022 18:53:24 +0530 Subject: [PATCH] add ONNX support for swin transformer (#19390) * swin transformer onnx support * Updated image dimensions as dynamic Co-authored-by: lewtun Co-authored-by: lewtun --- docs/source/en/serialization.mdx | 1 + src/transformers/models/swin/__init__.py | 4 ++-- .../models/swin/configuration_swin.py | 23 +++++++++++++++++++ src/transformers/onnx/features.py | 3 +++ tests/onnx/test_onnx_v2.py | 1 + 5 files changed, 30 insertions(+), 2 deletions(-) diff --git a/docs/source/en/serialization.mdx b/docs/source/en/serialization.mdx index 903d35da4..c6bd29bc6 100644 --- a/docs/source/en/serialization.mdx +++ b/docs/source/en/serialization.mdx @@ -94,6 +94,7 @@ Ready-made configurations include the following architectures: - RoFormer - SegFormer - SqueezeBERT +- Swin Transformer - T5 - ViT - XLM diff --git a/src/transformers/models/swin/__init__.py b/src/transformers/models/swin/__init__.py index 33a9bddee..63809f369 100644 --- a/src/transformers/models/swin/__init__.py +++ b/src/transformers/models/swin/__init__.py @@ -21,7 +21,7 @@ from typing import TYPE_CHECKING from ...utils import OptionalDependencyNotAvailable, _LazyModule, is_tf_available, is_torch_available -_import_structure = {"configuration_swin": ["SWIN_PRETRAINED_CONFIG_ARCHIVE_MAP", "SwinConfig"]} +_import_structure = {"configuration_swin": ["SWIN_PRETRAINED_CONFIG_ARCHIVE_MAP", "SwinConfig", "SwinOnnxConfig"]} try: @@ -53,7 +53,7 @@ else: ] if TYPE_CHECKING: - from .configuration_swin import SWIN_PRETRAINED_CONFIG_ARCHIVE_MAP, SwinConfig + from .configuration_swin import SWIN_PRETRAINED_CONFIG_ARCHIVE_MAP, SwinConfig, SwinOnnxConfig try: if not is_torch_available(): diff --git a/src/transformers/models/swin/configuration_swin.py b/src/transformers/models/swin/configuration_swin.py index 878a73e92..b1d0ceb9b 100644 --- a/src/transformers/models/swin/configuration_swin.py +++ b/src/transformers/models/swin/configuration_swin.py @@ -14,7 +14,13 @@ # limitations under the License. """ Swin Transformer model configuration""" +from collections import OrderedDict +from typing import Mapping + +from packaging import version + from ...configuration_utils import PretrainedConfig +from ...onnx import OnnxConfig from ...utils import logging @@ -145,3 +151,20 @@ class SwinConfig(PretrainedConfig): # we set the hidden_size attribute in order to make Swin work with VisionEncoderDecoderModel # this indicates the channel dimension after the last stage of the model self.hidden_size = int(embed_dim * 2 ** (len(depths) - 1)) + + +class SwinOnnxConfig(OnnxConfig): + + torch_onnx_minimum_version = version.parse("1.11") + + @property + def inputs(self) -> Mapping[str, Mapping[int, str]]: + return OrderedDict( + [ + ("pixel_values", {0: "batch", 1: "num_channels", 2: "height", 3: "width"}), + ] + ) + + @property + def atol_for_validation(self) -> float: + return 1e-4 diff --git a/src/transformers/onnx/features.py b/src/transformers/onnx/features.py index 535686f17..4d1af8746 100644 --- a/src/transformers/onnx/features.py +++ b/src/transformers/onnx/features.py @@ -471,6 +471,9 @@ class FeaturesManager: "question-answering", onnx_config_cls="models.squeezebert.SqueezeBertOnnxConfig", ), + "swin": supported_features_mapping( + "default", "image-classification", "masked-im", onnx_config_cls="models.swin.SwinOnnxConfig" + ), "t5": supported_features_mapping( "default", "default-with-past", diff --git a/tests/onnx/test_onnx_v2.py b/tests/onnx/test_onnx_v2.py index f3c19ed8f..dac4a2580 100644 --- a/tests/onnx/test_onnx_v2.py +++ b/tests/onnx/test_onnx_v2.py @@ -217,6 +217,7 @@ PYTORCH_EXPORT_MODELS = { ("longformer", "allenai/longformer-base-4096"), ("yolos", "hustvl/yolos-tiny"), ("segformer", "nvidia/segformer-b0-finetuned-ade-512-512"), + ("swin", "microsoft/swin-tiny-patch4-window7-224"), } PYTORCH_EXPORT_WITH_PAST_MODELS = {