diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py
index 52208ef7ee..ba8694da4d 100644
--- a/onnxruntime/python/tools/transformers/benchmark.py
+++ b/onnxruntime/python/tools/transformers/benchmark.py
@@ -206,7 +206,7 @@ def run_pytorch(use_gpu, model_names, model_class, config_modifier, precision, n
 
     for model_name in model_names:
         config = AutoConfig.from_pretrained(model_name, torchscript=torchscript, cache_dir=cache_dir)
-        config_modifier(config)
+        config_modifier.modify(config)
         model = load_pretrained_model(model_name, config=config, cache_dir=cache_dir, custom_model_class=model_class)
         tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
 
@@ -248,6 +248,7 @@ def run_pytorch(use_gpu, model_names, model_class, config_modifier, precision, n
                     result = {
                         "engine": "torchscript" if torchscript else "torch",
                         "version": torch.__version__,
+                        "providers": "NA",
                         "device": "cuda" if use_gpu else "cpu",
                         "optimizer": "",
                         "precision": precision,
@@ -323,7 +324,7 @@ def run_tensorflow(use_gpu, model_names, model_class, config_modifier, precision
 
     for model_name in model_names:
         config = AutoConfig.from_pretrained(model_name, cache_dir=cache_dir)
-        config_modifier(config)
+        config_modifier.modify(config)
 
         model = load_pretrained_model(model_name,
                                       config=config,
@@ -381,6 +382,7 @@ def run_tensorflow(use_gpu, model_names, model_class, config_modifier, precision
                     result = {
                         "engine": "tensorflow",
                         "version": tf.__version__,
+                        "providers": "NA",
                         "device": "cuda" if use_gpu else "cpu",
                         "optimizer": "",
                         "precision": precision,
diff --git a/onnxruntime/python/tools/transformers/gpt2_beamsearch_helper.py b/onnxruntime/python/tools/transformers/gpt2_beamsearch_helper.py
index 2570673692..93d42ffb65 100644
--- a/onnxruntime/python/tools/transformers/gpt2_beamsearch_helper.py
+++ b/onnxruntime/python/tools/transformers/gpt2_beamsearch_helper.py
@@ -17,6 +17,7 @@ from typing import List, Dict, Tuple, Union
 from transformers import GPT2LMHeadModel, GPT2Config
 from benchmark_helper import Precision
 from gpt2_helper import Gpt2Helper, Gpt2Inputs, GPT2ModelNoPastState, MyGPT2Model, MyGPT2LMHeadModel, MyGPT2LMHeadModel_NoPadding
+from torch_onnx_export_helper import torch_onnx_export
 
 logger = logging.getLogger(__name__)
 
@@ -36,7 +37,7 @@ class Gpt2HelperFactory:
 
 
 class GPT2LMHeadModel_BeamSearchStep(GPT2LMHeadModel):
-    """Here we wrap a class for Onnx model conversion for GPT2LMHeadModel with past state and one 
+    """Here we wrap a class for Onnx model conversion for GPT2LMHeadModel with past state and one
     step beam search."""
     def __init__(self, config, batch_size, beam_size):
         super().__init__(config)
@@ -120,7 +121,7 @@ class GPT2LMHeadModel_BeamSearchStep(GPT2LMHeadModel):
 
 
 class GPT2LMHeadModel_ConfigurableOneStepSearch(GPT2LMHeadModel):
-    """Here we wrap a class for Onnx model conversion for GPT2LMHeadModel with past state and one 
+    """Here we wrap a class for Onnx model conversion for GPT2LMHeadModel with past state and one
     step beam search with configuration support."""
     def __init__(self,
                  config,
@@ -628,7 +629,7 @@ class Gpt2BeamSearchHelper(Gpt2Helper):
 
         Path(onnx_model_path).parent.mkdir(parents=True, exist_ok=True)
 
-        torch.onnx.export(
+        torch_onnx_export(
             model,
             args=tuple(input_list),
             f=onnx_model_path,
diff --git a/onnxruntime/python/tools/transformers/gpt2_helper.py b/onnxruntime/python/tools/transformers/gpt2_helper.py
index d0a2b92c5f..cc7712e163 100644
--- a/onnxruntime/python/tools/transformers/gpt2_helper.py
+++ b/onnxruntime/python/tools/transformers/gpt2_helper.py
@@ -21,6 +21,7 @@ from onnx_model import OnnxModel
 from fusion_utils import FusionUtils
 from benchmark_helper import Precision
 from io_binding_helper import IOBindingHelper
+from torch_onnx_export_helper import torch_onnx_export
 
 logger = logging.getLogger(__name__)
 
@@ -402,7 +403,7 @@ class Gpt2Helper:
 
         Path(onnx_model_path).parent.mkdir(parents=True, exist_ok=True)
 
-        torch.onnx.export(model,
+        torch_onnx_export(model,
                           args=tuple(input_list),
                           f=onnx_model_path,
                           input_names=input_names,
diff --git a/onnxruntime/python/tools/transformers/longformer/convert_longformer_to_onnx.py b/onnxruntime/python/tools/transformers/longformer/convert_longformer_to_onnx.py
index 3e9922bef3..fb7b0adefb 100644
--- a/onnxruntime/python/tools/transformers/longformer/convert_longformer_to_onnx.py
+++ b/onnxruntime/python/tools/transformers/longformer/convert_longformer_to_onnx.py
@@ -15,6 +15,8 @@
 #
 # For inference of the onnx model, you will need onnxruntime-gpu 1.7.0 or above.
 
+import sys
+import os
 import torch
 import numpy as np
 import argparse
@@ -25,6 +27,9 @@ from packaging import version
 from pathlib import Path
 from longformer_helper import LongformerHelper, PRETRAINED_LONGFORMER_MODELS
 
+sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
+from torch_onnx_export_helper import torch_onnx_export
+
 
 @parse_args('v', 'v', 'v', 'v', 'v', 'v', 'v', 'i', 'i')
 def my_longformer_attention(g, input, weight, bias, mask, global_weight, global_bias, global_mask, num_heads, window):
@@ -223,7 +228,7 @@ def export_longformer(model, onnx_model_path, export_padding):
 
     Path(onnx_model_path).parent.mkdir(parents=True, exist_ok=True)
 
-    torch.onnx.export(model,
+    torch_onnx_export(model,
                       example_inputs,
                       onnx_model_path,
                       opset_version=11,
diff --git a/onnxruntime/python/tools/transformers/models/t5/past_helper.py b/onnxruntime/python/tools/transformers/models/t5/past_helper.py
index 3c585c23c8..0a9eb37be9 100644
--- a/onnxruntime/python/tools/transformers/models/t5/past_helper.py
+++ b/onnxruntime/python/tools/transformers/models/t5/past_helper.py
@@ -11,7 +11,6 @@ logger = logging.getLogger(__name__)
 
 class PastKeyValuesHelper:
     """ Helper functions to process past key values for encoder-decoder model"""
-
     @staticmethod
     def get_past_names(num_layers, present: bool = False):
         past_self_names = []
diff --git a/onnxruntime/python/tools/transformers/models/t5/t5_decoder.py b/onnxruntime/python/tools/transformers/models/t5/t5_decoder.py
index 5bfd530581..26e5d9733e 100644
--- a/onnxruntime/python/tools/transformers/models/t5/t5_decoder.py
+++ b/onnxruntime/python/tools/transformers/models/t5/t5_decoder.py
@@ -6,6 +6,8 @@
 
 from pathlib import Path
 from typing import List, Union
+import sys
+import os
 import logging
 import numpy
 import torch
@@ -14,6 +16,9 @@ from onnxruntime import InferenceSession
 from t5_encoder import T5EncoderInputs
 from past_helper import PastKeyValuesHelper
 
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
+from torch_onnx_export_helper import torch_onnx_export
+
 logger = logging.getLogger(__name__)
 
 
@@ -21,7 +26,6 @@ class T5DecoderInit(torch.nn.Module):
     """ A T5 decoder with LM head to create initial past key values.
         This model is only called once during starting decoding.
     """
-
     def __init__(self,
                  decoder: torch.nn.Module,
                  lm_head: torch.nn.Module,
@@ -58,7 +62,6 @@ class T5DecoderInit(torch.nn.Module):
 
 class T5Decoder(torch.nn.Module):
     """ A T5 decoder with LM head and past key values"""
-
     def __init__(self, decoder, lm_head, config):
         super().__init__()
         self.decoder = decoder
@@ -89,7 +92,6 @@ class T5Decoder(torch.nn.Module):
 
 
 class T5DecoderInputs:
-
     def __init__(self, decoder_input_ids, encoder_attention_mask, encoder_hidden_states, past_key_values=None):
         self.decoder_input_ids: torch.LongTensor = decoder_input_ids
         self.encoder_attention_mask: torch.LongTensor = encoder_attention_mask
@@ -160,7 +162,6 @@ class T5DecoderInputs:
 
 
 class T5DecoderHelper:
-
     @staticmethod
     def export_onnx(decoder: Union[T5Decoder, T5DecoderInit],
                     device: torch.device,
@@ -250,7 +251,7 @@ class T5DecoderHelper:
                     }
 
         Path(onnx_model_path).parent.mkdir(parents=True, exist_ok=True)
-        torch.onnx.export(decoder,
+        torch_onnx_export(decoder,
                           args=tuple(input_list),
                           f=onnx_model_path,
                           export_params=True,
diff --git a/onnxruntime/python/tools/transformers/models/t5/t5_encoder.py b/onnxruntime/python/tools/transformers/models/t5/t5_encoder.py
index c0086896b7..cf0f7f97ab 100644
--- a/onnxruntime/python/tools/transformers/models/t5/t5_encoder.py
+++ b/onnxruntime/python/tools/transformers/models/t5/t5_encoder.py
@@ -5,6 +5,8 @@
 # --------------------------------------------------------------------------
 
 import random
+import sys
+import os
 from pathlib import Path
 from typing import List
 import logging
@@ -13,12 +15,14 @@ import torch
 from transformers import T5Config
 from onnxruntime import InferenceSession
 
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
+from torch_onnx_export_helper import torch_onnx_export
+
 logger = logging.getLogger(__name__)
 
 
 class T5Encoder(torch.nn.Module):
     """ T5 encoder outputs only the last hidden state"""
-
     def __init__(self, encoder, config: T5Config):
         super().__init__()
         self.encoder = encoder
@@ -29,7 +33,6 @@ class T5Encoder(torch.nn.Module):
 
 
 class T5EncoderInputs:
-
     def __init__(self, input_ids, attention_mask):
         self.input_ids: torch.LongTensor = input_ids
         self.attention_mask: torch.LongTensor = attention_mask
@@ -44,7 +47,7 @@ class T5EncoderInputs:
             sequence_length (int): sequence length
             vocab_size (int): vocaburary size
             device (torch.device): device of output tensors
-        
+
         Returns:
             T5EncoderInputs: dummy inputs for encoder
         """
@@ -67,7 +70,6 @@ class T5EncoderInputs:
 
 
 class T5EncoderHelper:
-
     @staticmethod
     def export_onnx(encoder: T5Encoder,
                     device: torch.device,
@@ -93,7 +95,7 @@ class T5EncoderHelper:
             outputs = encoder(encoder_inputs.input_ids, encoder_inputs.attention_mask)
 
         Path(onnx_model_path).parent.mkdir(parents=True, exist_ok=True)
-        torch.onnx.export(encoder,
+        torch_onnx_export(encoder,
                           args=tuple(encoder_inputs.to_list()),
                           f=onnx_model_path,
                           export_params=True,
diff --git a/onnxruntime/python/tools/transformers/models/t5/t5_encoder_decoder_init.py b/onnxruntime/python/tools/transformers/models/t5/t5_encoder_decoder_init.py
index 29b82cda19..bbfff80591 100644
--- a/onnxruntime/python/tools/transformers/models/t5/t5_encoder_decoder_init.py
+++ b/onnxruntime/python/tools/transformers/models/t5/t5_encoder_decoder_init.py
@@ -6,6 +6,8 @@
 
 from pathlib import Path
 from typing import List
+import sys
+import os
 import logging
 import numpy
 import torch
@@ -15,13 +17,15 @@ from t5_encoder import T5Encoder, T5EncoderInputs
 from t5_decoder import T5DecoderInit
 from past_helper import PastKeyValuesHelper
 
+sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..'))
+from torch_onnx_export_helper import torch_onnx_export
+
 logger = logging.getLogger(__name__)
 
 
 class T5EncoderDecoderInit(torch.nn.Module):
     """ A combination of T5Encoder and T5DecoderInit.
     """
-
     def __init__(self,
                  encoder: torch.nn.Module,
                  decoder: torch.nn.Module,
@@ -44,7 +48,6 @@ class T5EncoderDecoderInit(torch.nn.Module):
 
 
 class T5EncoderDecoderInitInputs:
-
     def __init__(self, encoder_input_ids, encoder_attention_mask, decoder_input_ids=None):
         self.encoder_input_ids: torch.LongTensor = encoder_input_ids
         self.encoder_attention_mask: torch.LongTensor = encoder_attention_mask
@@ -70,7 +73,6 @@ class T5EncoderDecoderInitInputs:
 
 
 class T5EncoderDecoderInitHelper:
-
     @staticmethod
     def export_onnx(model: T5EncoderDecoderInit,
                     device: torch.device,
@@ -153,7 +155,7 @@ class T5EncoderDecoderInitHelper:
                 dynamic_axes[name] = {0: 'batch_size', 1: num_heads, 2: sequence_length, 3: head_size}
 
         Path(onnx_model_path).parent.mkdir(parents=True, exist_ok=True)
-        torch.onnx.export(model,
+        torch_onnx_export(model,
                           args=tuple(input_list),
                           f=onnx_model_path,
                           export_params=True,
diff --git a/onnxruntime/python/tools/transformers/models/t5/t5_helper.py b/onnxruntime/python/tools/transformers/models/t5/t5_helper.py
index f04fa9941c..4bcb5d4284 100644
--- a/onnxruntime/python/tools/transformers/models/t5/t5_helper.py
+++ b/onnxruntime/python/tools/transformers/models/t5/t5_helper.py
@@ -22,7 +22,6 @@ PRETRAINED_T5_MODELS = ["t5-small", "t5-base", "t5-large", "t5-3B", "t5-11B"]
 
 
 class T5Helper:
-
     @staticmethod
     def get_onnx_path(output_dir: str, model_name_or_path: str, suffix: str = "", new_folder: bool = False) -> str:
         """Build onnx path
diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py
index 972b738a40..04228cd02e 100644
--- a/onnxruntime/python/tools/transformers/onnx_exporter.py
+++ b/onnxruntime/python/tools/transformers/onnx_exporter.py
@@ -15,6 +15,7 @@ from benchmark_helper import create_onnxruntime_session, Precision, OptimizerInf
 from gpt2_helper import GPT2ModelNoPastState, PRETRAINED_GPT2_MODELS, TFGPT2ModelNoPastState
 from quantize_helper import QuantizeHelper
 from huggingface_models import MODEL_CLASSES
+from torch_onnx_export_helper import torch_onnx_export
 
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
 
@@ -386,7 +387,7 @@ def export_onnx_model_from_pt(model_name, opset_version, use_external_data_forma
         dynamic_axes, output_names = build_dynamic_axes(example_inputs, example_outputs_flatten)
 
         replace_torch_functions()
-        torch.onnx.export(model=model,
+        torch_onnx_export(model=model,
                           args=tuple(example_inputs.values()),
                           f=onnx_model_path,
                           input_names=list(example_inputs.keys()),
diff --git a/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py b/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py
new file mode 100644
index 0000000000..0912ee396f
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/torch_onnx_export_helper.py
@@ -0,0 +1,80 @@
+#-------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.
+#--------------------------------------------------------------------------
+
+import torch
+from packaging.version import Version
+
+TrainingMode = torch.onnx.TrainingMode
+
+
+def torch_onnx_export(
+        model,
+        args,
+        f,
+        export_params=True,
+        verbose=False,
+        training=TrainingMode.EVAL,
+        input_names=None,
+        output_names=None,
+        operator_export_type=None,
+        opset_version=None,
+        _retain_param_name=None,
+        do_constant_folding=True,
+        example_outputs=None,
+        strip_doc_string=None,
+        dynamic_axes=None,
+        keep_initializers_as_inputs=None,
+        custom_opsets=None,
+        enable_onnx_checker=None,
+        use_external_data_format=None,
+        export_modules_as_functions=False):
+    """Export ``model`` through ``torch.onnx.export`` using arguments suited to the installed torch.
+
+    All parameters are forwarded by keyword to ``torch.onnx.export``; which ones are
+    forwarded depends on ``torch.__version__``:
+
+    * torch >= 1.11.0: the common options plus ``export_modules_as_functions``. The legacy
+      options (``_retain_param_name``, ``example_outputs``, ``strip_doc_string``,
+      ``enable_onnx_checker``, ``use_external_data_format``) are ignored.
+    * older torch: the common options plus every legacy option that is not ``None``;
+      ``export_modules_as_functions`` is ignored.
+
+    Legacy options left at ``None`` are omitted rather than passed as ``None`` so that the
+    installed torch applies its own default for them.
+
+    Returns:
+        None; the result of ``torch.onnx.export`` is not returned.
+    """
+    export_kwargs = dict(
+        model=model,
+        args=args,
+        f=f,
+        export_params=export_params,
+        verbose=verbose,
+        training=training,
+        input_names=input_names,
+        output_names=output_names,
+        operator_export_type=operator_export_type,
+        opset_version=opset_version,
+        do_constant_folding=do_constant_folding,
+        dynamic_axes=dynamic_axes,
+        keep_initializers_as_inputs=keep_initializers_as_inputs,
+        custom_opsets=custom_opsets)
+
+    if Version(torch.__version__) >= Version("1.11.0"):
+        # Legacy options are deliberately not forwarded on this branch.
+        export_kwargs["export_modules_as_functions"] = export_modules_as_functions
+    else:
+        legacy_kwargs = dict(
+            _retain_param_name=_retain_param_name,
+            example_outputs=example_outputs,
+            strip_doc_string=strip_doc_string,
+            enable_onnx_checker=enable_onnx_checker,
+            use_external_data_format=use_external_data_format)
+        # Passing None explicitly would override torch's own defaults on releases where
+        # these options do not default to None, so only caller-supplied values are sent.
+        export_kwargs.update({name: value for name, value in legacy_kwargs.items() if value is not None})
+
+    torch.onnx.export(**export_kwargs)