diff --git a/onnxruntime/python/tools/transformers/benchmark.py b/onnxruntime/python/tools/transformers/benchmark.py
index 991db1ff16..a1516ccda2 100644
--- a/onnxruntime/python/tools/transformers/benchmark.py
+++ b/onnxruntime/python/tools/transformers/benchmark.py
@@ -56,7 +56,7 @@ from onnx_exporter import create_onnxruntime_input, load_pretrained_model, expor
 
 logger = logging.getLogger('')
 
-from huggingface_models import MODELS
+from huggingface_models import MODELS, MODEL_CLASSES
 
 cpu_count = psutil.cpu_count(logical=True)
 # Set OMP environment variable before importing onnxruntime or torch.
@@ -66,7 +66,7 @@ if "OMP_NUM_THREADS" not in os.environ:
 import torch
 from transformers import (AutoConfig, AutoTokenizer, AutoModel, GPT2Model)
 
-def run_onnxruntime(use_gpu, model_names, precision, batch_sizes, sequence_lengths, repeat_times, input_counts,
+def run_onnxruntime(use_gpu, model_names, model_class, precision, batch_sizes, sequence_lengths, repeat_times, input_counts,
                     optimize_onnx, validate_onnx, cache_dir, onnx_dir, verbose, overwrite, disable_ort_io_binding,
                     use_raw_attention_mask, thread_num, model_fusion_statistics):
     import onnxruntime
@@ -91,9 +91,9 @@ def run_onnxruntime(use_gpu, model_names, precision, batch_sizes, sequence_lengt
 
             with torch.no_grad():
                 onnx_model_file, is_valid_onnx_model, vocab_size, max_sequence_length = export_onnx_model(
-                    model_name, MODELS[model_name][1], MODELS[model_name][2], MODELS[model_name][3], cache_dir,
-                    onnx_dir, input_names, use_gpu, precision, optimize_onnx, validate_onnx, use_raw_attention_mask,
-                    overwrite, model_fusion_statistics)
+                    model_name, MODELS[model_name][1], MODELS[model_name][2], MODELS[model_name][3], model_class,
+                    cache_dir, onnx_dir, input_names, use_gpu, precision, optimize_onnx, validate_onnx,
+                    use_raw_attention_mask, overwrite, model_fusion_statistics)
             if not is_valid_onnx_model:
                 continue
 
@@ -154,7 +154,7 @@ def run_onnxruntime(use_gpu, model_names, precision, batch_sizes, sequence_lengt
     return results
 
 
-def run_pytorch(use_gpu, model_names, precision, batch_sizes, sequence_lengths, repeat_times, torchscript, cache_dir,
+def run_pytorch(use_gpu, model_names, model_class, precision, batch_sizes, sequence_lengths, repeat_times, torchscript, cache_dir,
                 verbose):
     results = []
     if use_gpu and not torch.cuda.is_available():
@@ -165,7 +165,7 @@ def run_pytorch(use_gpu, model_names, precision, batch_sizes, sequence_lengths,
 
     for model_name in model_names:
         config = AutoConfig.from_pretrained(model_name, torchscript=torchscript, cache_dir=cache_dir)
-        model = load_pretrained_model(model_name, config=config, cache_dir=cache_dir)
+        model = load_pretrained_model(model_name, config=config, cache_dir=cache_dir, custom_model_class=model_class)
         tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)
 
         max_input_size = tokenizer.max_model_input_sizes[model_name] if model_name in tokenizer.max_model_input_sizes else 1024
@@ -237,6 +237,13 @@ def parse_arguments():
                         choices=list(MODELS.keys()),
                         help="Pre-trained models in the list: " + ", ".join(MODELS.keys()))
 
+    parser.add_argument('--model_class',
+                        required=False,
+                        type=str,
+                        default=None,
+                        choices=list(MODEL_CLASSES.keys()),
+                        help='Model type selected in the list: ' + ', '.join(MODEL_CLASSES.keys()))
+
     parser.add_argument("-e",
                         "--engines",
                         required=False,
@@ -358,18 +365,18 @@ def main():
             logger.warning("--input_counts is not implemented for torch or torchscript engine.")
 
         if enable_torchscript:
-            results += run_pytorch(args.use_gpu, args.models, args.precision, args.batch_sizes, args.sequence_lengths,
+            results += run_pytorch(args.use_gpu, args.models, args.model_class, args.precision, args.batch_sizes, args.sequence_lengths,
                                    args.test_times, True, args.cache_dir, args.verbose)
 
         if enable_torch:
-            results += run_pytorch(args.use_gpu, args.models, args.precision, args.batch_sizes, args.sequence_lengths,
+            results += run_pytorch(args.use_gpu, args.models, args.model_class, args.precision, args.batch_sizes, args.sequence_lengths,
                                    args.test_times, False, args.cache_dir, args.verbose)
 
     model_fusion_statistics = {}
     if enable_onnxruntime:
         try:
             use_raw_attention_mask = True
-            results += run_onnxruntime(args.use_gpu, args.models, args.precision, args.batch_sizes,
+            results += run_onnxruntime(args.use_gpu, args.models, args.model_class, args.precision, args.batch_sizes,
                                        args.sequence_lengths, args.test_times, args.input_counts, args.optimize_onnx,
                                        args.validate_onnx, args.cache_dir, args.onnx_dir, args.verbose, args.overwrite,
                                        args.disable_ort_io_binding, use_raw_attention_mask, args.thread_num,
diff --git a/onnxruntime/python/tools/transformers/huggingface_models.py b/onnxruntime/python/tools/transformers/huggingface_models.py
index b417658582..126bbd4809 100644
--- a/onnxruntime/python/tools/transformers/huggingface_models.py
+++ b/onnxruntime/python/tools/transformers/huggingface_models.py
@@ -4,8 +4,21 @@
 # license information.
 # --------------------------------------------------------------------------
 
+from transformers import AutoModelForQuestionAnswering
+from transformers import AutoModelForSequenceClassification
+from transformers import AutoModelWithLMHead
+from transformers import AutoModel
+
+# Maps a model class name (the accepted --model_class values) to the transformers auto-model class it selects
+MODEL_CLASSES = {
+    'AutoModel': AutoModel,
+    'AutoModelWithLMHead': AutoModelWithLMHead,
+    'AutoModelForSequenceClassification': AutoModelForSequenceClassification,
+    'AutoModelForQuestionAnswering': AutoModelForQuestionAnswering
+}
+
 # List of pretrained models: https://huggingface.co/transformers/pretrained_models.html
-# Pretrained model name to a tuple of input names, opset_version, use_external_data_format and optimization model type
+# Pretrained model name to a tuple of input names, opset_version, use_external_data_format, optimization model type
 MODELS = {
 # BERT
     "bert-base-uncased": (["input_ids", "attention_mask", "token_type_ids"], 11, False, "bert"),
@@ -57,7 +70,7 @@ MODELS = {
     "roberta-large-openai-detector": (["input_ids", "attention_mask"], 11, False, "bert"),
 # DistilBERT
     "distilbert-base-uncased": (["input_ids", "attention_mask"], 11, False, "bert"),
-    "distilbert-base-uncased-distilled-squad": (["input_ids", "attention_mask"], 11, False, "bert"), 
+    "distilbert-base-uncased-distilled-squad": (["input_ids", "attention_mask"], 11, False, "bert"),
     "distilbert-base-cased": (["input_ids", "attention_mask"], 11, False, "bert"),
     "distilbert-base-cased-distilled-squad": (["input_ids", "attention_mask"], 11, False, "bert"),
     "distilbert-base-german-cased": (["input_ids", "attention_mask"], 11, False, "bert"),
diff --git a/onnxruntime/python/tools/transformers/onnx_exporter.py b/onnxruntime/python/tools/transformers/onnx_exporter.py
index c4213f9099..c8b8c8d742 100644
--- a/onnxruntime/python/tools/transformers/onnx_exporter.py
+++ b/onnxruntime/python/tools/transformers/onnx_exporter.py
@@ -12,6 +12,7 @@ from transformers import AutoConfig, AutoTokenizer, AutoModel
 from benchmark_helper import create_onnxruntime_session, Precision
 from gpt2_helper import GPT2ModelNoPastState, PRETRAINED_GPT2_MODELS
 from quantize_helper import QuantizeHelper
+from huggingface_models import MODEL_CLASSES
 
 logger = logging.getLogger(__name__)
 
@@ -182,20 +183,40 @@ def optimize_onnx_model(onnx_model_path, optimized_model_path, model_type, num_a
         logger.info(f"Skip optimization since model existed: {optimized_model_path}")
 
 
-def load_pretrained_model(model_name, config, cache_dir):
+def modelclass_dispatcher(model_name, custom_model_class):
+    """Pick the model class: custom_model_class (a MODEL_CLASSES key) if given, else inferred from model_name."""
+    if custom_model_class is not None:
+        return MODEL_CLASSES[custom_model_class]
+
     if model_name in PRETRAINED_GPT2_MODELS:
-        return GPT2ModelNoPastState.from_pretrained(model_name, config=config, cache_dir=cache_dir)
-    return AutoModel.from_pretrained(model_name, config=config, cache_dir=cache_dir)
+        return GPT2ModelNoPastState
+
+    if model_name.endswith('-squad'):
+        from transformers import AutoModelForQuestionAnswering
+        return AutoModelForQuestionAnswering
+    elif model_name.endswith('-mrpc'):
+        from transformers import AutoModelForSequenceClassification
+        return AutoModelForSequenceClassification
+    elif 'gpt2' in model_name:
+        from transformers import AutoModelWithLMHead
+        return AutoModelWithLMHead
+
+    return AutoModel
 
 
-def export_onnx_model(model_name, opset_version, use_external_data_format, model_type, cache_dir, onnx_dir, input_names,
-                      use_gpu, precision, optimize_onnx, validate_onnx, use_raw_attention_mask, overwrite,
+def load_pretrained_model(model_name, config, cache_dir, custom_model_class=None):
+    model_class = modelclass_dispatcher(model_name, custom_model_class)  # None => class inferred from model_name
+    return model_class.from_pretrained(model_name, config=config, cache_dir=cache_dir)
+
+
+def export_onnx_model(model_name, opset_version, use_external_data_format, model_type, model_class, cache_dir, onnx_dir,
+                      input_names, use_gpu, precision, optimize_onnx, validate_onnx, use_raw_attention_mask, overwrite,
                       model_fusion_statistics):
     config = AutoConfig.from_pretrained(model_name, cache_dir=cache_dir)
     if hasattr(config, 'return_dict'):
         config.return_dict = False
 
-    model = load_pretrained_model(model_name, config=config, cache_dir=cache_dir)
+    model = load_pretrained_model(model_name, config=config, cache_dir=cache_dir, custom_model_class=model_class)
     model.cpu()
 
     tokenizer = AutoTokenizer.from_pretrained(model_name, cache_dir=cache_dir)