diff --git a/onnxruntime/python/tools/transformers/test/conftest.py b/onnxruntime/python/tools/transformers/test/conftest.py
new file mode 100644
index 0000000000..4bd7525e0e
--- /dev/null
+++ b/onnxruntime/python/tools/transformers/test/conftest.py
@@ -0,0 +1,36 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.  See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+"""Configuration for pytest."""
+
+import pytest
+
+
+def pytest_addoption(parser):
+    """Register the ``--slow`` command-line flag, which is off by default."""
+    parser.addoption(
+        "--slow",
+        action="store_true",
+        default=False,
+        help="run slow tests",
+    )
+
+
+def pytest_configure(config):
+    """Add the ``slow`` marker description to the ``markers`` ini setting."""
+    config.addinivalue_line("markers", "slow: mark test as slow to run")
+
+
+def pytest_collection_modifyitems(config, items):
+    """Attach a skip marker to slow tests unless ``--slow`` was given.
+
+    Items whose keywords contain ``slow`` get a ``skip`` marker; when the
+    ``--slow`` option is set, the collected items are left untouched.
+    """
+    if not config.getoption("--slow"):
+        skip_marker = pytest.mark.skip(reason="need --slow option to run")
+        slow_items = (entry for entry in items if "slow" in entry.keywords)
+        for entry in slow_items:
+            entry.add_marker(skip_marker)
diff --git a/onnxruntime/python/tools/transformers/test/test_gpt2.py b/onnxruntime/python/tools/transformers/test/test_gpt2.py
index cb6b680af5..8f1258ed71 100644
--- a/onnxruntime/python/tools/transformers/test/test_gpt2.py
+++ b/onnxruntime/python/tools/transformers/test/test_gpt2.py
@@ -21,13 +21,19 @@ class TestGpt2(unittest.TestCase):
         csv_filename = main(args)
         self.assertTrue(os.path.exists(csv_filename))
 
+    # Tests marked slow are skipped by conftest.py unless pytest is run with --slow.
+    @pytest.mark.slow
     def test_gpt2_fp32(self):
         self.run_benchmark_gpt2('-m gpt2 --precision fp32 -v -b 1 -s 128')
 
+    @pytest.mark.slow
     def test_gpt2_fp16(self):
-        if 'CUDAExecutionProvider' in onnxruntime.get_available_providers():
-            self.run_benchmark_gpt2('-m gpt2 --precision fp16 -o -b 1 -s 128 --use_gpu')
+        # Report a skip instead of a vacuous pass when CUDA is unavailable.
+        if 'CUDAExecutionProvider' not in onnxruntime.get_available_providers():
+            self.skipTest('CUDAExecutionProvider is not available')
+        self.run_benchmark_gpt2('-m gpt2 --precision fp16 -o -b 1 -s 128 --use_gpu')
 
+    @pytest.mark.slow
     def test_gpt2_int8(self):
         self.run_benchmark_gpt2('-m gpt2 --precision int8 -o -b 1 -s 128')
 
diff --git a/onnxruntime/python/tools/transformers/test/test_optimizer.py b/onnxruntime/python/tools/transformers/test/test_optimizer.py
index e981bf7f87..b5649d6b55 100644
--- a/onnxruntime/python/tools/transformers/test/test_optimizer.py
+++ b/onnxruntime/python/tools/transformers/test/test_optimizer.py
@@ -152,8 +152,8 @@ class TestBertOptimization(unittest.TestCase):
             'LayerNormalization': 24,
             'SkipLayerNormalization': 0,
             'Gelu': 0,
-            'FastGelu': 12,
-            'BiasGelu': 0
+            'FastGelu': 0,
+            'BiasGelu': 12
         }
         self.verify_node_count(bert_model, expected_node_count, 'test_pytorch_model_1_gpu_onnxruntime')
 
@@ -279,40 +279,52 @@ class TestBertOptimization(unittest.TestCase):
         }
         self.verify_node_count(model, expected_node_count, 'test_bert_tf2onnx_0')
 
+    # Tests marked slow are skipped by conftest.py unless pytest is run with --slow.
+    @pytest.mark.slow
     def test_huggingface_bert_fusion(self):
         self._test_optimizer_on_huggingface_model("bert-base-uncased", [1, 12, 0, 0, 12, 0, 24], inputs_count=1)
         self._test_optimizer_on_huggingface_model("bert-base-uncased", [1, 12, 0, 0, 12, 0, 24], inputs_count=2)
         self._test_optimizer_on_huggingface_model("bert-base-uncased", [1, 12, 0, 0, 12, 0, 24], inputs_count=3)
 
+    @pytest.mark.slow
     def test_huggingface_openaigpt_fusion(self):
         self._test_optimizer_on_huggingface_model("openai-gpt", [0, 12, 0, 12, 0, 24, 0])
 
+    @pytest.mark.slow
     def test_huggingface_gpt2_fusion(self):
         self._test_optimizer_on_huggingface_model("gpt2", [0, 12, 0, 12, 0, 25, 0])
 
+    @pytest.mark.slow
     def test_huggingface_xlm_fusion(self):
         self._test_optimizer_on_huggingface_model("xlm-mlm-ende-1024", [0, 6, 0, 0, 6, 0, 13])
 
+    @pytest.mark.slow
     def test_huggingface_roberta_fusion(self):
         self._test_optimizer_on_huggingface_model("roberta-base", [0, 12, 0, 0, 12, 0, 25])
 
+    @pytest.mark.slow
     def test_huggingface_distillbert_fusion(self):
         self._test_optimizer_on_huggingface_model("distilbert-base-uncased", [1, 6, 0, 0, 6, 0, 12], inputs_count=1)
         self._test_optimizer_on_huggingface_model("distilbert-base-uncased", [1, 6, 0, 0, 6, 0, 12], inputs_count=2)
 
+    @pytest.mark.slow
     def test_huggingface_camembert_fusion(self):
         # output not close issue
         self._test_optimizer_on_huggingface_model("camembert-base", [0, 12, 0, 0, 12, 0, 25], validate_model=False)
 
+    @pytest.mark.slow
     def test_huggingface_albert_fusion(self):
         self._test_optimizer_on_huggingface_model("albert-base-v1", [0, 12, 0, 0, 12, 0, 25])
 
+    @pytest.mark.slow
     def test_huggingface_t5_fusion(self):
         self._test_optimizer_on_huggingface_model("t5-small", [0, 0, 0, 0, 0, 0, 0])
 
+    @pytest.mark.slow
     def test_huggingface_xlmroberta_fusion(self):
         self._test_optimizer_on_huggingface_model("xlm-roberta-base", [0, 12, 0, 0, 12, 0, 25])
 
+    @pytest.mark.slow
     def test_huggingface_flaubert_fusion(self):
         # output not close issue
         self._test_optimizer_on_huggingface_model("flaubert/flaubert_base_cased", [0, 12, 0, 0, 12, 0, 25],
@@ -320,12 +332,15 @@ class TestBertOptimization(unittest.TestCase):
         self._test_optimizer_on_huggingface_model("flaubert/flaubert_small_cased", [0, 6, 0, 0, 6, 12, 1],
                                                   validate_model=False)
 
+    @pytest.mark.slow
     def test_huggingface_dialogpt_fusion(self):
         self._test_optimizer_on_huggingface_model("microsoft/DialoGPT-small", [0, 12, 0, 12, 0, 25, 0])
 
+    @pytest.mark.slow
     def test_huggingface_bart_fusion(self):
         self._test_optimizer_on_huggingface_model("facebook/bart-base", [0, 0, 0, 0, 12, 2, 30])
 
+    @pytest.mark.slow
     def test_bert_base_cased_from_tf(self):
         self._test_optimizer_on_tf_model("bert-base-cased", [1, 12, 0, 0, 12, 0, 24], 1)
         self._test_optimizer_on_tf_model("bert-base-cased", [1, 12, 0, 0, 12, 0, 24], 2)