diff --git a/docs/execution_providers/Nuphar-ExecutionProvider.md b/docs/execution_providers/Nuphar-ExecutionProvider.md
index 17244fcb8d..e3cbb960d6 100644
--- a/docs/execution_providers/Nuphar-ExecutionProvider.md
+++ b/docs/execution_providers/Nuphar-ExecutionProvider.md
@@ -148,7 +148,7 @@ sess = onnxruntime.InferenceSession(model_path)
 
     To save runtime JIT cost, Nuphar requires models to have shape inference information from ONNX after model is loaded. Some nodes in ONNX can generate dynamic output tensor shapes from input data value, i.e. ConstantOfShape, Tile, Slice in opset 10, Compress, etc. Those ops may block ONNX shape inference and make the part of graph after such nodes not runnable in Nuphar.
 
-    User may use Python script [symbolic_shape_infer.py](../../onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py) to run symbolic shape inference in ONNX model. This script adds output tensor shapes in the model in graph.value_info field, by doing symbolic dimension computation using sympy when there are Shape ops in model. Besides, running symbolic shape inference on ONNX model would make the graph more readable. Note that when using [model_editor.py](../../onnxruntime/core/providers/nuphar/scripts/model_editor.py) to convert models with LSTM/GRU/RNN to Scan, the resulting model may have incomplete shape inference. Running symbolic_shape_infer.py is needed to get the Scan ops in the model to run in Nuphar. Besides, please note that quantization should be the last step, after verified accuracy and performance of the edited floating point model.
+    Users may use the Python script [symbolic_shape_infer.py](../../onnxruntime/python/tools/symbolic_shape_infer.py) to run symbolic shape inference on an ONNX model. This script adds output tensor shapes to the model's graph.value_info field by doing symbolic dimension computation with sympy when there are Shape ops in the model. Running symbolic shape inference on an ONNX model also makes the graph more readable. Note that when using [model_editor.py](../../onnxruntime/core/providers/nuphar/scripts/model_editor.py) to convert models with LSTM/GRU/RNN to Scan, the resulting model may have incomplete shape inference. Running symbolic_shape_infer.py is needed to get the Scan ops in the model to run in Nuphar. Also, please note that quantization should be the last step, after verifying the accuracy and performance of the edited floating-point model.
 
     In addition, user may also manually add shapes to graph.value_info using [onnx.helper.make_tensor_value_info](https://github.com/onnx/onnx/blob/v1.5.0/onnx/helper.py#L290) with model specific knowledge. For example, if you have Hardmax output casted to bool as Compress input condition, then the unknown dimension of the output of Compress is actually 1.
 
diff --git a/docs/execution_providers/TensorRT-ExecutionProvider.md b/docs/execution_providers/TensorRT-ExecutionProvider.md
index d390c42dbd..b160ba8124 100644
--- a/docs/execution_providers/TensorRT-ExecutionProvider.md
+++ b/docs/execution_providers/TensorRT-ExecutionProvider.md
@@ -28,14 +28,14 @@ status = session_object.Load(model_file_name);
 The C API details are [here](../C_API.md#c-api).
 
 #### Shape Inference for TensorRT Subgraphs
-If some operators in the model are not supported by TensorRT, ONNX Runtime will partition the graph and only send supported subgraphs to TensorRT execution provider. Because TensorRT requires that all inputs of the subgraphs have shape specified, ONNX Runtime will throw error if there is no input shape info. In this case please run shape inference for the entire model first by running script [here](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py).
+If some operators in the model are not supported by TensorRT, ONNX Runtime will partition the graph and only send supported subgraphs to the TensorRT execution provider. Because TensorRT requires that all inputs of the subgraphs have their shapes specified, ONNX Runtime will throw an error if there is no input shape info. In this case, please run shape inference for the entire model first by running the script [here](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/symbolic_shape_infer.py).
 
 #### Sample
 This example shows how to run Faster R-CNN model on TensorRT execution provider,
 
 First, download Faster R-CNN onnx model from onnx model zoo [here](https://github.com/onnx/models/tree/master/vision/object_detection_segmentation/faster-rcnn).
 
-Second, infer shapes in the model by running shape inference script [here](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py),
+Second, infer shapes in the model by running the shape inference script [here](https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/python/tools/symbolic_shape_infer.py):
 ```
 python symbolic_shape_infer.py --input /path/to/onnx/model/model.onnx --output /path/to/onnx/model/new_model.onnx --auto_merge
 ```
diff --git a/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb b/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb
index 2872f35781..ee9f5c762b 100644
--- a/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb
+++ b/docs/python/notebooks/onnxruntime-nuphar-tutorial.ipynb
@@ -101,7 +101,7 @@
     "from onnxruntime.nuphar.model_editor import convert_to_scan_model\n",
     "from onnxruntime.nuphar.model_quantizer import convert_matmul_model\n",
     "from onnxruntime.nuphar.rnn_benchmark import generate_model, perf_test\n",
-    "from onnxruntime.nuphar.symbolic_shape_infer import SymbolicShapeInference"
+    "from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference"
    ]
   },
   {
@@ -122,6 +122,7 @@
     "opset = model.opset_import.add()\n",
     "opset.domain == 'onnx'\n",
     "opset.version = 7 # ONNX opset 7 is required for LSTM op later\n",
+    "model.ir_version = onnx.IR_VERSION\n",
     "\n",
     "graph = model.graph\n",
     "X = 'input'\n",
diff --git a/onnxruntime/core/providers/nuphar/scripts/README.md b/onnxruntime/core/providers/nuphar/scripts/README.md
index d6ea31e745..f8224de5da 100644
--- a/onnxruntime/core/providers/nuphar/scripts/README.md
+++ b/onnxruntime/core/providers/nuphar/scripts/README.md
@@ -14,8 +14,4 @@ Quantize MatMul in model dynamically wrt. input
 
 * rnn_benchmark.py
 
-Benchmark for LSTM/GRU/RNN with model_editor and model_quantizer to show Nuphar's speed up for those models
-
-* symbolic_shape_infer.py
-
-Run symbolic shape inference with sympy. Nuphar relies on shape inference to run efficiently.
\ No newline at end of file
+Benchmark for LSTM/GRU/RNN with model_editor and model_quantizer to show Nuphar's speed up for those models
\ No newline at end of file
diff --git a/onnxruntime/core/providers/nuphar/scripts/model_editor.py b/onnxruntime/core/providers/nuphar/scripts/model_editor.py
index 274b907862..3b715761bc 100644
--- a/onnxruntime/core/providers/nuphar/scripts/model_editor.py
+++ b/onnxruntime/core/providers/nuphar/scripts/model_editor.py
@@ -7,7 +7,7 @@ from enum import Enum
 import numpy as np
 import onnx
 from .node_factory import NodeFactory, ensure_opset
-from .symbolic_shape_infer import SymbolicShapeInference, get_shape_from_type_proto
+from ..tools.symbolic_shape_infer import SymbolicShapeInference, get_shape_from_type_proto
 
 # trim outputs of LSTM/GRU/RNN if not used or outputed
 def trim_unused_outputs(node, graph):
diff --git a/onnxruntime/core/providers/nuphar/scripts/model_quantizer.py b/onnxruntime/core/providers/nuphar/scripts/model_quantizer.py
index bcf8e0252d..3efb7dc392 100644
--- a/onnxruntime/core/providers/nuphar/scripts/model_quantizer.py
+++ b/onnxruntime/core/providers/nuphar/scripts/model_quantizer.py
@@ -9,7 +9,7 @@ import numpy as np
 import onnx
 from onnx import helper, numpy_helper
 from .node_factory import NodeFactory, ensure_opset
-from .symbolic_shape_infer import SymbolicShapeInference
+from ..tools.symbolic_shape_infer import SymbolicShapeInference
 
 class QuantizeConfig:
     def __init__(self, signed, reserved_bits, type_bits):
diff --git a/onnxruntime/core/providers/nuphar/scripts/rnn_benchmark.py b/onnxruntime/core/providers/nuphar/scripts/rnn_benchmark.py
index 8985fca17a..dfc35e06e3 100644
--- a/onnxruntime/core/providers/nuphar/scripts/rnn_benchmark.py
+++ b/onnxruntime/core/providers/nuphar/scripts/rnn_benchmark.py
@@ -116,7 +116,7 @@ def perf_test(rnn_type, num_threads, input_dim, hidden_dim, bidirectional, layer
     with ScopedSetNumThreads(num_threads) as scoped_set_num_threads:
         # run Scan model converted from original in Nuphar
         from .model_editor import convert_to_scan_model
-        from .symbolic_shape_infer import SymbolicShapeInference
+        from ..tools.symbolic_shape_infer import SymbolicShapeInference
         scan_model_name = os.path.splitext(model_name)[0] + '_scan.onnx'
         convert_to_scan_model(model_name, scan_model_name)
         # note that symbolic shape inference is needed because model has symbolic batch dim, thus init_state is ConstantOfShape
diff --git a/onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py b/onnxruntime/python/tools/symbolic_shape_infer.py
similarity index 99%
rename from onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py
rename to onnxruntime/python/tools/symbolic_shape_infer.py
index 41a641b867..e1986dfc3a 100755
--- a/onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py
+++ b/onnxruntime/python/tools/symbolic_shape_infer.py
@@ -48,7 +48,7 @@ def as_scalar(x):
         assert len(x) == 1
         return x[0]
     elif type(x) == np.ndarray:
-        return np.asscalar(x)
+        return x.item()
     else:
         return x
 
@@ -328,20 +328,17 @@ class SymbolicShapeInference:
                         self.symbolic_dims_[str(new_dim)] = new_dim
 
     def _onnx_infer_single_node(self, node):
-        # skip onnx shape inference for Scan/Loop
-        skip_infer = node.op_type in ['Scan', 'Loop']
+        # skip onnx shape inference for some ops, as they are handled in _infer_*
+        skip_infer = node.op_type in ['If', 'Loop', 'Scan', 'SplitToSequence', 'ZipMap']
         if not skip_infer:
             # run single node inference with self.known_vi_ shapes
             # note that inference rely on initializer values is not handled
             # as we don't copy initializer weights to tmp_graph for inference speed purpose
-            if node.op_type == 'SplitToSequence':
-                make_value_info_func = helper.make_sequence_value_info
-            else:
-                make_value_info_func = helper.make_tensor_value_info
             tmp_graph = helper.make_graph([node],
                                           'tmp',
                                           [self.known_vi_[i] for i in node.input if i],
-                                          [make_value_info_func(i, onnx.TensorProto.UNDEFINED, None) for i in node.output])
+                                          [helper.make_tensor_value_info(i, onnx.TensorProto.UNDEFINED, None) for i in node.output])
+
             self.tmp_mp_.graph.CopyFrom(tmp_graph)
             self.tmp_mp_ = shape_inference.infer_shapes(self.tmp_mp_)
         for i_o in range(len(node.output)):
@@ -349,6 +346,8 @@ class SymbolicShapeInference:
             vi = self.out_mp_.graph.value_info.add()
             if not skip_infer:
                 vi.CopyFrom(self.tmp_mp_.graph.output[i_o])
+            else:
+                vi.name = o
             self.known_vi_[o] = vi
 
     def _onnx_infer_subgraph(self, node, subgraph, use_node_input=True):
@@ -407,7 +406,7 @@ class SymbolicShapeInference:
                 if len(v.shape) > 1:
                     new_v = None # ignore value for rank > 1
                 elif len(v.shape) == 0:
-                    new_v = int(np.asscalar(v))
+                    new_v = int(v.item())
                 else:
                     assert len(v.shape) == 1
                     new_v = [int(vv) for vv in v]
diff --git a/onnxruntime/python/tools/tensorrt/perf/README.md b/onnxruntime/python/tools/tensorrt/perf/README.md
index 0513cfdcd5..483e1ec122 100644
--- a/onnxruntime/python/tools/tensorrt/perf/README.md
+++ b/onnxruntime/python/tools/tensorrt/perf/README.md
@@ -154,7 +154,5 @@ The output of running benchmark:
 
 ```
 ## Dependencies
-- This test script uses following script to infer shape in the model for TensorRT execution provider. 
-https://github.com/microsoft/onnxruntime/blob/master/onnxruntime/core/providers/nuphar/scripts/symbolic_shape_infer.py
 - When inferencing model using CUDA float16, this script following script to convert nodes in model graph from float32 to float16. It also modifies the converting script a little bit to better cover more model graph conversion.
 https://github.com/microsoft/onnxconverter-common/blob/master/onnxconverter_common/float16.py
diff --git a/onnxruntime/python/tools/tensorrt/perf/benchmark.py b/onnxruntime/python/tools/tensorrt/perf/benchmark.py
index b4266afe3d..5a05398136 100644
--- a/onnxruntime/python/tools/tensorrt/perf/benchmark.py
+++ b/onnxruntime/python/tools/tensorrt/perf/benchmark.py
@@ -675,7 +675,7 @@ def create_session(model_path, providers, session_options):
         new_model_path = model_path[:].replace(".onnx", "_new.onnx")
 
         if not os.path.exists(new_model_path):
-            subprocess.run("python3 ../symbolic_shape_infer.py --input " + model_path + " --output " + new_model_path + " --auto_merge", shell=True, check=True)
+            subprocess.run("python3 -m onnxruntime.tools.symbolic_shape_infer --input " + model_path + " --output " + new_model_path + " --auto_merge", shell=True, check=True)
         session = onnxruntime.InferenceSession(new_model_path, providers=providers, sess_options=session_options)
         return session
     except Exception as e:
diff --git a/onnxruntime/test/python/onnxruntime_test_python_nuphar.py b/onnxruntime/test/python/onnxruntime_test_python_nuphar.py
index c2cece8b0f..1bc08d3b8c 100644
--- a/onnxruntime/test/python/onnxruntime_test_python_nuphar.py
+++ b/onnxruntime/test/python/onnxruntime_test_python_nuphar.py
@@ -363,7 +363,7 @@ class TestNuphar(unittest.TestCase):
         # run onnx_test_runner to verify results
         # use -M to disable memory pattern
         onnx_test_runner = os.path.join(cwd, 'onnx_test_runner')
-        subprocess.run([onnx_test_runner, '-e', 'nuphar', '-M', '-n', 'bidaf', cwd], check=True, cwd=cwd)
+        subprocess.run([onnx_test_runner, '-e', 'nuphar', '-M', '-c', '1', '-j', '1', '-n', 'bidaf', cwd], check=True, cwd=cwd)
 
         # test AOT on the quantized model
         if os.name not in ['nt', 'posix']:
@@ -426,7 +426,7 @@ class TestNuphar(unittest.TestCase):
         bert_squad_dir = os.path.join(cwd, 'download_sample_10')
         bert_squad_model = os.path.join(bert_squad_dir, 'bertsquad10.onnx')
         subprocess.run([
-            sys.executable, '-m', 'onnxruntime.nuphar.symbolic_shape_infer', '--input', bert_squad_model, '--output',
+            sys.executable, '-m', 'onnxruntime.tools.symbolic_shape_infer', '--input', bert_squad_model, '--output',
             bert_squad_model, '--auto_merge', '--int_max=1000000'
         ],
                        check=True,
@@ -667,22 +667,9 @@ class TestNuphar(unittest.TestCase):
             sess = onnxrt.InferenceSession(matmul_model_name)
             actual_y = sess.run([], test_inputs)
 
-            assert np.allclose(expected_y, actual_y)
+            assert np.allclose(expected_y, actual_y, atol=1e-7)
             print("finished " + matmul_model_name)
 
-    def test_symbolic_shape_infer(self):
-        cwd = os.getcwd()
-        test_model_dir = os.path.join(cwd, '..', 'models')
-        for filename in Path(test_model_dir).rglob('*.onnx'):
-            if filename.name.startswith('.'):
-                continue  # skip some bad model files
-            subprocess.run([
-                sys.executable, '-m', 'onnxruntime.nuphar.symbolic_shape_infer', '--input',
-                str(filename), '--auto_merge', '--int_max=100000', '--guess_output_rank'
-            ],
-                           check=True,
-                           cwd=cwd)
-
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py b/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py
new file mode 100644
index 0000000000..19f455ff06
--- /dev/null
+++ b/onnxruntime/test/python/onnxruntime_test_python_symbolic_shape_infer.py
@@ -0,0 +1,27 @@
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# Licensed under the MIT License.
+
+# -*- coding: UTF-8 -*-
+import unittest
+import os
+from onnxruntime.tools.symbolic_shape_infer import SymbolicShapeInference
+import sys
+from pathlib import Path
+
+class TestSymbolicShapeInference(unittest.TestCase):
+    def test_symbolic_shape_infer(self):
+        """Run symbolic shape inference on every .onnx file found under ../models (relative to cwd)."""
+        models_root = Path(os.getcwd(), '..', 'models')
+        for model_path in models_root.rglob('*.onnx'):
+            if model_path.name.startswith('.'):
+                continue  # skip some bad model files
+            model_file = str(model_path)
+            print("Running symbolic shape inference on : " + model_file)
+            SymbolicShapeInference.infer_shapes(input_model=model_file,
+                                                output_model=None,
+                                                auto_merge=True,
+                                                int_max=100000,
+                                                guess_output_rank=True)
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index e7762c26b2..735a2489db 100755
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -111,6 +111,10 @@ def parse_arguments():
     parser.add_argument("--path_to_protoc_exe", help="Path to protoc exe.")
     parser.add_argument(
         "--fuzz_testing", action='store_true', help="Enable Fuzz testing of the onnxruntime.")
+    parser.add_argument(
+        "--enable_symbolic_shape_infer_tests", action='store_true',
+        help="""When running the Test phase, run symbolic shape inference against
+        available test data directories.""")
 
     # generate documentaiton
     parser.add_argument(
@@ -1206,6 +1210,10 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs):
 
             run_subprocess([sys.executable, 'onnxruntime_test_python.py'], cwd=cwd, dll_path=dll_path)
 
+            if args.enable_symbolic_shape_infer_tests:
+                run_subprocess([sys.executable, 'onnxruntime_test_python_symbolic_shape_infer.py'],
+                               cwd=cwd, dll_path=dll_path)
+
             # For CUDA enabled builds test IOBinding feature
             # Limit testing to Windows non-ARM builds for now
             iobinding_test = False
diff --git a/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml
index e708f05eb2..23390dc76e 100644
--- a/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml
@@ -38,6 +38,7 @@ jobs:
               --build_wheel \
               --use_openmp \
               --enable_onnx_tests \
+              --enable_symbolic_shape_infer_tests \
               --use_mklml --enable_pybind --build_java --build_nodejs \
               --cmake_extra_defines PYTHON_INCLUDE_DIR=/opt/python/cp37-cp37m/include/python3.7m PYTHON_LIBRARY=/usr/lib64/librt.so
       workingDirectory: $(Build.SourcesDirectory)
diff --git a/tools/ci_build/github/linux/docker/scripts/install_deps.sh b/tools/ci_build/github/linux/docker/scripts/install_deps.sh
index 087d5193b7..71a3ee0307 100755
--- a/tools/ci_build/github/linux/docker/scripts/install_deps.sh
+++ b/tools/ci_build/github/linux/docker/scripts/install_deps.sh
@@ -105,10 +105,7 @@ mv /tmp/src/gradle-6.3 /usr/local/gradle
 
 #Don't update 'wheel' to the latest version. see: https://github.com/pypa/auditwheel/issues/102
 ${PYTHON_EXE} -m pip install -r ${0/%install_deps\.sh/requirements\.txt}
-if [ $DEVICE_TYPE = "Normal" ]; then
-    ${PYTHON_EXE} -m pip install sympy==1.1.1
-elif [ $DEVICE_TYPE = "gpu" ]; then
-    ${PYTHON_EXE} -m pip install sympy==1.1.1
+if [ $DEVICE_TYPE = "gpu" ]; then
     if [[ $BUILD_EXTR_PAR = *--enable_training* ]]; then
       ${PYTHON_EXE} -m pip install --upgrade --pre torch==1.6.0.dev20200610 torchvision==0.7.0.dev20200610 torchtext==0.6.0.dev20200610 -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html
       ${PYTHON_EXE} -m pip install  transformers==v2.10.0
diff --git a/tools/ci_build/github/linux/docker/scripts/requirements.txt b/tools/ci_build/github/linux/docker/scripts/requirements.txt
index 00350822b0..987ab51f0b 100644
--- a/tools/ci_build/github/linux/docker/scripts/requirements.txt
+++ b/tools/ci_build/github/linux/docker/scripts/requirements.txt
@@ -6,3 +6,4 @@ setuptools>=41.4.0
 wheel
 onnx==1.7.0
 argparse
+sympy==1.1.1