[TVM EP] Integrate tests for TVM EP into public onnxruntime CI (#10505)

* add support for bool type * add TVM EP support for tests * include TVM EP in python test pool * fix pylint * moved technical imports to a separate file * clean up post build actions & move _ld_preload.py extension to CMake level * add files for include TVM EP into CI * implement custom logger for TVM * replace TVM logging with ONNX RT logging * update link for TVM EP tutorial * clean up TVM EP cmake * add pybind auto enabling for TVM EP * fix blank spaces * code review fixes * replace print with comment * add list of EP without TVM EP * enable onnx tests * disable contrib ops and ml ops * reuse Dockerfile.ubuntu * Move install_tvm_test_dependencies.sh out of Docker context dir, update build definition. Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com>
2026-07-23 19:32:23 +00:00 · 2022-02-24 18:24:23 +03:00 · 2022-02-24 18:24:23 +03:00 · 7dc7529ec8
commit 7dc7529ec8
parent ecf064f135
15 changed files with 220 additions and 85 deletions
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -1415,10 +1415,14 @@ if (onnxruntime_USE_TVM)
    set(USE_CUDA ON CACHE BOOL "Only defined for TVM" FORCE)
  endif()

+  # Define TVM_LOG_CUSTOMIZE=1 at directory scope so that it also applies to the targets
+  # created by the add_subdirectory() call for TVM below.
+  add_compile_definitions(TVM_LOG_CUSTOMIZE=1)
+  # Static library built from custom_logging.cc (the custom TVM log handlers); it is linked
+  # into the tvm target further down.
+  add_library(tvm_custom_logger STATIC ${ONNXRUNTIME_ROOT}/core/providers/tvm/custom_logging.cc)
+
  set(USE_OPENMP gnu CACHE STRING "Only defined for TVM")
  add_subdirectory(${tvm_SOURCE_DIR} ${tvm_BINARY_DIR} EXCLUDE_FROM_ALL)

  set_target_properties(tvm PROPERTIES FOLDER ${tvm_SOURCE_DIR})
+  target_link_libraries(tvm PUBLIC tvm_custom_logger)

  set(TVM_INCLUDES ${tvm_SOURCE_DIR}/include
    ${tvm_SOURCE_DIR}/3rdparty/dmlc-core/include
--- a/cmake/external/tvm.cmake
+++ b/cmake/external/tvm.cmake
@ -10,6 +10,7 @@ if (onnxruntime_USE_TVM)
  FetchContent_GetProperties(tvm)
  if(NOT tvm_POPULATED)
    FetchContent_Populate(tvm)
+    file(CREATE_LINK ${tvm_BINARY_DIR} ${tvm_SOURCE_DIR}/build SYMBOLIC)
  endif()

  set(tvm_INCLUDE_DIRS ${tvm_SOURCE_DIR}/include)
--- a/cmake/onnxruntime_python.cmake
+++ b/cmake/onnxruntime_python.cmake
@ -716,26 +716,21 @@ if (onnxruntime_USE_TVM)
    COMMAND ${CMAKE_COMMAND} -E copy
        $<TARGET_FILE:onnxruntime_providers_tvm>
        $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
-    # TODO(vvchernov): why?
-    COMMAND ${CMAKE_COMMAND} -E copy
-        ${tvm_BINARY_DIR}/libtvm*
-        ${tvm_SOURCE_DIR}/python/tvm
  )

-  # TODO(vvchernov): repeat?
  add_custom_command(
    TARGET onnxruntime_pybind11_state POST_BUILD
      WORKING_DIRECTORY ${tvm_SOURCE_DIR}/python
-      COMMAND ${Python_EXECUTABLE} setup.py build_ext --inplace
-      COMMAND ${CMAKE_COMMAND} -E rm
-        ${tvm_SOURCE_DIR}/python/tvm/*.so
-      COMMAND ${CMAKE_COMMAND} -E env TVM_LIBRARY_PATH=${tvm_BINARY_DIR}
-          ${Python_EXECUTABLE} setup.py bdist_wheel
-      COMMAND ${CMAKE_COMMAND} -E copy
-        ${tvm_BINARY_DIR}/libtvm*
-        ${tvm_SOURCE_DIR}/python/tvm
+      COMMAND ${Python_EXECUTABLE} setup.py bdist_wheel
    )

+  # After onnxruntime_pybind11_state is built, run extend_python_file.py on the packaged
+  # capi/_ld_preload.py so that the TVM-related import code is appended to it.
+  add_custom_command(
+    TARGET onnxruntime_pybind11_state POST_BUILD
+    COMMAND ${Python_EXECUTABLE}
+          $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/providers/tvm/extend_python_file.py
+          --target_file $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/_ld_preload.py
+  )
+
 endif()

 if (onnxruntime_USE_DML)
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@ -1291,12 +1291,4 @@ if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
  endif()
 endif()

-if (onnxruntime_USE_TVM)
-  # find_library(TVM_LIBS NAMES libtvm.so PATHS ${tvm_SOURCE_DIR}/lib)
-  # link_directories(onnxruntime_test_all ${TVM_LIBS})
-  find_library(PYTHON_LIBS NAMES libpython PATHS /usr/local/lib)
-  #target_link_libraries(onnxruntime_test_all PRIVATE ${PYTHON_LIBRARIES} -lutil)
-  # set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-rpath,${TVM_LIBS}")
-endif()
-
 include(onnxruntime_fuzz_test.cmake)
--- a/docs/TVM_EP.md
+++ b/docs/TVM_EP.md
@ -69,7 +69,7 @@ python3 -m pip install $whl_path
 Package for TVM EP:
 ```bash
 cd <path_to_onnx_runtime>
-python3 -m pip3 uninstall onnxruntime onnxruntime-tvm -y
+python3 -m pip uninstall onnxruntime onnxruntime-tvm -y
 whl_path=$(find ./build/<OS_NAME>/Release/dist -name "*.whl")
 python3 -m pip install $whl_path
 ```
@ -126,7 +126,7 @@ tvm_session = onnxruntime.InferenceSession(model_path, sess_options=so, provider
 ```

 ## Samples
- [Sample notebook for ResNet50 inference with TVM EP](https://github.com/octoml/onnxruntime/blob/vc/rename/docs/python/inference/notebooks/onnxruntime-tvm-tutorial.ipynb)
+- [Sample notebook for ResNet50 inference with TVM EP](https://github.com/microsoft/onnxruntime/blob/master/docs/python/inference/notebooks/onnxruntime-tvm-tutorial.ipynb)

 ## Known issues
 - At this moment, the TVM EP has only been verified on UNIX/Linux systems.
--- a/onnxruntime/core/providers/tvm/custom_logging.cc
+++ b/onnxruntime/core/providers/tvm/custom_logging.cc
@ -0,0 +1,41 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
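+// Enable custom logging - this will cause TVM to use the custom implementations
+// of tvm::runtime::detail::LogFatalImpl and LogMessageImpl defined in this file.
+// We use them to replace the absolute file path in each message with the bare
+// file name and to prefix the message with a timestamp.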
+
+#include <ctime>
+#include <iomanip>
+#include <iostream>
+#include <iterator>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+// Returns the part of file_path that follows the last occurrence of sep.
+// When sep does not occur at all, the whole path is returned unchanged.
+// TODO(agladyshev): Make conditional choice of sep for Windows and UNIX
+std::string GetFileName(const std::string& file_path, char sep = '/') {
+    const std::string::size_type last_sep = file_path.find_last_of(sep);
+    if (last_sep == std::string::npos) {
+        return file_path;
+    }
+    return file_path.substr(last_sep + 1);
+}
+
+// Builds the text of one log record in the form
+//   [HH:MM:SS][TVM] <file name>:<lineno>: <message>
+// where the time is the current local time and <file name> is `file` with its
+// directory part removed by GetFileName.
+std::string GetTimedLogMessage(const std::string& file, int lineno, const std::string& message) {
+    const std::time_t now = std::time(nullptr);
+    // std::localtime hands out a pointer to shared static storage, which is not
+    // safe when several threads log at once; use the re-entrant variants and a
+    // local buffer instead. The buffer is value-initialized, so a failed
+    // conversion prints 00:00:00 rather than dereferencing a null pointer.
+    std::tm local_time{};
+#ifdef _WIN32
+    localtime_s(&local_time, &now);
+#else
+    localtime_r(&now, &local_time);
+#endif
+    std::ostringstream sstream;
+    sstream << "[" << std::put_time(&local_time, "%H:%M:%S") << "][TVM] "
+            << GetFileName(file) << ":" << lineno << ": " << message;
+    return sstream.str();
+}
+
+namespace tvm {
+namespace runtime {
+namespace detail {
+    // Handler for fatal TVM log messages: formats the record with
+    // GetTimedLogMessage and reports it by throwing std::runtime_error.
+    // The function never returns normally, hence [[noreturn]].
+    // NOTE(review): TVM's logging header is expected to declare this hook as
+    // [[noreturn]] too; the attribute has to agree across translation units - confirm.
+    [[noreturn]] void LogFatalImpl(const std::string& file, int lineno, const std::string& message) {
+        throw std::runtime_error(GetTimedLogMessage(file, lineno, message));
+    }
+
+    // Handler for non-fatal TVM log messages: writes the formatted record to
+    // std::cerr followed by a newline and a flush.
+    void LogMessageImpl(const std::string& file, int lineno, const std::string& message) {
+        std::cerr << GetTimedLogMessage(file, lineno, message) << std::endl;
+    }
+}  // namespace detail
+}  // namespace runtime
+}  // namespace tvm
--- a/onnxruntime/core/providers/tvm/tvm_execution_provider.cc
+++ b/onnxruntime/core/providers/tvm/tvm_execution_provider.cc
@ -297,6 +297,7 @@ TvmExecutionProvider::GetCapability(const GraphViewer& graph_viewer,

 common::Status TvmExecutionProvider::Compile(const std::vector<Node*>& nodes,
                                              std::vector<NodeComputeInfo>& node_compute_funcs) {
+  PrintProviderOptions();
  for (auto* fused_node : nodes) {
    auto func_body = fused_node->GetFunctionBody();
    if (!func_body)
@ -446,8 +447,6 @@ void TvmExecutionProvider::ProcessInfo() {
  if(info_.opt_level < 1) {
    info_.opt_level = default_opt_level;
  }
-
-  PrintInfo();
 }

 void TvmExecutionProvider::ProcessCPUTarget() {
@ -471,8 +470,8 @@ void TvmExecutionProvider::ProcessGPUTarget() {
  ORT_NOT_IMPLEMENTED("GPU target auto-defenition is not implemented now!");
 }

-void TvmExecutionProvider::PrintInfo() const {
-  LOG(INFO) << "TVM EP options:\n" <<
+void TvmExecutionProvider::PrintProviderOptions() const {
+  LOGS(*GetLogger(), INFO) << "TVM EP options:\n" <<
  "target: " << info_.target << "\n" <<
  "target_host: " << info_.target_host << "\n" <<
  "opt level: " << info_.opt_level << "\n" <<
--- a/onnxruntime/core/providers/tvm/tvm_execution_provider.h
+++ b/onnxruntime/core/providers/tvm/tvm_execution_provider.h
@ -52,7 +52,7 @@ class TvmExecutionProvider : public IExecutionProvider {
  void ProcessInfo();
  void ProcessCPUTarget();
  void ProcessGPUTarget();
-  void PrintInfo() const;
+  void PrintProviderOptions() const;
  // Bindings for compute info
  int CreateStateFunc(ComputeContext*, FunctionState*);
  TvmModule* CompileFunc(std::string func_name, const TVMTensorShapes& input_shapes);
--- a/onnxruntime/core/providers/tvm/tvm_utils.h
+++ b/onnxruntime/core/providers/tvm/tvm_utils.h
@ -23,6 +23,8 @@ inline DLDataType GetDataType(ONNXTensorElementDataType type) {
    return {kDLInt, 64, 1};
  } else if (type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
    return {kDLInt, 32, 1};
+  } else if (type == ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL) {
+    return {kDLUInt, 1, 1};
  } else {
    ORT_NOT_IMPLEMENTED("Unsupported data type");
  }
--- a/onnxruntime/python/providers/tvm/extend_python_file.py
+++ b/onnxruntime/python/providers/tvm/extend_python_file.py
@ -0,0 +1,47 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.  See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import argparse
+import textwrap
+
+
+def rewrite_target_file(target):
+    """Append the TVM EP import snippet to ``target`` unless it is already there.
+
+    The snippet imports ``tvm`` and ``onnxruntime.providers.tvm`` and turns an
+    ``ImportError`` from either import into a warning.
+
+    Args:
+        target: Path of the Python file to extend. It is created when missing.
+    """
+    snippet = textwrap.dedent(
+        """
+        import warnings
+
+        try:
+            # This import is necessary in order to delegate the loading of libtvm.so to TVM.
+            import tvm
+        except ImportError as e:
+            warnings.warn(
+                f"WARNING: Failed to import TVM, libtvm.so was not loaded. More details: {e}"
+            )
+        try:
+            # Working between the C++ and Python parts in TVM EP is done using the PackedFunc and
+            # Registry classes. In order to use a Python function in C++ code, it must be registered in
+            # the global table of functions. Registration is carried out through the JIT interface,
+            # so it is necessary to call special functions for registration.
+            # To do this, we need to make the following import.
+            import onnxruntime.providers.tvm
+        except ImportError as e:
+            warnings.warn(
+                f"WARNING: Failed to register python functions to work with TVM EP. More details: {e}"
+            )
+        """
+    )
+    # 'a+' creates the file when it is missing and sends every write to the end,
+    # while still allowing the current content to be read back first.
+    with open(target, 'a+') as f:
+        f.seek(0)
+        if snippet in f.read():
+            # Already extended (e.g. the script was run twice): do not duplicate.
+            return
+        f.write(snippet)
+
+
+def main():
+    """Read the required ``--target_file`` option and extend that file."""
+    arg_parser = argparse.ArgumentParser()
+    arg_parser.add_argument(
+        "--target_file",
+        type=str,
+        required=True,
+        help="Path to the file to be expanded.",
+    )
+    rewrite_target_file(arg_parser.parse_args().target_file)
+
+
+if __name__ == '__main__':
+    main()
--- a/onnxruntime/test/python/onnxruntime_test_python.py
+++ b/onnxruntime/test/python/onnxruntime_test_python.py
@ -18,7 +18,20 @@ from onnxruntime.capi.onnxruntime_pybind11_state import Fail
 if platform.system() == 'Windows' and sys.version_info.major >= 3 and sys.version_info.minor >= 8:
    os.add_dll_directory(os.getcwd())

-available_providers = [
+available_providers = [provider for provider in onnxrt.get_available_providers()]
+
+# TVM EP doesn't support:
+# * calling Run() on different threads using the same session object
+# * symbolic inputs
+# * string inputs
+# * byte type inputs
+# * object type inputs
+# * void type inputs
+# * SequenceConstruct operator
+# * custom operators
+# * testSequenceInsert
+# * testSequenceLength
+available_providers_without_tvm = [
    provider for provider in onnxrt.get_available_providers()
    if provider not in {'TvmExecutionProvider'}]

@ -383,18 +396,19 @@ class TestInferenceSession(unittest.TestCase):
        np.testing.assert_allclose(output_expected, rescontiguous[0], rtol=1e-05, atol=1e-08)

    def testRunModelMultipleThreads(self):
-        available_providers = onnxrt.get_available_providers()
-
-        # Skip this test for a "pure" DML onnxruntime python wheel. We keep this test enabled for instances where both DML and CUDA
-        # EPs are available (Windows GPU CI pipeline has this config) - this test will pass because CUDA has higher precendence than DML
-        # and the nodes are assigned to only the CUDA EP (which supports this test)
-        if ('DmlExecutionProvider' in available_providers and not 'CUDAExecutionProvider' in available_providers):
-            print("Skipping testRunModelMultipleThreads as the DML EP does not support calling Run() on different threads using the same session object ")
+        # Skip this test for a "pure" DML onnxruntime python wheel.
+        # We keep this test enabled for instances where both DML and CUDA EPs are available
+        # (Windows GPU CI pipeline has this config) - this test will pass because CUDA has higher precedence
+        # than DML and the nodes are assigned to only the CUDA EP (which supports this test).
+        if 'DmlExecutionProvider' in available_providers and 'CUDAExecutionProvider' not in available_providers:
+            print("Skipping testRunModelMultipleThreads as the DML EP does not support calling Run()"
+                  " on different threads using the same session object.")
        else:
            so = onnxrt.SessionOptions()
            so.log_verbosity_level = 1
            so.logid = "MultiThreadsTest"
-            sess = onnxrt.InferenceSession(get_name("mul_1.onnx"), sess_options=so, providers=available_providers)
+            sess = onnxrt.InferenceSession(get_name("mul_1.onnx"), sess_options=so,
+                                           providers=available_providers_without_tvm)
            ro1 = onnxrt.RunOptions()
            ro1.logid = "thread1"
            t1 = threading.Thread(target=self.run_model, args=(sess, ro1))
@ -415,7 +429,7 @@ class TestInferenceSession(unittest.TestCase):
        np.testing.assert_allclose(output_expected, res[0], rtol=1e-05, atol=1e-08)

    def testStringListAsInput(self):
-        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=onnxrt.get_available_providers())
+        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers_without_tvm)
        x = np.array(['this', 'is', 'identity', 'test'], dtype=str).reshape((2, 2))
        x_name = sess.get_inputs()[0].name
        res = sess.run([], {x_name: x.tolist()})
@ -426,7 +440,7 @@ class TestInferenceSession(unittest.TestCase):
        self.assertTrue('CPU' in device or 'GPU' in device)

    def testRunModelSymbolicInput(self):
-        sess = onnxrt.InferenceSession(get_name("matmul_2.onnx"), providers=available_providers)
+        sess = onnxrt.InferenceSession(get_name("matmul_2.onnx"), providers=available_providers_without_tvm)
        x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
        input_name = sess.get_inputs()[0].name
        self.assertEqual(input_name, "X")
@ -475,7 +489,7 @@ class TestInferenceSession(unittest.TestCase):
        np.testing.assert_equal(output_expected, res[0])

    def testStringInput1(self):
-        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=onnxrt.get_available_providers())
+        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers_without_tvm)
        x = np.array(['this', 'is', 'identity', 'test'], dtype=str).reshape((2, 2))

        x_name = sess.get_inputs()[0].name
@ -496,7 +510,7 @@ class TestInferenceSession(unittest.TestCase):
        np.testing.assert_equal(x, res[0])

    def testStringInput2(self):
-        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=onnxrt.get_available_providers())
+        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers_without_tvm)
        x = np.array(['Olá', '你好', '여보세요', 'hello'], dtype=str).reshape((2, 2))

        x_name = sess.get_inputs()[0].name
@ -517,7 +531,7 @@ class TestInferenceSession(unittest.TestCase):
        np.testing.assert_equal(x, res[0])

    def testInputBytes(self):
-        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers)
+        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers_without_tvm)
        x = np.array([b'this', b'is', b'identity', b'test']).reshape((2, 2))

        x_name = sess.get_inputs()[0].name
@ -538,7 +552,7 @@ class TestInferenceSession(unittest.TestCase):
        np.testing.assert_equal(x, res[0].astype('|S8'))

    def testInputObject(self):
-        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers)
+        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers_without_tvm)
        x = np.array(['this', 'is', 'identity', 'test'], object).reshape((2, 2))

        x_name = sess.get_inputs()[0].name
@ -559,7 +573,7 @@ class TestInferenceSession(unittest.TestCase):
        np.testing.assert_equal(x, res[0])

    def testInputVoid(self):
-        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers)
+        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers_without_tvm)
        # numpy 1.20+ doesn't automatically pad the bytes based entries in the array when dtype is np.void,
        # so we use inputs where that is the case
        x = np.array([b'must', b'have', b'same', b'size'], dtype=np.void).reshape((2, 2))
@ -654,7 +668,7 @@ class TestInferenceSession(unittest.TestCase):

    def testSequenceLength(self):
        sess = onnxrt.InferenceSession(get_name("sequence_length.onnx"),
-                                       providers=onnxrt.get_available_providers())
+                                       providers=available_providers_without_tvm)
        x = [
            np.array([1.0, 0.0, 3.0, 44.0, 23.0, 11.0], dtype=np.float32).reshape((2, 3)),
            np.array([1.0, 0.0, 3.0, 44.0, 23.0, 11.0], dtype=np.float32).reshape((2, 3))
@ -676,7 +690,7 @@ class TestInferenceSession(unittest.TestCase):

    def testSequenceConstruct(self):
        sess = onnxrt.InferenceSession(get_name("sequence_construct.onnx"),
-                                       providers=available_providers)
+                                       providers=available_providers_without_tvm)

        self.assertEqual(sess.get_inputs()[0].type, 'tensor(int64)')
        self.assertEqual(sess.get_inputs()[1].type, 'tensor(int64)')
@ -706,7 +720,7 @@ class TestInferenceSession(unittest.TestCase):
        opt = onnxrt.SessionOptions()
        opt.execution_mode = onnxrt.ExecutionMode.ORT_SEQUENTIAL
        sess = onnxrt.InferenceSession(get_name("sequence_insert.onnx"), sess_options=opt,
-                                       providers=onnxrt.get_available_providers())
+                                       providers=available_providers_without_tvm)

        self.assertEqual(sess.get_inputs()[0].type, 'seq(tensor(int64))')
        self.assertEqual(sess.get_inputs()[1].type, 'tensor(int64)')
@ -837,10 +851,8 @@ class TestInferenceSession(unittest.TestCase):
        so1 = onnxrt.SessionOptions()
        so1.register_custom_ops_library(shared_library)

-        available_providers = onnxrt.get_available_providers()
-
        # Model loading successfully indicates that the custom op node could be resolved successfully
-        sess1 = onnxrt.InferenceSession(custom_op_model, sess_options=so1, providers=available_providers)
+        sess1 = onnxrt.InferenceSession(custom_op_model, sess_options=so1, providers=available_providers_without_tvm)
        #Run with input data
        input_name_0 = sess1.get_inputs()[0].name
        input_name_1 = sess1.get_inputs()[1].name
@ -856,12 +868,12 @@ class TestInferenceSession(unittest.TestCase):
        so2 = so1

        # Model loading successfully indicates that the custom op node could be resolved successfully
-        sess2 = onnxrt.InferenceSession(custom_op_model, sess_options=so2, providers=available_providers)
+        sess2 = onnxrt.InferenceSession(custom_op_model, sess_options=so2, providers=available_providers_without_tvm)

        # Create another SessionOptions instance with the same shared library referenced
        so3 = onnxrt.SessionOptions()
        so3.register_custom_ops_library(shared_library)
-        sess3 = onnxrt.InferenceSession(custom_op_model, sess_options=so3, providers=available_providers)
+        sess3 = onnxrt.InferenceSession(custom_op_model, sess_options=so3, providers=available_providers_without_tvm)

    def testOrtValue(self):

--- a/setup.py
+++ b/setup.py
@ -12,7 +12,6 @@ from shutil import copyfile
 import platform
 import subprocess
 import sys
-import textwrap
 import datetime

 from pathlib import Path
@ -146,33 +145,6 @@ try:
                    f.write('    import os\n')
                    f.write('    os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"\n')

-        def _rewrite_ld_preload_tvm(self):
-            with open('onnxruntime/capi/_ld_preload.py', 'a') as f:
-                f.write(textwrap.dedent(
-                    """
-                    import warnings
-
-                    try:
-                        # This import is necessary in order to delegate the loading of libtvm.so to TVM.
-                        import tvm
-                    except ImportError as e:
-                        warnings.warn(
-                            f"WARNING: Failed to import TVM, libtvm.so was not loaded. More details: {e}"
-                        )
-                    try:
-                        # Working between the C++ and Python parts in TVM EP is done using the PackedFunc and
-                        # Registry classes. In order to use a Python function in C++ code, it must be registered in
-                        # the global table of functions. Registration is carried out through the JIT interface,
-                        # so it is necessary to call special functions for registration.
-                        # To do this, we need to make the following import.
-                        import onnxruntime.providers.tvm
-                    except ImportError as e:
-                        warnings.warn(
-                            f"WARNING: Failed to register python functions to work with TVM EP. More details: {e}"
-                        )
-                    """
-                ))
-
        def run(self):
            if is_manylinux:
                source = 'onnxruntime/capi/onnxruntime_pybind11_state.so'
@ -235,8 +207,6 @@ try:
                self._rewrite_ld_preload(to_preload)
                self._rewrite_ld_preload_cuda(to_preload_cuda)
                self._rewrite_ld_preload_tensorrt(to_preload_tensorrt)
-            if package_name == 'onnxruntime-tvm':
-                self._rewrite_ld_preload_tvm()
            _bdist_wheel.run(self)
            if is_manylinux and not disable_auditwheel_repair:
                file = glob(path.join(self.dist_dir, '*linux*.whl'))[0]
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@ -1593,6 +1593,10 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs):
            if args.use_tensorrt:
                return

+            python_path = None
+            if args.use_tvm:
+                python_path = os.path.join(build_dir, config, "_deps", "tvm-src", "python")
+
            # Disable python tests in a reduced build as we don't know which ops have been included and which
            # models can run.
            if is_reduced_ops_build(args) or args.minimal_build is not None:
@ -1601,7 +1605,8 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs):
            if is_windows():
                cwd = os.path.join(cwd, config)

-            run_subprocess([sys.executable, 'onnxruntime_test_python.py'], cwd=cwd, dll_path=dll_path)
+            run_subprocess([sys.executable, 'onnxruntime_test_python.py'],
+                           cwd=cwd, dll_path=dll_path, python_path=python_path)

            if not args.disable_contrib_ops:
                run_subprocess([sys.executable, 'onnxruntime_test_python_sparse_matmul.py'],
@ -1649,7 +1654,8 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs):
                onnx_test = False

            if onnx_test:
-                run_subprocess([sys.executable, 'onnxruntime_test_python_backend.py'], cwd=cwd, dll_path=dll_path)
+                run_subprocess([sys.executable, 'onnxruntime_test_python_backend.py'], cwd=cwd, dll_path=dll_path,
+                               python_path=python_path)
                if not args.disable_contrib_ops:
                    run_subprocess([sys.executable, '-m', 'unittest', 'discover', '-s', 'quantization'],
                                   cwd=cwd, dll_path=dll_path)
@ -2055,7 +2061,7 @@ def main():
    if args.use_migraphx:
        args.use_rocm = True

-    if args.build_wheel or args.gen_doc:
+    if args.build_wheel or args.gen_doc or args.use_tvm:
        args.enable_pybind = True

    if args.build_csharp or args.build_nuget or args.build_java or args.build_nodejs:
--- a/tools/ci_build/github/azure-pipelines/linux-tvm-ci-pipeline.yml
+++ b/tools/ci_build/github/azure-pipelines/linux-tvm-ci-pipeline.yml
@ -0,0 +1,61 @@
+# Pipeline job that builds ONNX Runtime with --use_tvm inside the Ubuntu Docker image
+# and publishes the resulting test reports.
+jobs:
+  - job: Linux_TVM_CI
+    timeoutInMinutes: 180
+    workspace:
+      clean: all
+    pool: Linux-CPU-2019
+    steps:
+      - checkout: self
+        clean: true
+        submodules: recursive
+
+      # Docker image used by the build step below, described by Dockerfile.ubuntu.
+      - template: templates/get-docker-image-steps.yml
+        parameters:
+          Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.ubuntu
+          Context: tools/ci_build/github/linux/docker
+          DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )"
+          Repository: onnxruntimecpubuild
+
+      # Inside the container: run install_tvm_test_dependencies.sh, then build.py with --use_tvm.
+      # The sources are mounted at /onnxruntime_src and the binaries directory at /build.
+      - task: CmdLine@2
+        inputs:
+          script: |
+            mkdir -p $HOME/.onnx
+            docker run --rm \
+              --volume /data/onnx:/data/onnx:ro \
+              --volume $(Build.SourcesDirectory):/onnxruntime_src \
+              --volume $(Build.BinariesDirectory):/build \
+              --volume /data/models:/build/models:ro \
+              --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
+              -e NIGHTLY_BUILD \
+              -e BUILD_BUILDNUMBER \
+              onnxruntimecpubuild \
+                /bin/bash -c \
+                  "/onnxruntime_src/tools/ci_build/github/linux/tvm/install_tvm_test_dependencies.sh \
+                    python3 && \
+                  python3 /onnxruntime_src/tools/ci_build/build.py \
+                    --build_dir /build \
+                    --config Release \
+                    --skip_submodule_sync \
+                    --parallel \
+                    --enable_pybind \
+                    --disable_contrib_ops \
+                    --disable_ml_ops \
+                    --skip_onnx_tests \
+                    --use_tvm"
+          workingDirectory: $(Build.SourcesDirectory)
+
+      # Publish every *.results.xml found under the binaries directory, also when the build step failed.
+      - task: PublishTestResults@2
+        displayName: 'Publish unit test results'
+        inputs:
+          testResultsFiles: '**/*.results.xml'
+          searchFolder: '$(Build.BinariesDirectory)'
+          testRunTitle: 'Unit Test Run'
+        condition: succeededOrFailed()
+
+      - template: templates/component-governance-component-detection-steps.yml
+        parameters:
+          condition: 'succeeded'
+
+      # Cleanup runs regardless of the outcome of the earlier steps.
+      - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
+        displayName: 'Clean Agent Directories'
+        condition: always()
--- a/tools/ci_build/github/linux/tvm/install_tvm_test_dependencies.sh
+++ b/tools/ci_build/github/linux/tvm/install_tvm_test_dependencies.sh
@ -0,0 +1,5 @@
+#!/bin/bash
+# Installs the Python packages decorator and scipy with pip, using the Python
+# interpreter given as the first argument.
+#
+# Usage: install_tvm_test_dependencies.sh <python_executable>
+set -e -x
+
+# Fail with a clear message when the interpreter argument is missing instead of
+# trying to execute "-m" as a command.
+PYTHON_EXE="${1:?usage: install_tvm_test_dependencies.sh <python_executable>}"
+# Quoted so that an interpreter path containing spaces is passed as one word.
+"${PYTHON_EXE}" -m pip install decorator scipy