From 7dc7529ec8e4384bddcd090f0c14c42c98cf37a1 Mon Sep 17 00:00:00 2001
From: Alexey Gladyshev <wotpricol@mail.ru>
Date: Thu, 24 Feb 2022 18:24:23 +0300
Subject: [PATCH] [TVM EP] Integrate tests for TVM EP into public onnxruntime
 CI (#10505)

* add support for bool type

* add TVM EP support for tests

* include TVM EP in python test pool

* fix pylint

* moved technical imports to a separate file

* clean up post build actions & move _ld_preload.py extension to CMake level

* add files to include TVM EP in CI

* implement custom logger for TVM

* replace TVM logging with ONNX RT logging

* update link for TVM EP tutorial

* clean up TVM EP cmake

* add pybind auto enabling for TVM EP

* fix blank spaces

* code review fixes

* replace print with comment

* add list of EP without TVM EP

* enable onnx tests

* disable contrib ops and ml ops

* reuse Dockerfile.ubuntu

* Move install_tvm_test_dependencies.sh out of Docker context dir, update build definition.

Co-authored-by: Edward Chen <18449977+edgchen1@users.noreply.github.com>
---
 cmake/CMakeLists.txt                          |  4 ++
 cmake/external/tvm.cmake                      |  1 +
 cmake/onnxruntime_python.cmake                | 21 +++----
 cmake/onnxruntime_unittests.cmake             |  8 ---
 docs/TVM_EP.md                                |  4 +-
 .../core/providers/tvm/custom_logging.cc      | 41 +++++++++++++
 .../providers/tvm/tvm_execution_provider.cc   |  7 +--
 .../providers/tvm/tvm_execution_provider.h    |  2 +-
 onnxruntime/core/providers/tvm/tvm_utils.h    |  2 +
 .../providers/tvm/extend_python_file.py       | 47 ++++++++++++++
 .../test/python/onnxruntime_test_python.py    | 60 ++++++++++--------
 setup.py                                      | 30 ---------
 tools/ci_build/build.py                       | 12 +++-
 .../azure-pipelines/linux-tvm-ci-pipeline.yml | 61 +++++++++++++++++++
 .../tvm/install_tvm_test_dependencies.sh      |  5 ++
 15 files changed, 220 insertions(+), 85 deletions(-)
 create mode 100644 onnxruntime/core/providers/tvm/custom_logging.cc
 create mode 100644 onnxruntime/python/providers/tvm/extend_python_file.py
 create mode 100644 tools/ci_build/github/azure-pipelines/linux-tvm-ci-pipeline.yml
 create mode 100755 tools/ci_build/github/linux/tvm/install_tvm_test_dependencies.sh

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 6ae60eb453..28ccea6473 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -1415,10 +1415,14 @@ if (onnxruntime_USE_TVM)
     set(USE_CUDA ON CACHE BOOL "Only defined for TVM" FORCE)
   endif()
 
+  add_compile_definitions(TVM_LOG_CUSTOMIZE=1)
+  add_library(tvm_custom_logger STATIC ${ONNXRUNTIME_ROOT}/core/providers/tvm/custom_logging.cc)
+
   set(USE_OPENMP gnu CACHE STRING "Only defined for TVM")
   add_subdirectory(${tvm_SOURCE_DIR} ${tvm_BINARY_DIR} EXCLUDE_FROM_ALL)
 
   set_target_properties(tvm PROPERTIES FOLDER ${tvm_SOURCE_DIR})
+  target_link_libraries(tvm PUBLIC tvm_custom_logger)
 
   set(TVM_INCLUDES ${tvm_SOURCE_DIR}/include
     ${tvm_SOURCE_DIR}/3rdparty/dmlc-core/include
diff --git a/cmake/external/tvm.cmake b/cmake/external/tvm.cmake
index d3c1a1900e..7a4ab8179b 100644
--- a/cmake/external/tvm.cmake
+++ b/cmake/external/tvm.cmake
@@ -10,6 +10,7 @@ if (onnxruntime_USE_TVM)
   FetchContent_GetProperties(tvm)
   if(NOT tvm_POPULATED)
     FetchContent_Populate(tvm)
+    file(CREATE_LINK "${tvm_BINARY_DIR}" "${tvm_SOURCE_DIR}/build" SYMBOLIC)  # <tvm source>/build -> TVM binary dir
   endif()
 
   set(tvm_INCLUDE_DIRS ${tvm_SOURCE_DIR}/include)
diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake
index a10ed54e5e..746ec55d47 100644
--- a/cmake/onnxruntime_python.cmake
+++ b/cmake/onnxruntime_python.cmake
@@ -716,26 +716,21 @@ if (onnxruntime_USE_TVM)
     COMMAND ${CMAKE_COMMAND} -E copy
         $<TARGET_FILE:onnxruntime_providers_tvm>
         $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
-    # TODO(vvchernov): why?
-    COMMAND ${CMAKE_COMMAND} -E copy
-        ${tvm_BINARY_DIR}/libtvm*
-        ${tvm_SOURCE_DIR}/python/tvm
   )
 
-  # TODO(vvchernov): repeat?
   add_custom_command(
     TARGET onnxruntime_pybind11_state POST_BUILD
       WORKING_DIRECTORY ${tvm_SOURCE_DIR}/python
-      COMMAND ${Python_EXECUTABLE} setup.py build_ext --inplace
-      COMMAND ${CMAKE_COMMAND} -E rm
-        ${tvm_SOURCE_DIR}/python/tvm/*.so
-      COMMAND ${CMAKE_COMMAND} -E env TVM_LIBRARY_PATH=${tvm_BINARY_DIR}
-          ${Python_EXECUTABLE} setup.py bdist_wheel
-      COMMAND ${CMAKE_COMMAND} -E copy
-        ${tvm_BINARY_DIR}/libtvm*
-        ${tvm_SOURCE_DIR}/python/tvm
+      COMMAND ${Python_EXECUTABLE} setup.py bdist_wheel
     )
 
+  # Run extend_python_file.py on the staged _ld_preload.py so that it also contains the TVM imports.
+  add_custom_command(TARGET onnxruntime_pybind11_state POST_BUILD
+    COMMAND ${Python_EXECUTABLE} $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/providers/tvm/extend_python_file.py
+          --target_file $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/_ld_preload.py
+    VERBATIM
+  )
+
 endif()
 
 if (onnxruntime_USE_DML)
diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
index b074d10028..eb132632f5 100644
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@@ -1291,12 +1291,4 @@ if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
   endif()
 endif()
 
-if (onnxruntime_USE_TVM)
-  # find_library(TVM_LIBS NAMES libtvm.so PATHS ${tvm_SOURCE_DIR}/lib)
-  # link_directories(onnxruntime_test_all ${TVM_LIBS})
-  find_library(PYTHON_LIBS NAMES libpython PATHS /usr/local/lib)
-  #target_link_libraries(onnxruntime_test_all PRIVATE ${PYTHON_LIBRARIES} -lutil)
-  # set(CMAKE_SHARED_LINKER_FLAGS "-Wl,-rpath,${TVM_LIBS}")
-endif()
-
 include(onnxruntime_fuzz_test.cmake)
diff --git a/docs/TVM_EP.md b/docs/TVM_EP.md
index a80f8d2c47..dc4273433b 100644
--- a/docs/TVM_EP.md
+++ b/docs/TVM_EP.md
@@ -69,7 +69,7 @@ python3 -m pip install $whl_path
 Package for TVM EP:
 ```bash
 cd <path_to_onnx_runtime>
-python3 -m pip3 uninstall onnxruntime onnxruntime-tvm -y
+python3 -m pip uninstall onnxruntime onnxruntime-tvm -y
 whl_path=$(find ./build/<OS_NAME>/Release/dist -name "*.whl")
 python3 -m pip install $whl_path
 ```
@@ -126,7 +126,7 @@ tvm_session = onnxruntime.InferenceSession(model_path, sess_options=so, provider
 ```
 
 ## Samples
-- [Sample notebook for ResNet50 inference with TVM EP](https://github.com/octoml/onnxruntime/blob/vc/rename/docs/python/inference/notebooks/onnxruntime-tvm-tutorial.ipynb)
+- [Sample notebook for ResNet50 inference with TVM EP](https://github.com/microsoft/onnxruntime/blob/master/docs/python/inference/notebooks/onnxruntime-tvm-tutorial.ipynb)
 
 ## Known issues
 - At this moment, the TVM EP has only been verified on UNIX/Linux systems.
diff --git a/onnxruntime/core/providers/tvm/custom_logging.cc b/onnxruntime/core/providers/tvm/custom_logging.cc
new file mode 100644
index 0000000000..08053e456a
--- /dev/null
+++ b/onnxruntime/core/providers/tvm/custom_logging.cc
@@ -0,0 +1,51 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+//
+// Enable custom logging - this will cause TVM to use a custom implementation
+// of tvm::runtime::detail::LogMessage. We use this to change the absolute
+// file path to relative file path.
+
+#include <ctime>
+#include <iomanip>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+// Returns the part of file_path that follows the last occurrence of sep,
+// or the whole path when sep does not occur in it.
+// TODO(agladyshev): Make conditional choice of sep for Windows and UNIX
+std::string GetFileName(const std::string& file_path, char sep = '/') {
+    const std::string::size_type sep_pos = file_path.find_last_of(sep);
+    return sep_pos == std::string::npos ? file_path : file_path.substr(sep_pos + 1);
+}
+
+// Builds a log line of the form "[HH:MM:SS][TVM] <file name>:<lineno>: <message>",
+// stamped with the current local time.
+std::string GetTimedLogMessage(const std::string& file, int lineno, const std::string& message) {
+    std::stringstream sstream;
+    std::string file_name = GetFileName(file);
+    std::time_t t = std::time(nullptr);
+    // NOTE(review): std::localtime returns a pointer to shared static storage, so
+    // concurrent callers can race on it - confirm whether TVM logs from several threads.
+    sstream << "[" << std::put_time(std::localtime(&t), "%H:%M:%S") << "][TVM] "
+            << file_name << ":" << lineno << ": " << message;
+    return sstream.str();
+}
+
+namespace tvm {
+namespace runtime {
+namespace detail {
+    // Custom LogFatalImpl: reports the message by throwing std::runtime_error.
+    void LogFatalImpl(const std::string& file, int lineno, const std::string& message) {
+        throw std::runtime_error(GetTimedLogMessage(file, lineno, message));
+    }
+
+    // Custom LogMessageImpl: writes the message to std::cerr, one record per line.
+    void LogMessageImpl(const std::string& file, int lineno, const std::string& message) {
+        std::cerr << GetTimedLogMessage(file, lineno, message) << std::endl;
+    }
+}  // namespace detail
+}  // namespace runtime
+}  // namespace tvm
diff --git a/onnxruntime/core/providers/tvm/tvm_execution_provider.cc b/onnxruntime/core/providers/tvm/tvm_execution_provider.cc
index 3b25b3cfbd..6628d28213 100644
--- a/onnxruntime/core/providers/tvm/tvm_execution_provider.cc
+++ b/onnxruntime/core/providers/tvm/tvm_execution_provider.cc
@@ -297,6 +297,7 @@ TvmExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
 
 common::Status TvmExecutionProvider::Compile(const std::vector<Node*>& nodes,
                                               std::vector<NodeComputeInfo>& node_compute_funcs) {
+  PrintProviderOptions();
   for (auto* fused_node : nodes) {
     auto func_body = fused_node->GetFunctionBody();
     if (!func_body)
@@ -446,8 +447,6 @@ void TvmExecutionProvider::ProcessInfo() {
   if(info_.opt_level < 1) {
     info_.opt_level = default_opt_level;
   }
-
-  PrintInfo();
 }
 
 void TvmExecutionProvider::ProcessCPUTarget() {
@@ -471,8 +470,8 @@ void TvmExecutionProvider::ProcessGPUTarget() {
   ORT_NOT_IMPLEMENTED("GPU target auto-defenition is not implemented now!");
 }
 
-void TvmExecutionProvider::PrintInfo() const {
-  LOG(INFO) << "TVM EP options:\n" <<
+void TvmExecutionProvider::PrintProviderOptions() const {
+  LOGS(*GetLogger(), INFO) << "TVM EP options:\n" <<
   "target: " << info_.target << "\n" <<
   "target_host: " << info_.target_host << "\n" <<
   "opt level: " << info_.opt_level << "\n" <<
diff --git a/onnxruntime/core/providers/tvm/tvm_execution_provider.h b/onnxruntime/core/providers/tvm/tvm_execution_provider.h
index 633a872e41..6a5d2a2b4c 100644
--- a/onnxruntime/core/providers/tvm/tvm_execution_provider.h
+++ b/onnxruntime/core/providers/tvm/tvm_execution_provider.h
@@ -52,7 +52,7 @@ class TvmExecutionProvider : public IExecutionProvider {
   void ProcessInfo();
   void ProcessCPUTarget();
   void ProcessGPUTarget();
-  void PrintInfo() const;
+  void PrintProviderOptions() const;
   // Bindings for compute info
   int CreateStateFunc(ComputeContext*, FunctionState*);
   TvmModule* CompileFunc(std::string func_name, const TVMTensorShapes& input_shapes);
diff --git a/onnxruntime/core/providers/tvm/tvm_utils.h b/onnxruntime/core/providers/tvm/tvm_utils.h
index 15f51c3fd4..ab0e8da565 100644
--- a/onnxruntime/core/providers/tvm/tvm_utils.h
+++ b/onnxruntime/core/providers/tvm/tvm_utils.h
@@ -23,6 +23,8 @@ inline DLDataType GetDataType(ONNXTensorElementDataType type) {
     return {kDLInt, 64, 1};
   } else if (type == ONNX_TENSOR_ELEMENT_DATA_TYPE_INT32) {
     return {kDLInt, 32, 1};
+  } else if (type == ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL) {
+    return {kDLUInt, 1, 1};
   } else {
     ORT_NOT_IMPLEMENTED("Unsupported data type");
   }
diff --git a/onnxruntime/python/providers/tvm/extend_python_file.py b/onnxruntime/python/providers/tvm/extend_python_file.py
new file mode 100644
index 0000000000..96beb113d6
--- /dev/null
+++ b/onnxruntime/python/providers/tvm/extend_python_file.py
@@ -0,0 +1,62 @@
+# -------------------------------------------------------------------------
+# Copyright (c) Microsoft Corporation.  All rights reserved.
+# Licensed under the MIT License.  See License.txt in the project root for
+# license information.
+# --------------------------------------------------------------------------
+
+import argparse
+import textwrap
+
+
+# Python source appended to the target file. It imports TVM and the TVM EP python helpers,
+# turning an ImportError from either import into a warning.
+TVM_IMPORT_SNIPPET = textwrap.dedent(
+    """
+    import warnings
+
+    try:
+        # This import is necessary in order to delegate the loading of libtvm.so to TVM.
+        import tvm
+    except ImportError as e:
+        warnings.warn(
+            f"WARNING: Failed to import TVM, libtvm.so was not loaded. More details: {e}"
+        )
+    try:
+        # Working between the C++ and Python parts in TVM EP is done using the PackedFunc and
+        # Registry classes. In order to use a Python function in C++ code, it must be registered in
+        # the global table of functions. Registration is carried out through the JIT interface,
+        # so it is necessary to call special functions for registration.
+        # To do this, we need to make the following import.
+        import onnxruntime.providers.tvm
+    except ImportError as e:
+        warnings.warn(
+            f"WARNING: Failed to register python functions to work with TVM EP. More details: {e}"
+        )
+    """
+)
+
+
+def rewrite_target_file(target):
+    """Append TVM_IMPORT_SNIPPET to the file at `target`, creating the file if it does not exist.
+
+    The snippet is written only when the file does not already contain it, so running this
+    script more than once on the same file does not duplicate the imports.
+    """
+    # 'a+' creates a missing file like 'a' does, but also allows reading the current content.
+    with open(target, 'a+') as f:
+        f.seek(0)
+        if TVM_IMPORT_SNIPPET in f.read():
+            return
+        f.write(TVM_IMPORT_SNIPPET)
+
+
+def main():
+    """Parse --target_file from the command line and extend that file."""
+    parser = argparse.ArgumentParser()
+    parser.add_argument("--target_file", type=str, required=True, help="Path to the file to be expanded.")
+    args = parser.parse_args()
+    rewrite_target_file(args.target_file)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py
index af4abb05ac..e6e982aff1 100644
--- a/onnxruntime/test/python/onnxruntime_test_python.py
+++ b/onnxruntime/test/python/onnxruntime_test_python.py
@@ -18,7 +18,20 @@ from onnxruntime.capi.onnxruntime_pybind11_state import Fail
 if platform.system() == 'Windows' and sys.version_info.major >= 3 and sys.version_info.minor >= 8:
     os.add_dll_directory(os.getcwd())
 
-available_providers = [
+available_providers = list(onnxrt.get_available_providers())
+
+# TVM EP doesn't support:
+# * calling Run() on different threads using the same session object
+# * symbolic inputs
+# * string inputs
+# * byte type inputs
+# * object type inputs
+# * void type inputs
+# * SequenceConstruct operator
+# * custom operators
+# * testSequenceInsert
+# * testSequenceLength
+available_providers_without_tvm = [
     provider for provider in onnxrt.get_available_providers()
     if provider not in {'TvmExecutionProvider'}]
 
@@ -383,18 +396,19 @@ class TestInferenceSession(unittest.TestCase):
         np.testing.assert_allclose(output_expected, rescontiguous[0], rtol=1e-05, atol=1e-08)
 
     def testRunModelMultipleThreads(self):
-        available_providers = onnxrt.get_available_providers()
-
-        # Skip this test for a "pure" DML onnxruntime python wheel. We keep this test enabled for instances where both DML and CUDA
-        # EPs are available (Windows GPU CI pipeline has this config) - this test will pass because CUDA has higher precendence than DML
-        # and the nodes are assigned to only the CUDA EP (which supports this test)
-        if ('DmlExecutionProvider' in available_providers and not 'CUDAExecutionProvider' in available_providers):
-            print("Skipping testRunModelMultipleThreads as the DML EP does not support calling Run() on different threads using the same session object ")
+        # Skip this test for a "pure" DML onnxruntime python wheel.
+        # We keep this test enabled for instances where both DML and CUDA EPs are available
+        # (Windows GPU CI pipeline has this config) - this test will pass because CUDA has higher precedence
+        # than DML and the nodes are assigned to only the CUDA EP (which supports this test).
+        if 'DmlExecutionProvider' in available_providers and 'CUDAExecutionProvider' not in available_providers:
+            print("Skipping testRunModelMultipleThreads as the DML EP does not support calling Run()"
+                  " on different threads using the same session object.")
         else:
             so = onnxrt.SessionOptions()
             so.log_verbosity_level = 1
             so.logid = "MultiThreadsTest"
-            sess = onnxrt.InferenceSession(get_name("mul_1.onnx"), sess_options=so, providers=available_providers)
+            sess = onnxrt.InferenceSession(get_name("mul_1.onnx"), sess_options=so,
+                                           providers=available_providers_without_tvm)
             ro1 = onnxrt.RunOptions()
             ro1.logid = "thread1"
             t1 = threading.Thread(target=self.run_model, args=(sess, ro1))
@@ -415,7 +429,7 @@ class TestInferenceSession(unittest.TestCase):
         np.testing.assert_allclose(output_expected, res[0], rtol=1e-05, atol=1e-08)
 
     def testStringListAsInput(self):
-        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=onnxrt.get_available_providers())
+        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers_without_tvm)
         x = np.array(['this', 'is', 'identity', 'test'], dtype=str).reshape((2, 2))
         x_name = sess.get_inputs()[0].name
         res = sess.run([], {x_name: x.tolist()})
@@ -426,7 +440,7 @@ class TestInferenceSession(unittest.TestCase):
         self.assertTrue('CPU' in device or 'GPU' in device)
 
     def testRunModelSymbolicInput(self):
-        sess = onnxrt.InferenceSession(get_name("matmul_2.onnx"), providers=available_providers)
+        sess = onnxrt.InferenceSession(get_name("matmul_2.onnx"), providers=available_providers_without_tvm)
         x = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]], dtype=np.float32)
         input_name = sess.get_inputs()[0].name
         self.assertEqual(input_name, "X")
@@ -475,7 +489,7 @@ class TestInferenceSession(unittest.TestCase):
         np.testing.assert_equal(output_expected, res[0])
 
     def testStringInput1(self):
-        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=onnxrt.get_available_providers())
+        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers_without_tvm)
         x = np.array(['this', 'is', 'identity', 'test'], dtype=str).reshape((2, 2))
 
         x_name = sess.get_inputs()[0].name
@@ -496,7 +510,7 @@ class TestInferenceSession(unittest.TestCase):
         np.testing.assert_equal(x, res[0])
 
     def testStringInput2(self):
-        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=onnxrt.get_available_providers())
+        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers_without_tvm)
         x = np.array(['Olá', '你好', '여보세요', 'hello'], dtype=str).reshape((2, 2))
 
         x_name = sess.get_inputs()[0].name
@@ -517,7 +531,7 @@ class TestInferenceSession(unittest.TestCase):
         np.testing.assert_equal(x, res[0])
 
     def testInputBytes(self):
-        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers)
+        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers_without_tvm)
         x = np.array([b'this', b'is', b'identity', b'test']).reshape((2, 2))
 
         x_name = sess.get_inputs()[0].name
@@ -538,7 +552,7 @@ class TestInferenceSession(unittest.TestCase):
         np.testing.assert_equal(x, res[0].astype('|S8'))
 
     def testInputObject(self):
-        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers)
+        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers_without_tvm)
         x = np.array(['this', 'is', 'identity', 'test'], object).reshape((2, 2))
 
         x_name = sess.get_inputs()[0].name
@@ -559,7 +573,7 @@ class TestInferenceSession(unittest.TestCase):
         np.testing.assert_equal(x, res[0])
 
     def testInputVoid(self):
-        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers)
+        sess = onnxrt.InferenceSession(get_name("identity_string.onnx"), providers=available_providers_without_tvm)
         # numpy 1.20+ doesn't automatically pad the bytes based entries in the array when dtype is np.void,
         # so we use inputs where that is the case
         x = np.array([b'must', b'have', b'same', b'size'], dtype=np.void).reshape((2, 2))
@@ -654,7 +668,7 @@ class TestInferenceSession(unittest.TestCase):
 
     def testSequenceLength(self):
         sess = onnxrt.InferenceSession(get_name("sequence_length.onnx"),
-                                       providers=onnxrt.get_available_providers())
+                                       providers=available_providers_without_tvm)
         x = [
             np.array([1.0, 0.0, 3.0, 44.0, 23.0, 11.0], dtype=np.float32).reshape((2, 3)),
             np.array([1.0, 0.0, 3.0, 44.0, 23.0, 11.0], dtype=np.float32).reshape((2, 3))
@@ -676,7 +690,7 @@ class TestInferenceSession(unittest.TestCase):
 
     def testSequenceConstruct(self):
         sess = onnxrt.InferenceSession(get_name("sequence_construct.onnx"),
-                                       providers=available_providers)
+                                       providers=available_providers_without_tvm)
 
         self.assertEqual(sess.get_inputs()[0].type, 'tensor(int64)')
         self.assertEqual(sess.get_inputs()[1].type, 'tensor(int64)')
@@ -706,7 +720,7 @@ class TestInferenceSession(unittest.TestCase):
         opt = onnxrt.SessionOptions()
         opt.execution_mode = onnxrt.ExecutionMode.ORT_SEQUENTIAL
         sess = onnxrt.InferenceSession(get_name("sequence_insert.onnx"), sess_options=opt,
-                                       providers=onnxrt.get_available_providers())
+                                       providers=available_providers_without_tvm)
 
         self.assertEqual(sess.get_inputs()[0].type, 'seq(tensor(int64))')
         self.assertEqual(sess.get_inputs()[1].type, 'tensor(int64)')
@@ -837,10 +851,8 @@ class TestInferenceSession(unittest.TestCase):
         so1 = onnxrt.SessionOptions()
         so1.register_custom_ops_library(shared_library)
 
-        available_providers = onnxrt.get_available_providers()
-
         # Model loading successfully indicates that the custom op node could be resolved successfully
-        sess1 = onnxrt.InferenceSession(custom_op_model, sess_options=so1, providers=available_providers)
+        sess1 = onnxrt.InferenceSession(custom_op_model, sess_options=so1, providers=available_providers_without_tvm)
         #Run with input data
         input_name_0 = sess1.get_inputs()[0].name
         input_name_1 = sess1.get_inputs()[1].name
@@ -856,12 +868,12 @@ class TestInferenceSession(unittest.TestCase):
         so2 = so1
 
         # Model loading successfully indicates that the custom op node could be resolved successfully
-        sess2 = onnxrt.InferenceSession(custom_op_model, sess_options=so2, providers=available_providers)
+        sess2 = onnxrt.InferenceSession(custom_op_model, sess_options=so2, providers=available_providers_without_tvm)
 
         # Create another SessionOptions instance with the same shared library referenced
         so3 = onnxrt.SessionOptions()
         so3.register_custom_ops_library(shared_library)
-        sess3 = onnxrt.InferenceSession(custom_op_model, sess_options=so3, providers=available_providers)
+        sess3 = onnxrt.InferenceSession(custom_op_model, sess_options=so3, providers=available_providers_without_tvm)
 
     def testOrtValue(self):
 
diff --git a/setup.py b/setup.py
index 2cee9f58ef..744ccd7031 100644
--- a/setup.py
+++ b/setup.py
@@ -12,7 +12,6 @@ from shutil import copyfile
 import platform
 import subprocess
 import sys
-import textwrap
 import datetime
 
 from pathlib import Path
@@ -146,33 +145,6 @@ try:
                     f.write('    import os\n')
                     f.write('    os.environ["ORT_TENSORRT_UNAVAILABLE"] = "1"\n')
 
-        def _rewrite_ld_preload_tvm(self):
-            with open('onnxruntime/capi/_ld_preload.py', 'a') as f:
-                f.write(textwrap.dedent(
-                    """
-                    import warnings
-
-                    try:
-                        # This import is necessary in order to delegate the loading of libtvm.so to TVM.
-                        import tvm
-                    except ImportError as e:
-                        warnings.warn(
-                            f"WARNING: Failed to import TVM, libtvm.so was not loaded. More details: {e}"
-                        )
-                    try:
-                        # Working between the C++ and Python parts in TVM EP is done using the PackedFunc and
-                        # Registry classes. In order to use a Python function in C++ code, it must be registered in
-                        # the global table of functions. Registration is carried out through the JIT interface,
-                        # so it is necessary to call special functions for registration.
-                        # To do this, we need to make the following import.
-                        import onnxruntime.providers.tvm
-                    except ImportError as e:
-                        warnings.warn(
-                            f"WARNING: Failed to register python functions to work with TVM EP. More details: {e}"
-                        )
-                    """
-                ))
-
         def run(self):
             if is_manylinux:
                 source = 'onnxruntime/capi/onnxruntime_pybind11_state.so'
@@ -235,8 +207,6 @@ try:
                 self._rewrite_ld_preload(to_preload)
                 self._rewrite_ld_preload_cuda(to_preload_cuda)
                 self._rewrite_ld_preload_tensorrt(to_preload_tensorrt)
-            if package_name == 'onnxruntime-tvm':
-                self._rewrite_ld_preload_tvm()
             _bdist_wheel.run(self)
             if is_manylinux and not disable_auditwheel_repair:
                 file = glob(path.join(self.dist_dir, '*linux*.whl'))[0]
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 401c4b8bc4..7bd961e2af 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -1593,6 +1593,10 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs):
             if args.use_tensorrt:
                 return
 
+            # Passed to run_subprocess as python_path: the TVM python sources in the build tree (TVM builds only).
+            python_path = (os.path.join(build_dir, config, "_deps", "tvm-src", "python")
+                           if args.use_tvm else None)
+
             # Disable python tests in a reduced build as we don't know which ops have been included and which
             # models can run.
             if is_reduced_ops_build(args) or args.minimal_build is not None:
@@ -1601,7 +1605,8 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs):
             if is_windows():
                 cwd = os.path.join(cwd, config)
 
-            run_subprocess([sys.executable, 'onnxruntime_test_python.py'], cwd=cwd, dll_path=dll_path)
+            run_subprocess([sys.executable, 'onnxruntime_test_python.py'],
+                           cwd=cwd, dll_path=dll_path, python_path=python_path)
 
             if not args.disable_contrib_ops:
                 run_subprocess([sys.executable, 'onnxruntime_test_python_sparse_matmul.py'],
@@ -1649,7 +1654,8 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs):
                 onnx_test = False
 
             if onnx_test:
-                run_subprocess([sys.executable, 'onnxruntime_test_python_backend.py'], cwd=cwd, dll_path=dll_path)
+                run_subprocess([sys.executable, 'onnxruntime_test_python_backend.py'], cwd=cwd, dll_path=dll_path,
+                               python_path=python_path)
                 if not args.disable_contrib_ops:
                     run_subprocess([sys.executable, '-m', 'unittest', 'discover', '-s', 'quantization'],
                                    cwd=cwd, dll_path=dll_path)
@@ -2055,7 +2061,7 @@ def main():
     if args.use_migraphx:
         args.use_rocm = True
 
-    if args.build_wheel or args.gen_doc:
+    if args.build_wheel or args.gen_doc or args.use_tvm:
         args.enable_pybind = True
 
     if args.build_csharp or args.build_nuget or args.build_java or args.build_nodejs:
diff --git a/tools/ci_build/github/azure-pipelines/linux-tvm-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-tvm-ci-pipeline.yml
new file mode 100644
index 0000000000..3aaee71dab
--- /dev/null
+++ b/tools/ci_build/github/azure-pipelines/linux-tvm-ci-pipeline.yml
@@ -0,0 +1,61 @@
+jobs:
+  - job: Linux_TVM_CI
+    timeoutInMinutes: 180
+    workspace:
+      clean: all
+    pool: Linux-CPU-2019
+    steps:
+      - checkout: self
+        clean: true
+        submodules: recursive
+      # Docker image for the build step below, described by Dockerfile.ubuntu.
+      - template: templates/get-docker-image-steps.yml
+        parameters:
+          Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.ubuntu
+          Context: tools/ci_build/github/linux/docker
+          DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )"
+          Repository: onnxruntimecpubuild
+      # Installs the TVM test dependencies, then runs build.py with --use_tvm, both inside the container.
+      - task: CmdLine@2
+        inputs:
+          script: |
+            mkdir -p $HOME/.onnx
+            docker run --rm \
+              --volume /data/onnx:/data/onnx:ro \
+              --volume $(Build.SourcesDirectory):/onnxruntime_src \
+              --volume $(Build.BinariesDirectory):/build \
+              --volume /data/models:/build/models:ro \
+              --volume $HOME/.onnx:/home/onnxruntimedev/.onnx \
+              -e NIGHTLY_BUILD \
+              -e BUILD_BUILDNUMBER \
+              onnxruntimecpubuild \
+                /bin/bash -c \
+                  "/onnxruntime_src/tools/ci_build/github/linux/tvm/install_tvm_test_dependencies.sh \
+                    python3 && \
+                  python3 /onnxruntime_src/tools/ci_build/build.py \
+                    --build_dir /build \
+                    --config Release \
+                    --skip_submodule_sync \
+                    --parallel \
+                    --enable_pybind \
+                    --disable_contrib_ops \
+                    --disable_ml_ops \
+                    --skip_onnx_tests \
+                    --use_tvm"
+          workingDirectory: $(Build.SourcesDirectory)
+      # Test results are published whether the earlier steps succeeded or failed (see condition).
+      - task: PublishTestResults@2
+        displayName: 'Publish unit test results'
+        inputs:
+          testResultsFiles: '**/*.results.xml'
+          searchFolder: '$(Build.BinariesDirectory)'
+          testRunTitle: 'Unit Test Run'
+        condition: succeededOrFailed()
+
+      - template: templates/component-governance-component-detection-steps.yml
+        parameters:
+          condition: 'succeeded'
+
+      - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
+        displayName: 'Clean Agent Directories'
+        condition: always()
diff --git a/tools/ci_build/github/linux/tvm/install_tvm_test_dependencies.sh b/tools/ci_build/github/linux/tvm/install_tvm_test_dependencies.sh
new file mode 100755
index 0000000000..37bad9916e
--- /dev/null
+++ b/tools/ci_build/github/linux/tvm/install_tvm_test_dependencies.sh
@@ -0,0 +1,5 @@
+#!/bin/bash
+set -e -x
+# Usage: install_tvm_test_dependencies.sh [python_executable]  (python3 when omitted)
+PYTHON_EXE="${1:-python3}"
+"${PYTHON_EXE}" -m pip install decorator scipy