From b46310b34996bcc47ca73d8ffe7c5af7ebc9a67f Mon Sep 17 00:00:00 2001
From: Zuwei Zhao <4123666+Zuwei-Zhao@users.noreply.github.com>
Date: Thu, 1 Jul 2021 11:34:03 -0500
Subject: [PATCH] Integrate onnxruntime-extensions into onnxruntime. (#8143)

Co-authored-by: Zuwei Zhao <zuzhao@microsoft.com>
---
 .gitmodules                                   |   3 +
 cmake/CMakeLists.txt                          |  17 ++++
 cmake/external/onnxruntime-extensions         |   1 +
 cmake/onnxruntime_session.cmake               |   3 +
 .../core/session/onnxruntime_c_api.h          |   5 +
 .../core/session/onnxruntime_cxx_api.h        |   2 +
 .../core/session/onnxruntime_cxx_inline.h     |   5 +
 onnxruntime/core/session/onnxruntime_c_api.cc |  21 ++++
 onnxruntime/core/session/ort_apis.h           |   1 +
 onnxruntime/test/shared_lib/test_inference.cc |  94 ++++++++++++++++++
 .../test/testdata/custom_op_negpos.onnx       | Bin 0 -> 134 bytes
 .../test/testdata/custom_op_string_lower.onnx | Bin 0 -> 167 bytes
 onnxruntime/wasm/api.cc                       |   5 +
 tools/ci_build/build.py                       |   7 ++
 14 files changed, 164 insertions(+)
 create mode 160000 cmake/external/onnxruntime-extensions
 create mode 100644 onnxruntime/test/testdata/custom_op_negpos.onnx
 create mode 100644 onnxruntime/test/testdata/custom_op_string_lower.onnx

diff --git a/.gitmodules b/.gitmodules
index 329780cd77..d46a4de855 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -79,3 +79,6 @@
 	url = https://github.com/emscripten-core/emsdk.git
 	branch = 2.0.23
 	ignore = dirty
+[submodule "cmake/external/onnxruntime-extensions"]
+	path = cmake/external/onnxruntime-extensions
+	url = https://github.com/microsoft/onnxruntime-extensions.git
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index acb8be710c..d891731903 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -153,6 +153,9 @@ option(onnxruntime_ENABLE_EAGER_MODE "build ort eager mode")
 # build separate library of schemas of (custom) ops used by ORT (for ONNX to MLIR translation)
 option(onnxruntime_BUILD_OPSCHEMA_LIB "Build op schema library" ON)
 
+# option to enable custom operators in onnxruntime-extensions
+option(onnxruntime_ENABLE_EXTENSION_CUSTOM_OPS "Enable custom operators in onnxruntime-extensions" OFF)
+
 # Single output director for all binaries
 set (RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin CACHE PATH "Single output directory for all binaries.")
 
@@ -1219,6 +1222,20 @@ if (onnxruntime_USE_TVM)
   list(APPEND onnxruntime_EXTERNAL_DEPENDENCIES tvm nnvm_compiler)
 endif()
 
+if (onnxruntime_ENABLE_EXTENSION_CUSTOM_OPS)
+  # Directory-wide define: it is tested by onnxruntime_c_api.cc, wasm/api.cc and the shared_lib tests.
+  add_compile_definitions(ENABLE_EXTENSION_CUSTOM_OPS)
+
+  # Options read by the onnxruntime-extensions build: static library on, its ctest and SPM tokenizer off.
+  set(OCOS_ENABLE_CTEST OFF CACHE INTERNAL "")
+  set(OCOS_ENABLE_STATIC_LIB ON CACHE INTERNAL "")
+  set(OCOS_ENABLE_SPM_TOKENIZER OFF CACHE INTERNAL "")
+  add_subdirectory(external/onnxruntime-extensions EXCLUDE_FROM_ALL)
+  # ocos_operators and ortcustomops are targets defined in the CMakeLists.txt of onnxruntime-extensions.
+  target_include_directories(ocos_operators PRIVATE ${RE2_INCLUDE_DIR} "${CMAKE_CURRENT_SOURCE_DIR}/external/json/include")
+  target_include_directories(ortcustomops PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/external/onnxruntime-extensions/shared")
+endif()
+
 if (APPLE OR CMAKE_SYSTEM_NAME STREQUAL "Android")
   #onnx/onnx/proto_utils.h:34:16: error: 'SetTotalBytesLimit' is deprecated: Please use the single
   #parameter version of SetTotalBytesLimit(). The second parameter is ignored.
diff --git a/cmake/external/onnxruntime-extensions b/cmake/external/onnxruntime-extensions
new file mode 160000
index 0000000000..800e360ef3
--- /dev/null
+++ b/cmake/external/onnxruntime-extensions
@@ -0,0 +1 @@
+Subproject commit 800e360ef3603c743392b96a9adca3e210e4d2b2
diff --git a/cmake/onnxruntime_session.cmake b/cmake/onnxruntime_session.cmake
index bb5eb0d166..08359e91a8 100644
--- a/cmake/onnxruntime_session.cmake
+++ b/cmake/onnxruntime_session.cmake
@@ -17,6 +17,9 @@ if(onnxruntime_ENABLE_INSTRUMENT)
 endif()
 target_include_directories(onnxruntime_session PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS})
 target_link_libraries(onnxruntime_session PRIVATE nlohmann_json::nlohmann_json)
+if(onnxruntime_ENABLE_EXTENSION_CUSTOM_OPS)
+  target_link_libraries(onnxruntime_session PRIVATE ortcustomops)  # target defined by external/onnxruntime-extensions
+endif()
 add_dependencies(onnxruntime_session ${onnxruntime_EXTERNAL_DEPENDENCIES})
 set_target_properties(onnxruntime_session PROPERTIES FOLDER "ONNXRuntime")
 if (onnxruntime_USE_CUDA)
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index cf26fb9492..8a547cd6f5 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -1445,6 +1445,11 @@ struct OrtApi {
   * Use this API to release the instance of OrtTensorRTProviderV2.
   */
   ORT_CLASS_RELEASE2(TensorRTProviderOptions);
+
+  /**
+  * Register the custom operators of onnxruntime-extensions (https://github.com/microsoft/onnxruntime-extensions.git) on `options`. In a build without ENABLE_EXTENSION_CUSTOM_OPS a non-null `options` yields an ORT_FAIL status.
+  */
+  ORT_API2_STATUS(EnableOrtCustomOps, _Inout_ OrtSessionOptions* options);
 };
 
 /*
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
index 838ac56835..63f2202357 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -311,6 +311,8 @@ struct SessionOptions : Base<OrtSessionOptions> {
   SessionOptions& EnableProfiling(const ORTCHAR_T* profile_file_prefix);
   SessionOptions& DisableProfiling();
 
+  SessionOptions& EnableOrtCustomOps();
+
   SessionOptions& EnableMemPattern();
   SessionOptions& DisableMemPattern();
 
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
index 22b6faa047..a4596fc205 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -440,6 +440,11 @@ inline SessionOptions& SessionOptions::DisableProfiling() {
   return *this;
 }
 
+inline SessionOptions& SessionOptions::EnableOrtCustomOps() {
+  ThrowOnError(GetApi().EnableOrtCustomOps(p_));  // forward to the C API; ThrowOnError handles a failure status
+  return *this;  // return self, like the neighbouring SessionOptions setters
+}
+
 inline SessionOptions& SessionOptions::EnableMemPattern() {
   ThrowOnError(GetApi().EnableMemPattern(p_));
   return *this;
diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc
index 505526c994..ad743598e2 100644
--- a/onnxruntime/core/session/onnxruntime_c_api.cc
+++ b/onnxruntime/core/session/onnxruntime_c_api.cc
@@ -35,6 +35,11 @@
 #include "abi_session_options_impl.h"
 #include "core/framework/TensorSeq.h"
 #include "core/platform/ort_mutex.h"
+
+#ifdef ENABLE_EXTENSION_CUSTOM_OPS
+#include "ortcustomops.h"
+#endif
+
 #ifdef USE_CUDA
 #include "core/providers/cuda/cuda_provider_factory.h"
 #endif
@@ -403,6 +408,21 @@ ORT_API_STATUS_IMPL(OrtApis::RegisterCustomOpsLibrary, _Inout_ OrtSessionOptions
   API_IMPL_END
 }
 
+// Registers the onnxruntime-extensions custom operators on `options`; fails if they were not compiled in.
+ORT_API_STATUS_IMPL(OrtApis::EnableOrtCustomOps, _Inout_ OrtSessionOptions* options) {
+  API_IMPL_BEGIN
+  if (options == nullptr) {
+    // Returning success here would let the caller believe the operators had been registered.
+    return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "EnableOrtCustomOps: options must not be null");
+  }
+#ifdef ENABLE_EXTENSION_CUSTOM_OPS
+  return RegisterCustomOps(options, OrtGetApiBase());
+#else
+  return OrtApis::CreateStatus(ORT_FAIL, "EnableOrtCustomOps: Custom operators in onnxruntime-extensions are not enabled");
+#endif
+  API_IMPL_END
+}
+
 namespace {
 // provider either model_path, or modal_data + model_data_length.
 static ORT_STATUS_PTR CreateSessionAndLoadModel(_In_ const OrtSessionOptions* options,
@@ -2275,6 +2295,7 @@ static constexpr OrtApi ort_api_1_to_9 = {
     &OrtApis::UpdateTensorRTProviderOptions,
     &OrtApis::GetTensorRTProviderOptionsAsString,
     &OrtApis::ReleaseTensorRTProviderOptions,
+    &OrtApis::EnableOrtCustomOps,
 };
 
 // Assert to do a limited check to ensure Version 1 of OrtApi never changes (will detect an addition or deletion but not if they cancel out each other)
diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h
index f18682385f..15c49592ec 100644
--- a/onnxruntime/core/session/ort_apis.h
+++ b/onnxruntime/core/session/ort_apis.h
@@ -285,4 +285,5 @@ ORT_API_STATUS_IMPL(UpdateTensorRTProviderOptions, _Inout_ OrtTensorRTProviderOp
                     size_t num_keys);
 ORT_API_STATUS_IMPL(GetTensorRTProviderOptionsAsString, _In_ const OrtTensorRTProviderOptionsV2* tensorrt_options, _Inout_ OrtAllocator* allocator, _Outptr_ char** ptr);
 ORT_API(void, ReleaseTensorRTProviderOptions, _Frees_ptr_opt_ OrtTensorRTProviderOptionsV2*);
+ORT_API_STATUS_IMPL(EnableOrtCustomOps, _Inout_ OrtSessionOptions* options);
 }  // namespace OrtApis
diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc
index ffc56e678f..382e7e1908 100644
--- a/onnxruntime/test/shared_lib/test_inference.cc
+++ b/onnxruntime/test/shared_lib/test_inference.cc
@@ -176,6 +176,11 @@ static constexpr PATH_TYPE OPTIONAL_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI = TSTR("tes
 static constexpr PATH_TYPE OPTIONAL_INPUT_OUTPUT_CUSTOM_OP_MODEL_URI_2 = TSTR("testdata/foo_bar_2.onnx");
 static constexpr PATH_TYPE CUSTOM_OP_MODEL_WITH_ATTRIBUTES_URI = TSTR("testdata/foo_bar_3.onnx");
 
+#ifdef ENABLE_EXTENSION_CUSTOM_OPS
+static constexpr PATH_TYPE ORT_CUSTOM_OPS_MODEL_URI = TSTR("testdata/custom_op_string_lower.onnx");  // model for test_enable_ort_customops_stringlower
+static constexpr PATH_TYPE ORT_CUSTOM_OPS_MODEL_URI_2 = TSTR("testdata/custom_op_negpos.onnx");  // model for test_enable_ort_customops_negpos
+#endif
+
 #ifdef ENABLE_LANGUAGE_INTEROP_OPS
 static constexpr PATH_TYPE PYOP_FLOAT_MODEL_URI = TSTR("testdata/pyop_1.onnx");
 static constexpr PATH_TYPE PYOP_MULTI_MODEL_URI = TSTR("testdata/pyop_2.onnx");
@@ -268,6 +273,95 @@ TEST(CApiTest, custom_op_handler) {
 #endif
 }
 
+#ifdef ENABLE_EXTENSION_CUSTOM_OPS
+// test enabled ort-customops negpos
+TEST(CApiTest, test_enable_ort_customops_negpos) {
+
+  Ort::MemoryInfo info("Cpu", OrtDeviceAllocator, 0, OrtMemTypeDefault);
+
+  // Create Inputs: one 2x2 float tensor built over input_data
+  std::vector<Ort::Value> ort_inputs;
+  std::vector<float> input_data = {-1.1f, 2.2f, 4.4f, -5.5f};
+  std::vector<int64_t> input_dims = {2, 2};
+  ort_inputs.emplace_back(Ort::Value::CreateTensor<float>(info, input_data.data(), input_data.size(), input_dims.data(), input_dims.size()));
+
+  // Create Session with ORT CustomOps
+  Ort::SessionOptions session_options;
+  session_options.EnableOrtCustomOps();
+  Ort::Session session(*ort_env, ORT_CUSTOM_OPS_MODEL_URI_2, session_options);
+
+  // Create Input and Output Names
+  std::vector<const char*> input_names = {"X"};
+  const char* output_names[] = {"out0", "out1"};
+
+  // Run Session
+  std::vector<Ort::Value> ort_outputs = session.Run(Ort::RunOptions{}, input_names.data(), ort_inputs.data(), ort_inputs.size(), output_names, countof(output_names));
+
+  // Validate Results: both outputs must be produced; only the contents of out0 are checked below
+  ASSERT_EQ(ort_outputs.size(), 2u);
+
+  std::vector<int64_t> out_dims = {2, 2};
+  std::vector<float> values_out0 = {-1.1f, 0.0f, 0.0f, -5.5f};
+  auto type_info = ort_outputs[0].GetTensorTypeAndShapeInfo();
+  ASSERT_EQ(type_info.GetShape(), out_dims);
+  size_t total_len = type_info.GetElementCount();
+  ASSERT_EQ(values_out0.size(), total_len);
+
+  // Element-wise check; every expected entry is either an input value or 0.0f, so exact equality is used
+  float* f = ort_outputs[0].GetTensorMutableData<float>();
+  for (size_t i = 0; i != total_len; ++i) {
+    ASSERT_EQ(values_out0[i], f[i]);
+  }
+}
+
+// test enabled ort-customops stringlower
+TEST(CApiTest, test_enable_ort_customops_stringlower) {
+  auto allocator = std::make_unique<MockedOrtAllocator>();
+
+  // Input: a 1x1 string tensor holding one mixed-case sentence
+  const std::string mixed_case_text{"HI, This is ENGINEER from Microsoft."};
+  const char* const input_strings[] = {mixed_case_text.c_str()};
+  const std::vector<int64_t> input_dims = {1, 1};
+
+  std::vector<Ort::Value> ort_inputs;
+  ort_inputs.push_back(Ort::Value::CreateTensor(allocator.get(), input_dims.data(), input_dims.size(), ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING));
+  ort_inputs.back().FillStringTensor(input_strings, 1U);
+
+  // Session whose options have the onnxruntime-extensions custom ops enabled
+  Ort::SessionOptions session_options;
+  session_options.EnableOrtCustomOps();
+  Ort::Session session(*ort_env, ORT_CUSTOM_OPS_MODEL_URI, session_options);
+
+  // Graph input and output names handed to Run
+  const char* input_names[] = {"input_1"};
+  const char* output_names[] = {"customout"};
+
+  // Run Session
+  Ort::RunOptions run_options{nullptr};
+  std::vector<Ort::Value> ort_outputs = session.Run(run_options, input_names, ort_inputs.data(), ort_inputs.size(), output_names, countof(output_names));
+
+  // Validate Results: exactly one output, a 1x1 string tensor
+  ASSERT_EQ(ort_outputs.size(), 1u);
+  Ort::Value& output = ort_outputs[0];
+
+  const std::vector<int64_t> expected_dims = {1, 1};
+  auto type_info = output.GetTensorTypeAndShapeInfo();
+  ASSERT_EQ(type_info.GetShape(), expected_dims);
+  ASSERT_EQ(type_info.GetElementType(), ONNX_TENSOR_ELEMENT_DATA_TYPE_STRING);
+
+  // The reported string data length must match the lower-cased sentence
+  const std::string lower_case_text{"hi, this is engineer from microsoft."};
+  const size_t data_length = output.GetStringTensorDataLength();
+  ASSERT_EQ(lower_case_text.size(), data_length);
+
+  // Copy the string bytes out of the tensor (one offset slot per element) and compare the text
+  std::string result(data_length, '\0');
+  std::vector<size_t> offsets(type_info.GetElementCount());
+  output.GetStringTensorContent((void*)result.data(), data_length, offsets.data(), offsets.size());
+  ASSERT_STREQ(result.c_str(), lower_case_text.c_str());
+}
+#endif
+
 //test custom op which accepts float and double as inputs
 TEST(CApiTest, varied_input_custom_op_handler) {
   std::vector<Input> inputs(2);
diff --git a/onnxruntime/test/testdata/custom_op_negpos.onnx b/onnxruntime/test/testdata/custom_op_negpos.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..78fef46309ddfa90ea0f0d3193ebc1ab7a7ed628
GIT binary patch
literal 134
zcmd;Jx2oaN;9`srV#zNpF#wZ>N^E|q=>hr0R{V*Xdii;I6?)0}c_l@eNkZHZ3E%vb
n)SM_`kTyOp9u7t!4lX7RCJ;^%<ANB3UEB~|y%P%;g8&Zz0^b;M

literal 0
HcmV?d00001

diff --git a/onnxruntime/test/testdata/custom_op_string_lower.onnx b/onnxruntime/test/testdata/custom_op_string_lower.onnx
new file mode 100644
index 0000000000000000000000000000000000000000..63e90233b8d9f3d07eff72fbb63e69a5a1e2e46d
GIT binary patch
literal 167
zcmd;Jx9Vl&lH+2}%qu7@i8mDD%uGqmE6FUWG*sg7gfh8IxDdiZoXMrdCHcAer6o$-
z!6ikRdFej+<*7wh{E3-*`FVL2ddc~DK;a}I){@lX5`!pFgyCFV92^`%EL;p+3`yc#
R2!n)>B%Fk~&`lCx1OPC!F0=pu

literal 0
HcmV?d00001

diff --git a/onnxruntime/wasm/api.cc b/onnxruntime/wasm/api.cc
index 8413197086..63aa5c10a4 100644
--- a/onnxruntime/wasm/api.cc
+++ b/onnxruntime/wasm/api.cc
@@ -122,6 +122,11 @@ OrtSession* OrtCreateSession(void* data, size_t data_length, OrtSessionOptions*
     return nullptr;
   }
 
+#ifdef ENABLE_EXTENSION_CUSTOM_OPS
+  // Enable ORT CustomOps in onnxruntime-extensions
+  RETURN_NULLPTR_IF_ERROR(EnableOrtCustomOps, session_options);
+#endif
+
 #if defined(__EMSCRIPTEN_PTHREADS__)
   RETURN_NULLPTR_IF_ERROR(DisablePerSessionThreads, session_options);
 #else
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 189fe74c38..4101e64254 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -350,6 +350,11 @@ def parse_arguments():
     parser.add_argument(
         "--emsdk_version", default="2.0.23", help="Specify version of emsdk")
 
+    # Enable onnxruntime-extensions
+    parser.add_argument(
+        "--enable_onnxruntime_extensions", action='store_true',
+        help="Enable custom operators in onnxruntime-extensions")
+
     # Arguments needed by CI
     parser.add_argument(
         "--cmake_path", default="cmake", help="Path to the CMake program.")
@@ -755,6 +760,8 @@ def generate_build_tree(cmake_path, source_dir, build_dir, cuda_home, cudnn_home
         "-Donnxruntime_ENABLE_WEBASSEMBLY_DEBUG_INFO=" + ("ON" if args.enable_wasm_debug_info else "OFF"),
         "-Donnxruntime_WEBASSEMBLY_MALLOC=" + args.wasm_malloc,
         "-Donnxruntime_ENABLE_EAGER_MODE=" + ("ON" if args.build_eager_mode else "OFF"),
+        # enable custom operators in onnxruntime-extensions
+        "-Donnxruntime_ENABLE_EXTENSION_CUSTOM_OPS=" + ("ON" if args.enable_onnxruntime_extensions else "OFF"),
     ]
     if args.use_cuda:
         cmake_args += ["-Donnxruntime_USE_CUDA=ON", "-Donnxruntime_CUDA_VERSION=" + args.cuda_version,