From e2dd1315c779f9f60aedd0243d61af625fdf5cc3 Mon Sep 17 00:00:00 2001 From: pengwa Date: Tue, 31 Jan 2023 12:34:45 +0800 Subject: [PATCH] Fix build for --enable_language_interop_ops + DISABLE_ABSEIL=ON (#14469) ### Fix build error on Windows when building with "--enable_language_interop_ops --cmake_extra_defines onnxruntime_DISABLE_ABSEIL=ON" This is a subsequent fix after https://github.com/microsoft/onnxruntime/pull/14309, which fixed the build for onnxruntime_DISABLE_ABSEIL=ON. Going further, if we enable --enable_language_interop_ops, there are the following two errors: ``` test_symm_qgemm.cpp test_transpose.cpp onnxruntime_session.lib(inference_session.obj) : error LNK2019: unresolved external symbol "void __cdecl onnxruntime::L oadInterOp(class std::basic_string,class std::allocator > const &,cla ss std::vector > &,class std::function const &)" (?LoadInterOp@onnxruntime@@YAXAEBV?$basic_string@_WU?$char_traits@_W@std@@V?$allocato r@_W@2@@std@@AEAV?$vector@UCustomOpDomain@Ort@@V?$allocator@UCustomOpDomain@Ort@@@std@@@3@AEBV?$function@$$A6AXPEBD@Z@3 @@Z) referenced in function "public: __cdecl ::operator()(class std::shared_pt r &)const " (??R@@QEBA@AEAV?$shared_ptr@VModel@onnxr untime@@@std@@@Z) [C:\Users\pengwa\dev\onnxruntime\build\Windows\RelWithDebInfo\onnxruntime_test_trainer.vcxproj] onnxruntime_session.lib(inference_session.obj) : error LNK2019: unresolved external symbol "void __cdecl onnxruntime::L oadInterOp(class onnx::ModelProto const &,class std::vector > &,class std::function const &)" (?LoadInterOp@onnxruntime@@YAXAEBVModelP roto@onnx@@AEAV?$vector@UCustomOpDomain@Ort@@V?$allocator@UCustomOpDomain@Ort@@@std@@@std@@AEBV?$function@$$A6AXPEBD@Z@ 5@@Z) referenced in function "public: __cdecl ::operator()(class std::shared_p tr &)const " (??R@@QEBA@AEAV?$shared_ptr@VModel@onnx runtime@@@std@@@Z) [C:\Users\pengwa\dev\onnxruntime\build\Windows\RelWithDebInfo\onnxruntime_test_trainer.vcxproj] 
C:\Users\pengwa\dev\onnxruntime\build\Windows\RelWithDebInfo\RelWithDebInfo\onnxruntime_test_trainer.exe : fatal error LNK1120: 2 unresolved externals [C:\Users\pengwa\dev\onnxruntime\build\Windows\RelWithDebInfo\onnxruntime_test_trainer. vcxproj] onnxruntime.vcxproj -> C:\Users\pengwa\dev\onnxruntime\build\Windows\RelWithDebInfo\RelWithDebInfo\onnxruntime.dll onnxruntime_test_utils.vcxproj -> C:\Users\pengwa\dev\onnxruntime\build\Windows\RelWithDebInfo\RelWithDebInfo\onnxrun time_test_utils.lib CUDACOMPILE : nvcc warning : The 'compute_35', 'compute_37', 'sm_35', and 'sm_37' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning). [C:\Users\pengwa\dev\onnxruntime \build\Windows\RelWithDebInfo\custom_op_library.vcxproj] cuda_ops.cu CUDACOMPILE : nvcc warning : The 'compute_35', 'compute_37', 'sm_35', and 'sm_37' architectures are deprecated, and may be removed in a future release (Use -Wno-deprecated-gpu-targets to suppress warning). 
[C:\Users\pengwa\dev\onnxruntime \build\Windows\RelWithDebInfo\onnxruntime_test_cuda_ops_lib.vcxproj] ``` ``` kernel_type_str_resolver_utils_test.cc local_kernel_registry_test.cc C:\Users\pengwa\dev\onnxruntime\onnxruntime\test\framework\allocation_planner_test.cc(1388,9): error C2220: the followin g warning is treated as an error [C:\Users\pengwa\dev\onnxruntime\build\Windows\RelWithDebInfo\onnxruntime_test_all.vcxp roj] C:\Users\pengwa\dev\onnxruntime\onnxruntime\test\framework\allocation_planner_test.cc(1388,9): warning C4067: unexpected tokens following preprocessor directive - expected a newline [C:\Users\pengwa\dev\onnxruntime\build\Windows\RelWithDebI nfo\onnxruntime_test_all.vcxproj] ``` ### Motivation and Context --- cmake/onnxruntime_unittests.cmake | 5 +++++ .../test/framework/allocation_planner_test.cc | 15 +++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 1660f64b1a..8ae2432b80 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1342,6 +1342,11 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) onnxruntime_common onnxruntime_flatbuffers ) + + if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS) + list(APPEND ONNXRUNTIME_TEST_LIBS onnxruntime_language_interop onnxruntime_pyop) + endif() + target_link_libraries(onnxruntime_test_trainer PRIVATE ${ONNXRUNTIME_TEST_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES} diff --git a/onnxruntime/test/framework/allocation_planner_test.cc b/onnxruntime/test/framework/allocation_planner_test.cc index b24579792e..3cda40dff4 100644 --- a/onnxruntime/test/framework/allocation_planner_test.cc +++ b/onnxruntime/test/framework/allocation_planner_test.cc @@ -73,7 +73,7 @@ struct UnaryNode { : UnaryNode(graph, "Transpose", p_input_arg, p_output_arg) {} UnaryNode(onnxruntime::Graph& graph, std::string& node_name, const std::string& op, std::vector& inputs, - std::vector& outputs) : 
input_args(inputs), output_args(outputs) { + std::vector& outputs) : input_args(inputs), output_args(outputs) { p_node = &graph.AddNode(node_name, op, "test op", input_args, output_args); } }; @@ -1314,9 +1314,9 @@ TEST_F(PlannerTest, MultiStreamMultiOutput) { std::vector input1{Arg(Graph_input1), Arg(Graph_input2), Arg(Graph_input3)}, output1{Arg(Arg1), Arg(Arg2)}, input2{Arg(Arg1), Arg(Arg2)}, output2{Arg(Arg3)}; AddNode(*cudaKernel, node1, input1, output1); - std::unique_ptr<::onnxruntime::KernelDef> cpuKernel = KernelDefBuilder().SetName("Add").Provider(kCpuExecutionProvider).SinceVersion(7,12).Build(); + std::unique_ptr<::onnxruntime::KernelDef> cpuKernel = KernelDefBuilder().SetName("Add").Provider(kCpuExecutionProvider).SinceVersion(7, 12).Build(); AddNode(*cpuKernel, node2, input2, output2); - + CUDAExecutionProviderInfo epi; onnxruntime::ProviderInfo_CUDA& ep = onnxruntime::GetProviderInfo_CUDA(); auto epFactory = ep.CreateExecutionProviderFactory(epi); @@ -1345,7 +1345,7 @@ TEST_F(PlannerTest, MultiStreamMultiOutput) { // \ / // node3 // node1 and node2 are in the same stream, both has an output which will be consumed by node3 in a different stream -// TODO(leca): the ideal case is there is only 1 wait step before launching node3, +// TODO(leca): the ideal case is there is only 1 wait step before launching node3, // as there is a specific order between node1 and node2 if they are in the same stream, thus node3 will only need to wait the latter one TEST_F(PlannerTest, MultiStream2NodesSameStreamConsumedBy1NodeInDifferentStream) { std::unique_ptr<::onnxruntime::KernelDef> cudaKernel = KernelDefBuilder().SetName("Transpose").Provider(kCudaExecutionProvider).SinceVersion(1, 10).Build(); @@ -1385,7 +1385,7 @@ TEST_F(PlannerTest, MultiStream2NodesSameStreamConsumedBy1NodeInDifferentStream) } #endif -#if not defined(__wasm__) and defined(ORT_ENABLE_STREAM) +#if !defined(__wasm__) && defined(ORT_ENABLE_STREAM) TEST_F(PlannerTest, ParaPlanCreation) { TypeProto 
graph_in_type; @@ -1823,13 +1823,12 @@ TEST_F(PlannerTest, ParaPlanCreation) { std::string reused; ORT_ENFORCE(main_graph_ort_value_index_map.GetName(per_value_plan.reused_buffer, reused).IsOK()); reuse_pairs.erase(reused); - } //if - } //for + } // if + } // for ASSERT_TRUE(reuse_pairs.empty()); } TEST_F(PlannerTest, TestMultiStreamConfig) { - const char* type = "DeviceBasedPartitioner"; constexpr size_t type_len = 22;