diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h deleted file mode 100644 index 44debc901c..0000000000 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h +++ /dev/null @@ -1,14 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -#include "onnxruntime_c_api.h" - -#ifdef __cplusplus -extern "C" { -#endif - -ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); - -#ifdef __cplusplus -} -#endif diff --git a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h index e7d0f9f03a..8f2b5af870 100644 --- a/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h +++ b/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_options.h @@ -11,38 +11,38 @@ /// User can only get the instance of OrtTensorRTProviderOptionsV2 via CreateTensorRTProviderOptions. /// struct OrtTensorRTProviderOptionsV2 { - int device_id; // cuda device id. - int has_user_compute_stream; // indicator of user specified CUDA compute stream. - void* user_compute_stream; // user specified CUDA compute stream. - int trt_max_partition_iterations; // maximum iterations for TensorRT parser to get capability - int trt_min_subgraph_size; // minimum size of TensorRT subgraphs - size_t trt_max_workspace_size; // maximum workspace size for TensorRT. - int trt_fp16_enable; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true - int trt_int8_enable; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true - const char* trt_int8_calibration_table_name; // TensorRT INT8 calibration table name. - int trt_int8_use_native_calibration_table; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true - int trt_dla_enable; // enable DLA. Default 0 = false, nonzero = true - int trt_dla_core; // DLA core number. Default 0 - int trt_dump_subgraphs; // dump TRT subgraph. Default 0 = false, nonzero = true - int trt_engine_cache_enable; // enable engine caching. Default 0 = false, nonzero = true - const char* trt_engine_cache_path; // specify engine cache path - int trt_engine_decryption_enable; // enable engine decryption. Default 0 = false, nonzero = true - const char* trt_engine_decryption_lib_path; // specify engine decryption library path - int trt_force_sequential_engine_build; // force building TensorRT engine sequentially. Default 0 = false, nonzero = true - int trt_context_memory_sharing_enable; // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true - int trt_layer_norm_fp32_fallback; // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true - int trt_timing_cache_enable; // enable TensorRT timing cache. Default 0 = false, nonzero = true - int trt_force_timing_cache; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true - int trt_detailed_build_log; // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true - int trt_build_heuristics_enable; // Build engine using heuristics to reduce build time. Default 0 = false, nonzero = true - int trt_sparsity_enable; // Control if sparsity can be used by TRT. 
Default 0 = false, 1 = true - int trt_builder_optimization_level; // Set the builder optimization level. WARNING: levels below 3 do not guarantee good engine performance, but greatly improve build time. Default 3, valid range [0-5] - int trt_auxiliary_streams; // Set maximum number of auxiliary streams per inference stream. Setting this value to 0 will lead to optimal memory usage. Default -1 = heuristics - const char* trt_tactic_sources; // pecify the tactics to be used by adding (+) or removing (-) tactics from the default - // tactic sources (default = all available tactics) e.g. "-CUDNN,+CUBLAS" available keys: "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS" - const char* trt_extra_plugin_lib_paths; // specify extra TensorRT plugin library paths - const char* trt_profile_min_shapes; // Specify the range of the input shapes to build the engine with - const char* trt_profile_max_shapes; // Specify the range of the input shapes to build the engine with - const char* trt_profile_opt_shapes; // Specify the range of the input shapes to build the engine with - int trt_cuda_graph_enable; // Enable CUDA graph in ORT TRT + int device_id{0}; // cuda device id. + int has_user_compute_stream{0}; // indicator of user specified CUDA compute stream. + void* user_compute_stream{nullptr}; // user specified CUDA compute stream. + int trt_max_partition_iterations{1000}; // maximum iterations for TensorRT parser to get capability + int trt_min_subgraph_size{1}; // minimum size of TensorRT subgraphs + size_t trt_max_workspace_size{1 << 30}; // maximum workspace size for TensorRT. + int trt_fp16_enable{0}; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true + int trt_int8_enable{0}; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true + const char* trt_int8_calibration_table_name{nullptr}; // TensorRT INT8 calibration table name. + int trt_int8_use_native_calibration_table{0}; // use native TensorRT generated calibration table. Default 0 = false, nonzero = true + int trt_dla_enable{0}; // enable DLA. Default 0 = false, nonzero = true + int trt_dla_core{0}; // DLA core number. Default 0 + int trt_dump_subgraphs{0}; // dump TRT subgraph. Default 0 = false, nonzero = true + int trt_engine_cache_enable{0}; // enable engine caching. Default 0 = false, nonzero = true + const char* trt_engine_cache_path{nullptr}; // specify engine cache path + int trt_engine_decryption_enable{0}; // enable engine decryption. Default 0 = false, nonzero = true + const char* trt_engine_decryption_lib_path{nullptr}; // specify engine decryption library path + int trt_force_sequential_engine_build{0}; // force building TensorRT engine sequentially. Default 0 = false, nonzero = true + int trt_context_memory_sharing_enable{0}; // enable context memory sharing between subgraphs. Default 0 = false, nonzero = true + int trt_layer_norm_fp32_fallback{0}; // force Pow + Reduce ops in layer norm to FP32. Default 0 = false, nonzero = true + int trt_timing_cache_enable{0}; // enable TensorRT timing cache. Default 0 = false, nonzero = true + int trt_force_timing_cache{0}; // force the TensorRT cache to be used even if device profile does not match. Default 0 = false, nonzero = true + int trt_detailed_build_log{0}; // Enable detailed build step logging on TensorRT EP with timing for each engine build. Default 0 = false, nonzero = true + int trt_build_heuristics_enable{0}; // Build engine using heuristics to reduce build time. 
Default 0 = false, nonzero = true + int trt_sparsity_enable{0}; // Control if sparsity can be used by TRT. Default 0 = false, 1 = true + int trt_builder_optimization_level{3}; // Set the builder optimization level. WARNING: levels below 3 do not guarantee good engine performance, but greatly improve build time. Default 3, valid range [0-5] + int trt_auxiliary_streams{-1}; // Set maximum number of auxiliary streams per inference stream. Setting this value to 0 will lead to optimal memory usage. Default -1 = heuristics + const char* trt_tactic_sources{nullptr}; // Specify the tactics to be used by adding (+) or removing (-) tactics from the default + // tactic sources (default = all available tactics) e.g. "-CUDNN,+CUBLAS" available keys: "CUBLAS"|"CUBLAS_LT"|"CUDNN"|"EDGE_MASK_CONVOLUTIONS" + const char* trt_extra_plugin_lib_paths{nullptr}; // specify extra TensorRT plugin library paths + const char* trt_profile_min_shapes{nullptr}; // Specify the range of the input shapes to build the engine with + const char* trt_profile_max_shapes{nullptr}; // Specify the range of the input shapes to build the engine with + const char* trt_profile_opt_shapes{nullptr}; // Specify the range of the input shapes to build the engine with + int trt_cuda_graph_enable{0}; // Enable CUDA graph in ORT TRT }; diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 486e2ff2b9..8393978120 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -4572,6 +4572,14 @@ ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_MIGraphX, _In_ OrtSessio */ ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Dnnl, _In_ OrtSessionOptions* options, int use_arena); +/* + * This is the old way to add the TensorRT provider to the session. Please use SessionOptionsAppendExecutionProvider_TensorRT_V2 above to access the latest functionality. + * This function always exists, but will only succeed if ONNX Runtime was built with TensorRT support and the TensorRT provider shared library exists. + * + * \param device_id CUDA device id, starts from zero.
+ */ +ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtSessionOptions* options, int device_id); + #ifdef __cplusplus } #endif diff --git a/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c b/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c index d3239c7442..3a1c0d1bb8 100644 --- a/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c +++ b/java/src/main/native/ai_onnxruntime_OrtSession_SessionOptions.c @@ -19,7 +19,6 @@ #include "onnxruntime/core/providers/nnapi/nnapi_provider_factory.h" #include "onnxruntime/core/providers/tvm/tvm_provider_factory.h" #include "onnxruntime/core/providers/openvino/openvino_provider_factory.h" -#include "onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h" #include "onnxruntime/core/providers/acl/acl_provider_factory.h" #include "onnxruntime/core/providers/armnn/armnn_provider_factory.h" #include "onnxruntime/core/providers/coreml/coreml_provider_factory.h" diff --git a/js/node/src/session_options_helper.cc b/js/node/src/session_options_helper.cc index 70e63da7ce..a0de832d87 100644 --- a/js/node/src/session_options_helper.cc +++ b/js/node/src/session_options_helper.cc @@ -16,7 +16,6 @@ #include "core/providers/dml/dml_provider_factory.h" #endif #ifdef USE_TENSORRT -#include "core/providers/tensorrt/tensorrt_provider_factory.h" #include "core/providers/tensorrt/tensorrt_provider_options.h" #endif #ifdef USE_COREML diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc index 54a4d16e4e..4e466a5d56 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.cc @@ -26,13 +26,63 @@ extern TensorrtLogger& GetTensorrtLogger(); * Note: Current TRT plugin doesn't have APIs to get number of inputs/outputs of the plugin. * So, TensorRTCustomOp uses variadic inputs/outputs to pass ONNX graph validation. */ -common::Status CreateTensorRTCustomOpDomainList(TensorrtExecutionProviderInfo& info) { +common::Status CreateTensorRTCustomOpDomainList(std::vector& domain_list, const std::string extra_plugin_lib_paths) { std::unique_ptr custom_op_domain = std::make_unique(); custom_op_domain->domain_ = "trt.plugins"; // Load any extra TRT plugin library if any. // When the TRT plugin library is loaded, the global static object is created and the plugin is registered to TRT registry. // This is done through macro, for example, REGISTER_TENSORRT_PLUGIN(VisionTransformerPluginCreator). 
+ // extra_plugin_lib_paths has the format of "path_1;path_2....;path_n" + static bool is_loaded = false; + if (!extra_plugin_lib_paths.empty() && !is_loaded) { + std::stringstream extra_plugin_libs(extra_plugin_lib_paths); + std::string lib; + while (std::getline(extra_plugin_libs, lib, ';')) { + auto status = LoadDynamicLibrary(ToPathString(lib)); + if (status == Status::OK()) { + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Successfully load " << lib; + } else { + LOGS_DEFAULT(WARNING) << "[TensorRT EP]" << status.ToString(); + } + } + is_loaded = true; + } + + try { + // Get all registered TRT plugins from registry + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Getting all registered TRT plugins from TRT plugin registry ..."; + TensorrtLogger trt_logger = GetTensorrtLogger(); + initLibNvInferPlugins(&trt_logger, ""); + + int num_plugin_creator = 0; + auto plugin_creators = getPluginRegistry()->getPluginCreatorList(&num_plugin_creator); + std::unordered_set registered_plugin_names; + + for (int i = 0; i < num_plugin_creator; i++) { + auto plugin_creator = plugin_creators[i]; + std::string plugin_name(plugin_creator->getPluginName()); + LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] " << plugin_name << ", version : " << plugin_creator->getPluginVersion(); + + // plugin has different versions and we only register once + if (registered_plugin_names.find(plugin_name) != registered_plugin_names.end()) { + continue; + } + + std::unique_ptr trt_custom_op = std::make_unique(onnxruntime::kTensorrtExecutionProvider, nullptr); + trt_custom_op->SetName(plugin_creator->getPluginName()); + custom_op_domain->custom_ops_.push_back(trt_custom_op.release()); + registered_plugin_names.insert(plugin_name); + } + domain_list.push_back(custom_op_domain.release()); + } catch (const std::exception&) { + LOGS_DEFAULT(WARNING) << "[TensorRT EP] Failed to get TRT plugins from TRT plugin registration. 
Therefore, TRT EP can't create custom ops for TRT plugins"; + } + return Status::OK(); +} + +common::Status CreateTensorRTCustomOpDomainList(TensorrtExecutionProviderInfo& info) { + std::vector domain_list; std::string extra_plugin_lib_paths{""}; if (info.has_trt_options) { if (!info.extra_plugin_lib_paths.empty()) { @@ -44,48 +94,11 @@ common::Status CreateTensorRTCustomOpDomainList(TensorrtExecutionProviderInfo& i extra_plugin_lib_paths = extra_plugin_lib_paths_env; } } - - // extra_plugin_lib_paths has the format of "path_1;path_2....;path_n" - if (!extra_plugin_lib_paths.empty()) { - std::stringstream extra_plugin_libs(extra_plugin_lib_paths); - std::string lib; - while (std::getline(extra_plugin_libs, lib, ';')) { - auto status = LoadDynamicLibrary(ToPathString(lib)); - if (status == Status::OK()) { - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Successfully load " << lib; - } else { - LOGS_DEFAULT(WARNING) << "[TensorRT EP]" << status.ToString(); - } - } + auto status = CreateTensorRTCustomOpDomainList(domain_list, extra_plugin_lib_paths); + if (!domain_list.empty()) { + info.custom_op_domain_list = domain_list; } - - // Get all registered TRT plugins from registry - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] Getting all registered TRT plugins from TRT plugin registry ..."; - TensorrtLogger trt_logger = GetTensorrtLogger(); - initLibNvInferPlugins(&trt_logger, ""); - - int num_plugin_creator = 0; - auto plugin_creators = getPluginRegistry()->getPluginCreatorList(&num_plugin_creator); - std::unordered_set registered_plugin_names; - - for (int i = 0; i < num_plugin_creator; i++) { - auto plugin_creator = plugin_creators[i]; - std::string plugin_name(plugin_creator->getPluginName()); - LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] " << plugin_name << ", version : " << plugin_creator->getPluginVersion(); - - // plugin has different versions and we only register once - if (registered_plugin_names.find(plugin_name) != registered_plugin_names.end()) { - continue; - } - - std::unique_ptr trt_custom_op = std::make_unique(onnxruntime::kTensorrtExecutionProvider, nullptr); - trt_custom_op->SetName(plugin_creator->getPluginName()); - custom_op_domain->custom_ops_.push_back(trt_custom_op.release()); - registered_plugin_names.insert(plugin_name); - } - info.custom_op_domain_list.push_back(custom_op_domain.release()); - - return common::Status::OK(); + return Status::OK(); } void ReleaseTensorRTCustomOpDomain(OrtCustomOpDomain* domain) { diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.h index 98ac3220ab..35bd38d818 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_custom_ops.h @@ -13,6 +13,7 @@ using namespace onnxruntime; namespace onnxruntime { common::Status LoadDynamicLibrary(onnxruntime::PathString library_name); +common::Status CreateTensorRTCustomOpDomainList(std::vector& domain_list, const std::string extra_plugin_lib_paths); common::Status CreateTensorRTCustomOpDomainList(TensorrtExecutionProviderInfo& info); void ReleaseTensorRTCustomOpDomain(OrtCustomOpDomain* domain); void ReleaseTensorRTCustomOpDomainList(std::vector& custom_op_domain_list); diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc index 515fc1c62c..cb7a568d09 100644 --- 
a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.cc @@ -186,4 +186,211 @@ ProviderOptions TensorrtExecutionProviderInfo::ToProviderOptions(const OrtTensor }; return options; } + +/** + * Update OrtTensorRTProviderOptionsV2 instance with ProviderOptions (map of string-based key-value pairs) + * + * Please note that it will reset the OrtTensorRTProviderOptionsV2 instance first and then set up the provided provider options. + * See TensorrtExecutionProviderInfo::FromProviderOptions() for more details. This function will also be called by the C API UpdateTensorRTProviderOptions(). + * + * \param provider_options - a pointer to OrtTensorRTProviderOptionsV2 instance + * \param options - a reference to ProviderOptions instance + * \param string_copy - if it's true, it uses strncpy() to copy 'provider option' string from ProviderOptions instance to where the 'provider option' const char pointer in OrtTensorRTProviderOptionsV2 instance points to. + * if it's false, it only saves the pointer and no strncpy() is performed. + * + * Note: If strncpy is involved, please remember to deallocate the copied strings or simply call the C API ReleaseTensorRTProviderOptions. + */ +void TensorrtExecutionProviderInfo::UpdateProviderOptions(void* provider_options, const ProviderOptions& options, bool string_copy) { + if (provider_options == nullptr) { + return; + } + TensorrtExecutionProviderInfo internal_options = onnxruntime::TensorrtExecutionProviderInfo::FromProviderOptions(options); + auto& trt_provider_options_v2 = *reinterpret_cast<OrtTensorRTProviderOptionsV2*>(provider_options); + trt_provider_options_v2.device_id = internal_options.device_id; + + // The 'has_user_compute_stream' of the OrtTensorRTProviderOptionsV2 instance can be set by C API UpdateTensorRTProviderOptionsWithValue() as well + // We only set the 'has_user_compute_stream' of the OrtTensorRTProviderOptionsV2 instance if it is provided in options + if (options.find("has_user_compute_stream") != options.end()) { + trt_provider_options_v2.has_user_compute_stream = internal_options.has_user_compute_stream; + } + + trt_provider_options_v2.trt_max_partition_iterations = internal_options.max_partition_iterations; + trt_provider_options_v2.trt_min_subgraph_size = internal_options.min_subgraph_size; + trt_provider_options_v2.trt_max_workspace_size = internal_options.max_workspace_size; + trt_provider_options_v2.trt_fp16_enable = internal_options.fp16_enable; + trt_provider_options_v2.trt_int8_enable = internal_options.int8_enable; + + if (string_copy) { + char* dest = nullptr; + auto str_size = internal_options.int8_calibration_table_name.size(); + if (str_size == 0) { + trt_provider_options_v2.trt_int8_calibration_table_name = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.int8_calibration_table_name.c_str(), str_size); +#else + strncpy(dest, internal_options.int8_calibration_table_name.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_provider_options_v2.trt_int8_calibration_table_name = (const char*)dest; + } + } else { + trt_provider_options_v2.trt_int8_calibration_table_name = internal_options.int8_calibration_table_name.c_str(); + } + + trt_provider_options_v2.trt_int8_use_native_calibration_table = internal_options.int8_use_native_calibration_table; + trt_provider_options_v2.trt_dla_enable = internal_options.dla_enable; + trt_provider_options_v2.trt_dla_core = internal_options.dla_core; +
trt_provider_options_v2.trt_dump_subgraphs = internal_options.dump_subgraphs; + trt_provider_options_v2.trt_engine_cache_enable = internal_options.engine_cache_enable; + + if (string_copy) { + char* dest = nullptr; + auto str_size = internal_options.engine_cache_path.size(); + if (str_size == 0) { + trt_provider_options_v2.trt_engine_cache_path = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.engine_cache_path.c_str(), str_size); +#else + strncpy(dest, internal_options.engine_cache_path.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_provider_options_v2.trt_engine_cache_path = (const char*)dest; + } + } else { + trt_provider_options_v2.trt_engine_cache_path = internal_options.engine_cache_path.c_str(); + } + + trt_provider_options_v2.trt_engine_decryption_enable = internal_options.engine_decryption_enable; + + if (string_copy) { + char* dest = nullptr; + auto str_size = internal_options.engine_decryption_lib_path.size(); + if (str_size == 0) { + trt_provider_options_v2.trt_engine_decryption_lib_path = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.engine_decryption_lib_path.c_str(), str_size); +#else + strncpy(dest, internal_options.engine_decryption_lib_path.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_provider_options_v2.trt_engine_decryption_lib_path = (const char*)dest; + } + } else { + trt_provider_options_v2.trt_engine_decryption_lib_path = internal_options.engine_decryption_lib_path.c_str(); + } + + trt_provider_options_v2.trt_force_sequential_engine_build = internal_options.force_sequential_engine_build; + trt_provider_options_v2.trt_context_memory_sharing_enable = internal_options.context_memory_sharing_enable; + trt_provider_options_v2.trt_layer_norm_fp32_fallback = internal_options.layer_norm_fp32_fallback; + trt_provider_options_v2.trt_timing_cache_enable = internal_options.timing_cache_enable; + trt_provider_options_v2.trt_force_timing_cache = internal_options.force_timing_cache; + trt_provider_options_v2.trt_detailed_build_log = internal_options.detailed_build_log; + trt_provider_options_v2.trt_build_heuristics_enable = internal_options.build_heuristics_enable; + trt_provider_options_v2.trt_sparsity_enable = internal_options.sparsity_enable; + trt_provider_options_v2.trt_builder_optimization_level = internal_options.builder_optimization_level; + trt_provider_options_v2.trt_auxiliary_streams = internal_options.auxiliary_streams; + + if (string_copy) { + char* dest = nullptr; + auto str_size = internal_options.tactic_sources.size(); + if (str_size == 0) { + trt_provider_options_v2.trt_tactic_sources = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.tactic_sources.c_str(), str_size); +#else + strncpy(dest, internal_options.tactic_sources.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_provider_options_v2.trt_tactic_sources = (const char*)dest; + } + } else { + trt_provider_options_v2.trt_tactic_sources = internal_options.tactic_sources.c_str(); + } + + if (string_copy) { + char* dest = nullptr; + auto str_size = internal_options.extra_plugin_lib_paths.size(); + if (str_size == 0) { + trt_provider_options_v2.trt_extra_plugin_lib_paths = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.extra_plugin_lib_paths.c_str(), str_size); +#else + strncpy(dest, 
internal_options.extra_plugin_lib_paths.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_provider_options_v2.trt_extra_plugin_lib_paths = (const char*)dest; + } + } else { + trt_provider_options_v2.trt_extra_plugin_lib_paths = internal_options.extra_plugin_lib_paths.c_str(); + } + + if (string_copy) { + char* dest = nullptr; + auto str_size = internal_options.profile_min_shapes.size(); + if (str_size == 0) { + trt_provider_options_v2.trt_profile_min_shapes = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.profile_min_shapes.c_str(), str_size); +#else + strncpy(dest, internal_options.profile_min_shapes.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_provider_options_v2.trt_profile_min_shapes = (const char*)dest; + } + } else { + trt_provider_options_v2.trt_profile_min_shapes = internal_options.profile_min_shapes.c_str(); + } + + if (string_copy) { + char* dest = nullptr; + auto str_size = internal_options.profile_max_shapes.size(); + if (str_size == 0) { + trt_provider_options_v2.trt_profile_max_shapes = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.profile_max_shapes.c_str(), str_size); +#else + strncpy(dest, internal_options.profile_max_shapes.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_provider_options_v2.trt_profile_max_shapes = (const char*)dest; + } + } else { + trt_provider_options_v2.trt_profile_max_shapes = internal_options.profile_max_shapes.c_str(); + } + + if (string_copy) { + char* dest = nullptr; + auto str_size = internal_options.profile_opt_shapes.size(); + if (str_size == 0) { + trt_provider_options_v2.trt_profile_opt_shapes = nullptr; + } else { + dest = new char[str_size + 1]; +#ifdef _MSC_VER + strncpy_s(dest, str_size + 1, internal_options.profile_opt_shapes.c_str(), str_size); +#else + strncpy(dest, internal_options.profile_opt_shapes.c_str(), str_size); +#endif + dest[str_size] = '\0'; + trt_provider_options_v2.trt_profile_opt_shapes = (const char*)dest; + } + } else { + trt_provider_options_v2.trt_profile_opt_shapes = internal_options.profile_opt_shapes.c_str(); + } + + trt_provider_options_v2.trt_cuda_graph_enable = internal_options.cuda_graph_enable; +} } // namespace onnxruntime diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h index 4fb9837e1c..61a6bf0821 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h +++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_info.h @@ -54,6 +54,7 @@ struct TensorrtExecutionProviderInfo { static TensorrtExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); static ProviderOptions ToProviderOptions(const TensorrtExecutionProviderInfo& info); static ProviderOptions ToProviderOptions(const OrtTensorRTProviderOptionsV2& info); + static void UpdateProviderOptions(void* provider_options, const ProviderOptions& options, bool string_copy); std::vector custom_op_domain_list; }; diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc index 18ec113734..b5dbe1ac45 100644 --- a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.cc @@ -2,7 +2,7 @@ // Licensed under the MIT License. 
#include "core/providers/shared_library/provider_api.h" -#include "core/providers/tensorrt/tensorrt_provider_factory.h" +#include "tensorrt_provider_factory.h" #include #include "tensorrt_execution_provider.h" #include "tensorrt_provider_factory_creator.h" @@ -18,22 +18,45 @@ namespace onnxruntime { void InitializeRegistry(); void DeleteRegistry(); +struct ProviderInfo_TensorRT_Impl final : ProviderInfo_TensorRT { + OrtStatus* GetCurrentGpuDeviceId(_In_ int* device_id) override { + auto cuda_err = cudaGetDevice(device_id); + if (cuda_err != cudaSuccess) { + return CreateStatus(ORT_FAIL, "Failed to get device id."); + } + return nullptr; + } + + OrtStatus* UpdateProviderOptions(void* provider_options, const ProviderOptions& options, bool string_copy) override { + TensorrtExecutionProviderInfo::UpdateProviderOptions(provider_options, options, string_copy); + return nullptr; + } + + OrtStatus* GetTensorRTCustomOpDomainList(std::vector& domain_list, const std::string extra_plugin_lib_paths) override { + common::Status status = CreateTensorRTCustomOpDomainList(domain_list, extra_plugin_lib_paths); + if (!status.IsOK()) { + return CreateStatus(ORT_FAIL, "[TensorRT EP] Can't create custom ops for TRT plugins."); + } + return nullptr; + } + + OrtStatus* ReleaseCustomOpDomainList(std::vector& domain_list) override { + ReleaseTensorRTCustomOpDomainList(domain_list); + return nullptr; + } + +} g_info; + struct TensorrtProviderFactory : IExecutionProviderFactory { TensorrtProviderFactory(const TensorrtExecutionProviderInfo& info) : info_{info} {} ~TensorrtProviderFactory() override {} std::unique_ptr CreateProvider() override; - void GetCustomOpDomainList(std::vector& custom_op_domain_list); - private: TensorrtExecutionProviderInfo info_; }; -void TensorrtProviderFactory::GetCustomOpDomainList(std::vector& custom_op_domain_list) { - custom_op_domain_list = info_.custom_op_domain_list; -} - std::unique_ptr TensorrtProviderFactory::CreateProvider() { return std::make_unique(info_); } @@ -46,6 +69,7 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat } struct Tensorrt_Provider : Provider { + void* GetInfo() override { return &g_info; } std::shared_ptr CreateExecutionProviderFactory(int device_id) override { TensorrtExecutionProviderInfo info; info.device_id = device_id; @@ -55,6 +79,7 @@ struct Tensorrt_Provider : Provider { if (!status.IsOK()) { LOGS_DEFAULT(WARNING) << "[TensorRT EP] Failed to get TRT plugins from TRT plugin registration."; } + return std::make_shared(info); } @@ -104,161 +129,8 @@ struct Tensorrt_Provider : Provider { return std::make_shared(info); } - /** - * This function will be called by the C API UpdateTensorRTProviderOptions(). 
- * - * Please note that it will reset the OrtProviderOptionsV2 instance first and then set up the provided provider options - * See TensorrtExecutionProviderInfo::FromProviderOptions() for more details - */ void UpdateProviderOptions(void* provider_options, const ProviderOptions& options) override { - auto internal_options = onnxruntime::TensorrtExecutionProviderInfo::FromProviderOptions(options); - auto& trt_options = *reinterpret_cast(provider_options); - trt_options.device_id = internal_options.device_id; - - // The 'has_user_compute_stream' of the OrtTensorRTProviderOptionsV2 instance can be set by C API UpdateTensorRTProviderOptionsWithValue() as well - // We only set the 'has_user_compute_stream' of the OrtTensorRTProviderOptionsV2 instance if it is provided in options - if (options.find("has_user_compute_stream") != options.end()) { - trt_options.has_user_compute_stream = internal_options.has_user_compute_stream; - } - - trt_options.trt_max_partition_iterations = internal_options.max_partition_iterations; - trt_options.trt_min_subgraph_size = internal_options.min_subgraph_size; - trt_options.trt_max_workspace_size = internal_options.max_workspace_size; - trt_options.trt_fp16_enable = internal_options.fp16_enable; - trt_options.trt_int8_enable = internal_options.int8_enable; - - char* dest = nullptr; - auto str_size = internal_options.int8_calibration_table_name.size(); - if (str_size == 0) { - trt_options.trt_int8_calibration_table_name = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.int8_calibration_table_name.c_str(), str_size); -#else - strncpy(dest, internal_options.int8_calibration_table_name.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_options.trt_int8_calibration_table_name = (const char*)dest; - } - - trt_options.trt_int8_use_native_calibration_table = internal_options.int8_use_native_calibration_table; - trt_options.trt_dla_enable = internal_options.dla_enable; - trt_options.trt_dla_core = internal_options.dla_core; - trt_options.trt_dump_subgraphs = internal_options.dump_subgraphs; - trt_options.trt_engine_cache_enable = internal_options.engine_cache_enable; - - str_size = internal_options.engine_cache_path.size(); - if (str_size == 0) { - trt_options.trt_engine_cache_path = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.engine_cache_path.c_str(), str_size); -#else - strncpy(dest, internal_options.engine_cache_path.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_options.trt_engine_cache_path = (const char*)dest; - } - - trt_options.trt_engine_decryption_enable = internal_options.engine_decryption_enable; - - str_size = internal_options.engine_decryption_lib_path.size(); - if (str_size == 0) { - trt_options.trt_engine_decryption_lib_path = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.engine_decryption_lib_path.c_str(), str_size); -#else - strncpy(dest, internal_options.engine_decryption_lib_path.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_options.trt_engine_decryption_lib_path = (const char*)dest; - } - - trt_options.trt_force_sequential_engine_build = internal_options.force_sequential_engine_build; - trt_options.trt_context_memory_sharing_enable = internal_options.context_memory_sharing_enable; - trt_options.trt_layer_norm_fp32_fallback = internal_options.layer_norm_fp32_fallback; - trt_options.trt_timing_cache_enable 
= internal_options.timing_cache_enable; - trt_options.trt_force_timing_cache = internal_options.force_timing_cache; - trt_options.trt_detailed_build_log = internal_options.detailed_build_log; - trt_options.trt_build_heuristics_enable = internal_options.build_heuristics_enable; - trt_options.trt_sparsity_enable = internal_options.sparsity_enable; - trt_options.trt_builder_optimization_level = internal_options.builder_optimization_level; - trt_options.trt_auxiliary_streams = internal_options.auxiliary_streams; - str_size = internal_options.tactic_sources.size(); - if (str_size == 0) { - trt_options.trt_tactic_sources = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.tactic_sources.c_str(), str_size); -#else - strncpy(dest, internal_options.tactic_sources.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_options.trt_tactic_sources = (const char*)dest; - } - - str_size = internal_options.extra_plugin_lib_paths.size(); - if (str_size == 0) { - trt_options.trt_extra_plugin_lib_paths = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.extra_plugin_lib_paths.c_str(), str_size); -#else - strncpy(dest, internal_options.extra_plugin_lib_paths.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_options.trt_extra_plugin_lib_paths = (const char*)dest; - } - - str_size = internal_options.profile_min_shapes.size(); - if (str_size == 0) { - trt_options.trt_profile_min_shapes = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.profile_min_shapes.c_str(), str_size); -#else - strncpy(dest, internal_options.profile_min_shapes.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_options.trt_profile_min_shapes = (const char*)dest; - } - - str_size = internal_options.profile_max_shapes.size(); - if (str_size == 0) { - trt_options.trt_profile_max_shapes = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.profile_max_shapes.c_str(), str_size); -#else - strncpy(dest, internal_options.profile_max_shapes.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_options.trt_profile_max_shapes = (const char*)dest; - } - - str_size = internal_options.profile_opt_shapes.size(); - if (str_size == 0) { - trt_options.trt_profile_opt_shapes = nullptr; - } else { - dest = new char[str_size + 1]; -#ifdef _MSC_VER - strncpy_s(dest, str_size + 1, internal_options.profile_opt_shapes.c_str(), str_size); -#else - strncpy(dest, internal_options.profile_opt_shapes.c_str(), str_size); -#endif - dest[str_size] = '\0'; - trt_options.trt_profile_opt_shapes = (const char*)dest; - } - - trt_options.trt_cuda_graph_enable = internal_options.cuda_graph_enable; + TensorrtExecutionProviderInfo::UpdateProviderOptions(provider_options, options, true); } ProviderOptions GetProviderOptions(const void* provider_options) override { @@ -266,11 +138,6 @@ struct Tensorrt_Provider : Provider { return onnxruntime::TensorrtExecutionProviderInfo::ToProviderOptions(options); } - void GetCustomOpDomainList(IExecutionProviderFactory* factory, std::vector& custom_op_domains_ptr) override { - TensorrtProviderFactory* trt_factory = reinterpret_cast(factory); - trt_factory->GetCustomOpDomainList(custom_op_domains_ptr); - } - void Initialize() override { InitializeRegistry(); } diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h 
b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h new file mode 100644 index 0000000000..231e14e5c9 --- /dev/null +++ b/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h @@ -0,0 +1,17 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "onnxruntime_c_api.h" +#include "core/framework/provider_options.h" + +namespace onnxruntime { +struct ProviderInfo_TensorRT { + virtual OrtStatus* GetCurrentGpuDeviceId(_In_ int* device_id) = 0; + virtual OrtStatus* UpdateProviderOptions(void* provider_options, const ProviderOptions& options, bool string_copy) = 0; + virtual OrtStatus* GetTensorRTCustomOpDomainList(std::vector& domain_list, const std::string extra_plugin_lib_paths) = 0; + virtual OrtStatus* ReleaseCustomOpDomainList(std::vector& domain_list) = 0; + + protected: + ~ProviderInfo_TensorRT() = default; // Can only be destroyed through a subclass instance +}; +} // namespace onnxruntime diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index bf7a3bbd9d..950275c7c5 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -108,6 +108,8 @@ namespace onnxruntime { ProviderInfo_CUDA* TryGetProviderInfo_CUDA(); ProviderInfo_CUDA& GetProviderInfo_CUDA(); +ProviderInfo_TensorRT* TryGetProviderInfo_TensorRT(); +ProviderInfo_TensorRT& GetProviderInfo_TensorRT(); ProviderInfo_CANN* TryGetProviderInfo_CANN(); ProviderInfo_CANN& GetProviderInfo_CANN(); ProviderInfo_Dnnl* TryGetProviderInfo_Dnnl(); @@ -1418,10 +1420,6 @@ std::shared_ptr TensorrtProviderFactoryCreator::Creat return s_library_tensorrt.Get().CreateExecutionProviderFactory(provider_options); } -void TensorrtProviderGetCustomOpDomainList(IExecutionProviderFactory* factory, std::vector& custom_op_domains_ptr) { - s_library_tensorrt.Get().GetCustomOpDomainList(factory, custom_op_domains_ptr); -} - std::shared_ptr MIGraphXProviderFactoryCreator::Create(const OrtMIGraphXProviderOptions* provider_options) { return s_library_migraphx.Get().CreateExecutionProviderFactory(provider_options); } @@ -1474,6 +1472,20 @@ ProviderInfo_OpenVINO* GetProviderInfo_OpenVINO() { return reinterpret_cast(s_library_openvino.Get().GetInfo()); } +ProviderInfo_TensorRT* TryGetProviderInfo_TensorRT() try { + return reinterpret_cast(s_library_tensorrt.Get().GetInfo()); +} catch (const std::exception& exception) { + LOGS_DEFAULT(ERROR) << exception.what(); + return nullptr; +} + +ProviderInfo_TensorRT& GetProviderInfo_TensorRT() { + if (auto* info = TryGetProviderInfo_TensorRT()) + return *info; + + ORT_THROW("TensorRT Provider not available, can't get interface for it"); +} + ProviderInfo_CUDA* TryGetProviderInfo_CUDA() try { return reinterpret_cast(s_library_cuda.Get().GetInfo()); } catch (const std::exception& exception) { @@ -1633,7 +1645,9 @@ ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_Tensorrt, _In_ OrtS options->provider_factories.push_back(factory); std::vector custom_op_domains; - TensorrtProviderGetCustomOpDomainList(factory.get(), custom_op_domains); + std::string extra_plugin_lib_paths = onnxruntime::Env::Default().GetEnvironmentVar("trt_extra_plugin_lib_paths"); + onnxruntime::ProviderInfo_TensorRT& provider_info = onnxruntime::GetProviderInfo_TensorRT(); + provider_info.GetTensorRTCustomOpDomainList(custom_op_domains, extra_plugin_lib_paths); for (auto ptr : custom_op_domains) { options->custom_op_domains_.push_back(ptr); } @@ -1664,7 
+1678,8 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT, _In options->provider_factories.push_back(factory); std::vector custom_op_domains; - TensorrtProviderGetCustomOpDomainList(factory.get(), custom_op_domains); + onnxruntime::ProviderInfo_TensorRT& provider_info = onnxruntime::GetProviderInfo_TensorRT(); + provider_info.GetTensorRTCustomOpDomainList(custom_op_domains, ""); for (auto ptr : custom_op_domains) { options->custom_op_domains_.push_back(ptr); } @@ -1772,10 +1787,13 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider_TensorRT_V2, options->provider_factories.push_back(factory); std::vector custom_op_domains; - TensorrtProviderGetCustomOpDomainList(factory.get(), custom_op_domains); + std::string extra_plugin_lib_paths = (tensorrt_options == nullptr || tensorrt_options->trt_extra_plugin_lib_paths == nullptr) ? "" : tensorrt_options->trt_extra_plugin_lib_paths; + onnxruntime::ProviderInfo_TensorRT& provider_info = onnxruntime::GetProviderInfo_TensorRT(); + provider_info.GetTensorRTCustomOpDomainList(custom_op_domains, extra_plugin_lib_paths); for (auto ptr : custom_op_domains) { options->custom_op_domains_.push_back(ptr); } + return nullptr; API_IMPL_END } @@ -1784,34 +1802,6 @@ ORT_API_STATUS_IMPL(OrtApis::CreateTensorRTProviderOptions, _Outptr_ OrtTensorRT API_IMPL_BEGIN #ifdef USE_TENSORRT auto options = std::make_unique(); - options->device_id = 0; - options->has_user_compute_stream = 0; - options->user_compute_stream = nullptr; - options->trt_max_partition_iterations = 1000; - options->trt_min_subgraph_size = 1; - options->trt_max_workspace_size = 1 << 30; - options->trt_fp16_enable = false; - options->trt_int8_enable = false; - options->trt_int8_calibration_table_name = nullptr; - options->trt_int8_use_native_calibration_table = false; - options->trt_dla_enable = false; - options->trt_dla_core = false; - options->trt_dump_subgraphs = false; - options->trt_engine_cache_enable = false; - options->trt_engine_cache_path = nullptr; - options->trt_engine_decryption_enable = false; - options->trt_engine_decryption_lib_path = nullptr; - options->trt_force_sequential_engine_build = false; - options->trt_context_memory_sharing_enable = false; - options->trt_layer_norm_fp32_fallback = false; - options->trt_timing_cache_enable = false; - options->trt_force_timing_cache = false; - options->trt_detailed_build_log = false; - options->trt_extra_plugin_lib_paths = nullptr; - options->trt_profile_min_shapes = nullptr; - options->trt_profile_max_shapes = nullptr; - options->trt_profile_opt_shapes = nullptr; - options->trt_cuda_graph_enable = false; *out = options.release(); return nullptr; #else diff --git a/onnxruntime/python/onnxruntime_inference_collection.py b/onnxruntime/python/onnxruntime_inference_collection.py index bcc6f15129..1a3e22142f 100644 --- a/onnxruntime/python/onnxruntime_inference_collection.py +++ b/onnxruntime/python/onnxruntime_inference_collection.py @@ -465,6 +465,9 @@ class InferenceSession(Session): ) session_options = self._sess_options if self._sess_options else C.get_default_session_options() + + self._register_ep_custom_ops(session_options, providers, provider_options) + if self._model_path: sess = C.InferenceSession(session_options, self._model_path, True, self._read_config_from_model) else: @@ -507,6 +510,13 @@ class InferenceSession(Session): self._sess_options = self._sess_options_initial self._create_inference_session(providers, provider_options) + def _register_ep_custom_ops(self, session_options, providers, 
provider_options): + for i in range(len(providers)): + if providers[i] == "TensorrtExecutionProvider": + C.register_tensorrt_plugins_as_custom_ops(session_options, provider_options[i]) + elif isinstance(providers[i], tuple) and providers[i][0] == "TensorrtExecutionProvider": + C.register_tensorrt_plugins_as_custom_ops(session_options, providers[i][1]) + class IOBinding: """ diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 907ea0ec41..95a8f59186 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -430,6 +430,25 @@ const ROCMExecutionProviderInfo GetRocmExecutionProviderInfo(ProviderInfo_ROCM* } #endif +#ifdef USE_TENSORRT +void RegisterTensorRTPluginsAsCustomOps(PySessionOptions& so, const ProviderOptions& options) { + if (auto* tensorrt_provider_info = TryGetProviderInfo_TensorRT()) { + std::string trt_extra_plugin_lib_paths = ""; + const auto it = options.find("trt_extra_plugin_lib_paths"); + if (it != options.end()) { + trt_extra_plugin_lib_paths = it->second; + } + std::vector domain_list; + tensorrt_provider_info->GetTensorRTCustomOpDomainList(domain_list, trt_extra_plugin_lib_paths); + for (auto ptr : domain_list) { + so.custom_op_domains_.push_back(ptr); + } + } else { + ORT_THROW("Please install TensorRT libraries as mentioned in the GPU requirements page, make sure they're in the PATH or LD_LIBRARY_PATH, and that your GPU is supported."); + } +} +#endif + std::unique_ptr CreateExecutionProviderInstance( const SessionOptions& session_options, const std::string& type, @@ -443,43 +462,14 @@ std::unique_ptr CreateExecutionProviderInstance( // If the environment variable 'ORT_TENSORRT_UNAVAILABLE' exists, then we do not load TensorRT. This is set by _ld_preload for the manylinux case // as in that case, trying to load the library itself will result in a crash due to the way that auditwheel strips dependencies. if (Env::Default().GetEnvironmentVar("ORT_TENSORRT_UNAVAILABLE").empty()) { - std::string calibration_table, cache_path, lib_path, min_profile, max_profile, opt_profile; + // provider_options_map is just a reference to the ProviderOptionsMap instance, so it can be released anytime from application. + // So we need these std::string variables defined here as they will be kept alive for the lifetime of TRT EP and we can still access them from OrtTensorRTProviderOptionsV2 instance. + // (The reason is string copy is involved, for example params.trt_engine_cache_path = cache_path.c_str() and those std::string variable is referenced by OrtTensorRTProviderOptionsV2 instance + // and TRT EP instance, so it won't be released.) 
+ std::string calibration_table, cache_path, lib_path, trt_tactic_sources, trt_extra_plugin_lib_paths, min_profile, max_profile, opt_profile; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { - OrtTensorRTProviderOptionsV2 params{ - 0, - 0, - nullptr, - 1000, - 1, - 1 << 30, - 0, - 0, - nullptr, - 0, - 0, - 0, - 0, - 0, - nullptr, - 0, - nullptr, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 2, - -1, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - 0}; + OrtTensorRTProviderOptionsV2 params; for (auto option : it->second) { if (option.first == "device_id") { if (!option.second.empty()) { @@ -666,13 +656,15 @@ std::unique_ptr CreateExecutionProviderInstance( } } else if (option.first == "trt_tactic_sources") { if (!option.second.empty()) { - params.trt_tactic_sources = option.second.c_str(); + trt_tactic_sources = option.second; + params.trt_tactic_sources = trt_tactic_sources.c_str(); } else { ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_tactic_sources' should be a string. e.g. \"-CUDNN,+CUBLAS\" available keys: \"CUBLAS\"|\"CUBLAS_LT\"|\"CUDNN\"|\"EDGE_MASK_CONVOLUTIONS\".\n"); } } else if (option.first == "trt_extra_plugin_lib_paths") { if (!option.second.empty()) { - params.trt_extra_plugin_lib_paths = option.second.c_str(); + trt_extra_plugin_lib_paths = option.second; + params.trt_extra_plugin_lib_paths = trt_extra_plugin_lib_paths.c_str(); } else { ORT_THROW("[ERROR] [TensorRT] The value for the key 'trt_extra_plugin_lib_paths' should be a path string.\n"); } @@ -1209,6 +1201,12 @@ void addGlobalMethods(py::module& m) { }); #endif +#ifdef USE_TENSORRT + m.def( + "register_tensorrt_plugins_as_custom_ops", [](PySessionOptions& so, const ProviderOptions& options) { RegisterTensorRTPluginsAsCustomOps(so, options); }, + "Register TensorRT plugins as custom ops."); +#endif + #ifdef ENABLE_ATEN m.def("register_aten_op_executor", [](const std::string& is_tensor_argument_address_str, const std::string& aten_op_executor_address_str) -> void { diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h index 18a9079b5c..5bb6bcc38b 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.h +++ b/onnxruntime/python/onnxruntime_pybind_state_common.h @@ -180,6 +180,13 @@ extern onnxruntime::ArenaExtendStrategy arena_extend_strategy; } // namespace onnxruntime #endif +#ifdef USE_TENSORRT +namespace onnxruntime { +ProviderInfo_TensorRT* TryGetProviderInfo_TensorRT(); +ProviderInfo_TensorRT& GetProviderInfo_TensorRT(); +} // namespace onnxruntime +#endif + #ifdef USE_CANN namespace onnxruntime { ProviderInfo_CANN* TryGetProviderInfo_CANN(); diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index da906ebf76..999f04398d 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -690,11 +690,7 @@ TEST_P(ModelTest, Run) { #endif else if (provider_name == "tensorrt") { if (test_case_name.find(ORT_TSTR("FLOAT16")) != std::string::npos) { - OrtTensorRTProviderOptionsV2 params{0, 0, nullptr, 1000, 1, 1 << 30, - 1, // enable fp16 - 0, nullptr, 0, 0, 0, 0, 0, nullptr, 0, nullptr, 0, 0, 0, 0, 0, 0, 0, 0, - 3, -1, nullptr, nullptr, nullptr, nullptr, nullptr, 0}; - + OrtTensorRTProviderOptionsV2 params; ortso.AppendExecutionProvider_TensorRT_V2(params); } else { OrtTensorRTProviderOptionsV2* ep_option = nullptr; diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc 
b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 288cdfca2b..aa96e15336 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -175,41 +175,7 @@ void RunWithOneSessionSingleThreadInference(std::string model_name, std::string std::vector expected_dims_mul_m = {1, 3, 2}; std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; - OrtTensorRTProviderOptionsV2 params{ - 0, - 0, - nullptr, - 1000, - 1, - 1 << 30, - 0, - 0, - nullptr, - 0, - 0, - 0, - 0, - 0, - nullptr, - 0, - nullptr, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 3, - -1, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - 0}; - + OrtTensorRTProviderOptionsV2 params; params.trt_engine_cache_enable = 1; std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); @@ -259,41 +225,7 @@ void RunWithOneSessionMultiThreadsInference(std::string model_name, std::string std::vector expected_dims_nonzero_m = {3, 6}; std::vector expected_values_nonzero_m = {0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 0, 1, 0, 1, 0, 1}; - OrtTensorRTProviderOptionsV2 params{ - 0, - 0, - nullptr, - 1000, - 1, - 1 << 30, - 0, - 0, - nullptr, - 0, - 0, - 0, - 0, - 0, - nullptr, - 0, - nullptr, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 3, - -1, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - 0}; - + OrtTensorRTProviderOptionsV2 params; params.trt_engine_cache_enable = 1; std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); @@ -422,41 +354,7 @@ TEST(TensorrtExecutionProviderTest, TRTPluginsCustomOpTest) { output_names.push_back("output"); std::vector fetches; - OrtTensorRTProviderOptionsV2 params{ - 0, - 0, - nullptr, - 1000, - 1, - 1 << 30, - 0, - 0, - nullptr, - 0, - 0, - 0, - 0, - 0, - nullptr, - 0, - nullptr, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 3, - -1, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - 0}; - + OrtTensorRTProviderOptionsV2 params; std::unique_ptr execution_provider = TensorrtExecutionProviderWithOptions(¶ms); EXPECT_TRUE(session_object.RegisterExecutionProvider(std::move(execution_provider)).IsOK()); std::cout << model_name << std::endl; @@ -516,41 +414,7 @@ TEST_P(TensorrtExecutionProviderCacheTest, Run) { std::vector expected_dims_mul_m = {1, 3, 2}; std::vector expected_values_mul_m = {3.0f, 6.0f, 9.0f, 12.0f, 15.0f, 18.0f}; - OrtTensorRTProviderOptionsV2 params{ - 0, - 0, - nullptr, - 1000, - 1, - 1 << 30, - 0, - 0, - nullptr, - 0, - 0, - 0, - 0, - 0, - nullptr, - 0, - nullptr, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 0, - 3, - -1, - nullptr, - nullptr, - nullptr, - nullptr, - nullptr, - 0}; - + OrtTensorRTProviderOptionsV2 params; if (cache_type.compare("engine") == 0) { /* Following code block tests the functionality of engine and optimization profile of ORT TRT, including: * - engine cache serialization/de-serialization diff --git a/onnxruntime/test/util/include/providers.h b/onnxruntime/test/util/include/providers.h index a6420e2342..aa489e6cd9 100644 --- a/onnxruntime/test/util/include/providers.h +++ b/onnxruntime/test/util/include/providers.h @@ -10,9 +10,6 @@ #ifdef USE_TVM #include "core/providers/tvm/tvm_provider_factory.h" #endif -#ifdef USE_TENSORRT -#include "core/providers/tensorrt/tensorrt_provider_factory.h" -#endif #ifdef USE_OPENVINO #include 
"core/providers/openvino/openvino_provider_factory.h" #endif diff --git a/tools/ci_build/gen_def.py b/tools/ci_build/gen_def.py index 9821f3a901..b53fb33659 100755 --- a/tools/ci_build/gen_def.py +++ b/tools/ci_build/gen_def.py @@ -67,7 +67,19 @@ with open(args.output_source, "w") as file: # external symbols are removed, xnnpack ep will be created via the standard ORT API. # https://github.com/microsoft/onnxruntime/pull/11798 - if c not in ("vitisai", "winml", "cuda", "rocm", "migraphx", "qnn", "snpe", "xnnpack", "cann", "dnnl"): + if c not in ( + "vitisai", + "winml", + "cuda", + "rocm", + "migraphx", + "qnn", + "snpe", + "xnnpack", + "cann", + "dnnl", + "tensorrt", + ): file.write(f"#include \n") file.write("void* GetFunctionEntryByName(const char* name){\n") for symbol in symbols: diff --git a/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml b/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml index 07aac08dac..663ce4338c 100644 --- a/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml +++ b/tools/ci_build/github/azure-pipelines/templates/c-api-artifacts-package-and-publish-steps-windows.yml @@ -58,7 +58,6 @@ steps: copy $(Build.SourcesDirectory)\include\onnxruntime\core\session\onnxruntime_*.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include copy $(Build.SourcesDirectory)\include\onnxruntime\core\framework\provider_options.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include copy $(Build.SourcesDirectory)\include\onnxruntime\core\providers\cpu\cpu_provider_factory.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include - copy $(Build.SourcesDirectory)\include\onnxruntime\core\providers\tensorrt\tensorrt_provider_factory.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include copy $(Build.SourcesDirectory)\orttraining\orttraining\training_api\include\onnxruntime_training*.h $(Build.BinariesDirectory)\${{parameters.artifactName}}\include REM copy the README, license and TPN diff --git a/tools/ci_build/github/linux/copy_strip_binary.sh b/tools/ci_build/github/linux/copy_strip_binary.sh index 63690b69fc..73444b35a6 100755 --- a/tools/ci_build/github/linux/copy_strip_binary.sh +++ b/tools/ci_build/github/linux/copy_strip_binary.sh @@ -27,7 +27,6 @@ if [[ -f "$BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_cuda.so" ]]; then fi if [[ -f "$BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_tensorrt.so" ]]; then cp $BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_tensorrt.so $BINARY_DIR/$ARTIFACT_NAME/lib - cp $SOURCE_DIR/include/onnxruntime/core/providers/tensorrt/tensorrt_provider_factory.h $BINARY_DIR/$ARTIFACT_NAME/include fi if [[ -f "$BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_rocm.so" ]]; then cp $BINARY_DIR/$BUILD_CONFIG/libonnxruntime_providers_shared.so $BINARY_DIR/$ARTIFACT_NAME/lib diff --git a/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh b/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh index 9492b7bcf5..04ac0e35a6 100755 --- a/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh +++ b/tools/ci_build/github/linux/extract_and_bundle_gpu_package.sh @@ -28,4 +28,3 @@ rm $ARTIFACT_DIR/onnxruntime-linux-x64-cuda-*.tgz cp onnxruntime-linux-x64-tensorrt/*/lib/libonnxruntime.so* onnxruntime-linux-x64-gpu/*/lib cp onnxruntime-linux-x64-tensorrt/*/lib/libonnxruntime_providers_tensorrt.so onnxruntime-linux-x64-gpu/*/lib cp 
onnxruntime-linux-x64-tensorrt/*/lib/libonnxruntime_providers_shared.so onnxruntime-linux-x64-gpu/*/lib -cp onnxruntime-linux-x64-tensorrt/*/include/*tensorrt* onnxruntime-linux-x64-gpu/*/include diff --git a/tools/ci_build/github/windows/bundle_nuget_with_native_headers.bat b/tools/ci_build/github/windows/bundle_nuget_with_native_headers.bat index 01d2633ae2..bc4a799b2b 100644 --- a/tools/ci_build/github/windows/bundle_nuget_with_native_headers.bat +++ b/tools/ci_build/github/windows/bundle_nuget_with_native_headers.bat @@ -7,7 +7,6 @@ FOR /R %%i IN (*.nupkg) do ( set filename=%%~ni IF NOT "!filename:~25,7!"=="Managed" ( mkdir build\native\include - copy %BUILD_SOURCESDIRECTORY%\include\onnxruntime\core\providers\tensorrt\tensorrt_provider_factory.h build\native\include\tensorrt_provider_factory.h 7z a %%~ni.nupkg build ) ) diff --git a/tools/nuget/generate_nuspec_for_native_nuget.py b/tools/nuget/generate_nuspec_for_native_nuget.py index 2aefe794db..cc27cdc293 100644 --- a/tools/nuget/generate_nuspec_for_native_nuget.py +++ b/tools/nuget/generate_nuspec_for_native_nuget.py @@ -437,14 +437,7 @@ def generate_files(line_list, args): ) if args.execution_provider == "tensorrt": - files_list.append( - "' - ) + files_list.append("') if args.execution_provider == "dnnl": files_list.append( diff --git a/tools/nuget/validate_package.py b/tools/nuget/validate_package.py index e1b9cf0c3c..a9996b2779 100644 --- a/tools/nuget/validate_package.py +++ b/tools/nuget/validate_package.py @@ -23,7 +23,6 @@ win_gpu_package_libraries = [ ] gpu_related_header_files = [ "cpu_provider_factory.h", - "tensorrt_provider_factory.h", "onnxruntime_c_api.h", "onnxruntime_cxx_api.h", "onnxruntime_cxx_inline.h",
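
For reference, a minimal sketch (not part of this patch) of what the new in-class member initializers on OrtTensorRTProviderOptionsV2 buy callers: the long positional brace-initializer lists deleted from the tests and from onnxruntime_pybind_state.cc collapse into a default-constructed struct plus only the overrides a caller cares about. The helper name below is purely illustrative; the fields and their defaults (device 0, 1 GB workspace, builder optimization level 3, and so on) come from tensorrt_provider_options.h in this diff.

#include "core/providers/tensorrt/tensorrt_provider_options.h"

// Illustrative sketch: rely on the struct's member initializers for the defaults
// and override only the fields needed for an FP16 run with engine caching.
static OrtTensorRTProviderOptionsV2 MakeCachedFp16Options(const char* cache_dir) {
  OrtTensorRTProviderOptionsV2 params;       // all fields start at their documented defaults
  params.trt_fp16_enable = 1;                // nonzero = true
  params.trt_engine_cache_enable = 1;        // enable engine caching
  params.trt_engine_cache_path = cache_dir;  // caller must keep this string alive
  return params;
}

As the comment added in onnxruntime_pybind_state.cc notes, any const char* field that points at an external string (cache path, tactic sources, plugin paths, profile shapes) is only a pointer unless string_copy is requested, so the backing string has to outlive both the options struct and the TRT EP that reads it.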