From 2c5d4dce773ef9cdfb7e344046aaba9940d2a6ae Mon Sep 17 00:00:00 2001 From: sfatimar Date: Thu, 10 Aug 2023 00:20:10 +0530 Subject: [PATCH] Openvino ep ort 5.1 (#17042) OpenVINO EP ORT 5.1 Branch Changes for the new API to take in OpenVINO Provider Options and compatibility with OV 2023.1 ### Motivation and Context The change is required for the new API to take in OpenVINO Provider Options and make it seamless. --------- Signed-off-by: MaajidKhan Co-authored-by: saurabhintel0 Co-authored-by: MaajidKhan Co-authored-by: Suryaprakash Shanmugam Co-authored-by: Preetha Veeramalai --- cmake/CMakeLists.txt | 7 +- docs/python/ReadMeOV.rst | 6 +- .../providers/openvino/backend_manager.cc | 33 +++--- .../core/providers/openvino/backend_utils.cc | 64 ++++------- .../core/providers/openvino/backend_utils.h | 11 +- .../openvino/backends/basic_backend.cc | 37 ++++--- .../openvino/backends/basic_backend.h | 5 +- .../core/providers/openvino/contexts.h | 3 +- .../openvino/openvino_execution_provider.cc | 5 + .../openvino/openvino_execution_provider.h | 7 +- .../openvino/openvino_provider_factory.cc | 101 ++++++++++++++---- .../openvino_provider_factory_creator.h | 2 + .../core/providers/openvino/ov_interface.cc | 10 +- .../core/providers/openvino/ov_interface.h | 10 +- .../openvino/ov_versions/capability.cc | 9 +- .../openvino/ov_versions/data_ops.cc | 17 ++- .../providers/openvino/ov_versions/data_ops.h | 3 +- .../providers/openvino/ov_versions/utils.cc | 13 ++- .../core/session/provider_bridge_ort.cc | 38 ++++++- .../core/session/provider_registration.cc | 9 +- .../python/onnxruntime_pybind_schema.cc | 4 +- .../python/onnxruntime_pybind_state.cc | 53 +++++---- .../python/onnxruntime_pybind_state_common.h | 1 - onnxruntime/test/perftest/ort_test_session.cc | 75 +++++-------- .../test/providers/cpu/math/einsum_test.cc | 4 +- onnxruntime/test/util/default_providers.cc | 13 ++- .../test/util/include/default_providers.h | 4 +- 27 files changed, 333 insertions(+), 211 deletions(-) 
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index d33e8bd4f6..2f80e6ecec 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -1236,9 +1236,12 @@ if (onnxruntime_USE_OPENVINO) elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0") set(OPENVINO_VERSION "2023.0") add_definitions(-DOPENVINO_2023_0=1) + elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.1") + set(OPENVINO_VERSION "2023.1") + add_definitions(-DOPENVINO_2023_1=1) elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino") - set(OPENVINO_VERSION "2023.0") - add_definitions(-DOPENVINO_2023_0=1) + set(OPENVINO_VERSION "2023.1") + add_definitions(-DOPENVINO_2023_1=1) else() message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}") endif() diff --git a/docs/python/ReadMeOV.rst b/docs/python/ReadMeOV.rst index a19aa0e86d..f12c01d278 100644 --- a/docs/python/ReadMeOV.rst +++ b/docs/python/ReadMeOV.rst @@ -7,6 +7,7 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man - Intel® CPUs - Intel® integrated GPUs - Intel® discrete GPUs + - Intel® integrated VPUs Installation ------------ @@ -15,12 +16,13 @@ Requirements ^^^^^^^^^^^^ - Ubuntu 18.04, 20.04, RHEL(CPU only) or Windows 10 - 64 bit -- Python 3.8, 3.9 or 3.10 for Linux and only Python3.10 for Windows +- Python 3.8 or 3.9 or 3.10 for Linux and only Python3.10 for Windows This package supports: - Intel® CPUs - Intel® integrated GPUs - Intel® discrete GPUs + - Intel® integrated VPUs ``pip3 install onnxruntime-openvino`` @@ -34,7 +36,7 @@ For more details on build and installation please refer to `Build `_ to change the hardware on which inferencing is done. For more API calls and environment variables, see `Usage `_. 
diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index 5969aaeb44..78467b646b 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -7,9 +7,6 @@ #include #include "core/providers/shared_library/provider_api.h" - -#include - #include "contexts.h" #include "backend_manager.h" #include "ibackend.h" @@ -36,11 +33,11 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node, const logging::Logger& logger) { auto prec_str = GetGlobalContext().precision_str; if (prec_str == "FP32") { - subgraph_context_.precision = InferenceEngine::Precision::FP32; + subgraph_context_.precision = "FP32"; } else if (prec_str == "FP16") { - subgraph_context_.precision = InferenceEngine::Precision::FP16; + subgraph_context_.precision = "FP16"; } else if (prec_str == "U8") { - subgraph_context_.precision = InferenceEngine::Precision::U8; + subgraph_context_.precision = "U8"; } else { throw std::string("Invalid OpenVINO Precision type: " + prec_str); } @@ -78,19 +75,17 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node, LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims"; if (GetGlobalContext().device_type.find("CPU") != std::string::npos || GetGlobalContext().device_type.find("GPU") != std::string::npos) { - if (GetGlobalContext().enable_dynamic_shapes) { - LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. " - << "Creating backend Dynamic Shapes"; - try { - concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, - GetGlobalContext(), - subgraph_context_); - } catch (std::string const& msg) { - throw msg; - } - LOGS_DEFAULT(INFO) << "[OpenVINO-EP] " - << "Backend created for graph " << subgraph_context_.subgraph_name; + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. 
" + << "Creating backend Dynamic Shapes"; + try { + concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, + GetGlobalContext(), + subgraph_context_); + } catch (std::string const& msg) { + throw msg; } + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] " + << "Backend created for graph " << subgraph_context_.subgraph_name; } } else { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. Initializing backend for graph " << subgraph_context_.subgraph_name; @@ -257,7 +252,7 @@ void BackendManager::Compute(OrtKernelContext* context) { } #endif bool use_dynamic_backend = true; - if (GetGlobalContext().enable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape && + if (subgraph_context_.has_dynamic_input_shape && (GetGlobalContext().device_type.find("CPU") != std::string::npos || GetGlobalContext().device_type.find("GPU") != std::string::npos)) { concrete_backend_->Infer(context); diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc index c5ebdb4131..d49968cdb7 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.cc +++ b/onnxruntime/core/providers/openvino/backend_utils.cc @@ -8,8 +8,8 @@ #include #include "ov_interface.h" -#include -#include +#include "openvino/pass/convert_fp32_to_fp16.hpp" +#include "openvino/pass/constant_folding.hpp" #include "core/providers/shared_library/provider_api.h" #include "backend_utils.h" @@ -50,14 +50,14 @@ struct static_cast_int64 { std::shared_ptr CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context, - std::map>& const_outputs_map) { + std::map>& const_outputs_map) { if (IsCILogEnabled()) { std::cout << "CreateNgraphFunc" << std::endl; } const std::string model = model_proto.SerializeAsString(); try { auto cnn_network = global_context.ie_core.ReadModel(model); - if ((subgraph_context.precision == InferenceEngine::Precision::FP16) && + if 
((subgraph_context.precision == "FP16") && (global_context.device_type.find("VPUX") == std::string::npos)) { // FP16 transformations ov::pass::ConvertFP32ToFP16 pass_obj; @@ -88,7 +88,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext size_t index = results.size() - 1; for (auto it = results.rbegin(); it != results.rend(); ++it) { - if (auto const_node = std::dynamic_pointer_cast((*it)->input_value(0).get_node_shared_ptr())) { + if (auto const_node = std::dynamic_pointer_cast((*it)->input_value(0).get_node_shared_ptr())) { const_outputs_map[(*it)->get_friendly_name()] = const_node; results.erase(results.begin() + index); } @@ -96,12 +96,11 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext } } #ifndef NDEBUG -#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0) +#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1) if (IsDebugEnabled()) { std::string name = cnn_network->get_friendly_name(); ov::pass::Serialize serializer(name + ".xml", name + ".bin"); serializer.run_on_model(cnn_network); - ngraph::plot_graph(cnn_network, name + "_executable" + ".dot"); } #endif #endif @@ -111,31 +110,6 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext } } -InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type) { - ONNX_NAMESPACE::DataType type_string = ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(onnx_type); - if (*type_string == "float" || *type_string == "tensor(float)") { - return InferenceEngine::Precision::FP32; - } else if (*type_string == "float16" || *type_string == "tensor(float16)") { - return InferenceEngine::Precision::FP16; - } else if (*type_string == "int32" || *type_string == "tensor(int32)") { - return InferenceEngine::Precision::I32; - } else if (*type_string == "int16" || *type_string == "tensor(int16)") { - return InferenceEngine::Precision::I16; - } else if (*type_string == "int8" || *type_string == 
"tensor(int8)") { - return InferenceEngine::Precision::I8; - } else if (*type_string == "uint16" || *type_string == "tensor(uint16)") { - return InferenceEngine::Precision::U16; - } else if (*type_string == "uint8" || *type_string == "tensor(uint8)") { - return InferenceEngine::Precision::U8; - } else if (*type_string == "bool" || *type_string == "tensor(bool)") { - return InferenceEngine::Precision::U8; - } else if (*type_string == "int64" || *type_string == "tensor(int64)") { - return InferenceEngine::Precision::I32; - } else { - throw std::string(log_tag + "Unsupported Data type"); - } -} - Ort::UnownedValue GetOutputTensor(Ort::KernelContext& context, size_t batch_size, OVInferRequestPtr infer_request, @@ -166,7 +140,7 @@ Ort::UnownedValue GetOutputTensor(Ort::KernelContext& context, std::string output_name, std::unordered_map output_names, - std::shared_ptr node) { + std::shared_ptr node) { // Find position of '/' in the output_name int pos = output_name.find("/"); // Copy the substring from start to pos @@ -210,25 +184,25 @@ int GetFirstAvailableDevice(GlobalContext& global_context) { return i; } -void FillOutputsWithConstantData(std::shared_ptr node, Ort::UnownedValue& out_tensor) { +void FillOutputsWithConstantData(std::shared_ptr node, Ort::UnownedValue& out_tensor) { switch (node->get_element_type()) { - case ngraph::element::Type_t::f32: { + case ov::element::Type_t::f32: { FillOutputHelper(out_tensor, node); break; } - case ngraph::element::Type_t::boolean: { + case ov::element::Type_t::boolean: { FillOutputHelper(out_tensor, node); break; } - case ngraph::element::Type_t::i32: { + case ov::element::Type_t::i32: { FillOutputHelper(out_tensor, node); break; } - case ngraph::element::Type_t::i64: { + case ov::element::Type_t::i64: { FillOutputHelper(out_tensor, node); break; } - case ngraph::element::Type_t::f16: { + case ov::element::Type_t::f16: { FillOutputHelper(out_tensor, node); break; } @@ -237,14 +211,22 @@ void 
FillOutputsWithConstantData(std::shared_ptr node, Ort::Unowne } } +#if defined(_MSC_VER) +#pragma warning(disable : 4127) +#endif + template -void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr node) { - auto const_node = std::dynamic_pointer_cast(node); +void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr node) { + auto const_node = std::dynamic_pointer_cast(node); auto res = const_node->cast_vector(); T* tensor_data = out_tensor.GetTensorMutableData(); std::copy(res.begin(), res.end(), tensor_data); } +#if defined(_MSC_VER) +#pragma warning(default : 4127) +#endif + void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx, std::string input_name, Ort::KernelContext& context, const SubGraphContext& subgraph_context) { diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h index e0fdc6f55a..de78a150fe 100644 --- a/onnxruntime/core/providers/openvino/backend_utils.h +++ b/onnxruntime/core/providers/openvino/backend_utils.h @@ -32,19 +32,16 @@ bool IsCILogEnabled(); int GetFirstAvailableDevice(GlobalContext& global_context); -void FillOutputsWithConstantData(std::shared_ptr node, Ort::UnownedValue& out_tensor); +void FillOutputsWithConstantData(std::shared_ptr node, Ort::UnownedValue& out_tensor); template -void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr node); +void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr node); Ort::UnownedValue GetOutputTensor(Ort::KernelContext& context, std::string output_name, std::unordered_map output_names, - std::shared_ptr node); - -InferenceEngine::Precision -ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type); + std::shared_ptr node); Ort::UnownedValue GetOutputTensor(Ort::KernelContext& context, size_t batch_size, @@ -61,7 +58,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor, std::shared_ptr CreateOVModel(const 
ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context, - std::map>& const_outputs_map); + std::map>& const_outputs_map); void printPerformanceCounts(const std::vector& performanceMap, std::ostream& stream, std::string deviceName); diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 31ec8db03b..f9517d7942 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -9,7 +9,7 @@ #include "core/providers/shared_library/provider_api.h" #include "../backend_utils.h" -#include +// #include #include "basic_backend.h" #include "../backend_manager.h" @@ -37,6 +37,9 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, // Setting OpenCL queue throttling for GPU EnableGPUThrottling(device_config); + // Enable streams; default=1 unless overridden by user config + EnableStreams(); + #ifndef NDEBUG if (IsDebugEnabled()) { std::string file_name = subgraph_context.subgraph_name + "_static.onnx"; @@ -45,6 +48,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, } #endif try { + std::string dev_prec = global_context.device_type + "_" + global_context_.precision_str; if (global_context.is_wholly_supported_graph) { #if defined(IO_BUFFER_ENABLED) if ((global_context.device_type.find("GPU") != std::string::npos) && @@ -61,8 +65,8 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; } #else -#if defined(OPENVINO_2023_0) - if (subgraph_context.precision != InferenceEngine::Precision::FP16 && global_context_.enable_dynamic_shapes == false) { +#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1) + if (!subgraph_context_.has_dynamic_input_shape && dev_prec != "CPU_FP16") { const std::string model = 
model_proto.SerializeAsString(); exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name); LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin"; @@ -98,7 +102,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto, inferRequestsQueue_ = std::unique_ptr(new InferRequestsQueue(exe_network_, nireq)); } -bool BasicBackend::ValidateSubgraph(std::map>& const_outputs_map) { +bool BasicBackend::ValidateSubgraph(std::map>& const_outputs_map) { if (const_outputs_map.size() == subgraph_context_.output_names.size()) subgraph_context_.is_constant = true; if (subgraph_context_.is_constant) { @@ -109,20 +113,23 @@ bool BasicBackend::ValidateSubgraph(std::map device_property; - device_property = std::make_pair("VPUX_COMPILER_TYPE", "MLIR"); + device_property = std::make_pair("VPU_COMPILER_TYPE", "MLIR"); device_config.emplace(ov::device::properties("VPUX", device_property)); } #endif @@ -147,10 +154,17 @@ void BasicBackend::EnableCaching() { void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) { if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) { LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device"; - device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1"; + std::pair device_property; + device_property = std::make_pair("PLUGIN_THROTTLE", "1"); + device_config.emplace(ov::device::properties("GPU_CONFIG_KEY", device_property)); + // device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1"; } } +void BasicBackend::EnableStreams() { + global_context_.ie_core.SetStreams(global_context_.device_type, global_context_.num_streams); +} + // Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on // an Infer Request indexed by infer_req_idx void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) { @@ -177,7 +191,6 @@ void 
BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque } size_t batch_slice_idx = 0; if (subgraph_context_.has_dynamic_input_shape && - global_context_.enable_dynamic_shapes == true && (global_context_.device_type.find("CPU") != std::string::npos || global_context_.device_type.find("GPU") != std::string::npos)) { auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name)); diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h index 8cdb758fe7..2f1d603640 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.h +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h @@ -31,10 +31,11 @@ class BasicBackend : public IBackend { private: bool ImportBlob(std::string hw_target, bool vpu_status); void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&); - bool ValidateSubgraph(std::map>& const_outputs_map); + bool ValidateSubgraph(std::map>& const_outputs_map); void PopulateConfigValue(ov::AnyMap& device_config); void EnableCaching(); void EnableGPUThrottling(ov::AnyMap& device_config); + void EnableStreams(); void StartAsyncInference(Ort::KernelContext& context, std::shared_ptr infer_request); #ifdef IO_BUFFER_ENABLED @@ -48,7 +49,7 @@ class BasicBackend : public IBackend { mutable std::mutex compute_lock_; std::shared_ptr ie_cnn_network_; OVExeNetwork exe_network_; - std::map> const_outputs_map_; + std::map> const_outputs_map_; std::unique_ptr inferRequestsQueue_; #if defined IO_BUFFER_ENABLED OVRemoteContextPtr remote_context_; diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index a6011590fa..b61dcf8ca4 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -20,6 +20,7 @@ struct GlobalContext { std::string precision_str; std::string device_id; std::string cache_dir; + int 
num_streams; std::vector deviceAvailableList = {true, true, true, true, true, true, true, true}; std::vector deviceTags = {"0", "1", "2", "3", "4", "5", "6", "7"}; std::string onnx_model_name; @@ -40,7 +41,7 @@ struct SubGraphContext { std::vector input_indexes; std::unordered_map input_names; std::unordered_map output_names; - OVPrecision precision; + std::string precision; }; } // namespace openvino_ep diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 6a4b039683..9908099262 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -19,6 +19,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_; openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_; openvino_ep::BackendManager::GetGlobalContext().cache_dir = info.cache_dir_; + openvino_ep::BackendManager::GetGlobalContext().num_streams = info.num_streams_; openvino_ep::BackendManager::GetGlobalContext().context = info.context_; openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_; openvino_ep::BackendManager::GetGlobalContext().enable_dynamic_shapes = info.enable_dynamic_shapes_; @@ -130,6 +131,10 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, openvino_ep::GetCapability obj(graph_viewer, openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_0"); result = obj.Execute(); +#elif defined(OPENVINO_2023_1) + openvino_ep::GetCapability obj(graph_viewer, + openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_1"); + result = obj.Execute(); #endif return result; diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h 
b/onnxruntime/core/providers/openvino/openvino_execution_provider.h index 5eae9c78c9..a4fc09362f 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h @@ -57,15 +57,16 @@ struct OpenVINOExecutionProviderInfo { std::string device_id_; size_t num_of_threads_; std::string cache_dir_; + int num_streams_; void* context_; bool enable_opencl_throttling_; bool enable_dynamic_shapes_; explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, - size_t num_of_threads, std::string cache_dir, + size_t num_of_threads, std::string cache_dir, int num_streams, void* context, bool enable_opencl_throttling, bool enable_dynamic_shapes) - : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), cache_dir_(cache_dir), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) { + : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), cache_dir_(cache_dir), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) { if (dev_type == "") { LOGS_DEFAULT(INFO) << "[OpenVINO-EP]" << "No runtime device selection option provided."; @@ -149,7 +150,7 @@ struct OpenVINOExecutionProviderInfo { << "Choosing Device: " << device_type_ << " , Precision: " << precision_; } OpenVINOExecutionProviderInfo() { - OpenVINOExecutionProviderInfo("", false, "", 0, "", NULL, false, false); + OpenVINOExecutionProviderInfo("", false, "", 0, "", 1, NULL, false, false); } }; diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index d118b37f8a..463f985d8c 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ 
b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -3,16 +3,16 @@ #include "core/providers/shared_library/provider_api.h" #include "core/providers/openvino/openvino_provider_factory.h" -#include "openvino_execution_provider.h" -#include "openvino_provider_factory_creator.h" +#include "core/providers/openvino/openvino_execution_provider.h" +#include "core/providers/openvino/openvino_provider_factory_creator.h" namespace onnxruntime { struct OpenVINOProviderFactory : IExecutionProviderFactory { OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, - const char* cache_dir, void* context, + const char* cache_dir, int num_streams, void* context, bool enable_opencl_throttling, bool enable_dynamic_shapes) - : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) { + : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) { device_type_ = (device_type == nullptr) ? "" : device_type; device_id_ = (device_id == nullptr) ? "" : device_id; cache_dir_ = (cache_dir == nullptr) ? 
"" : cache_dir; @@ -28,6 +28,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory { std::string device_id_; size_t num_of_threads_; std::string cache_dir_; + int num_streams_; void* context_; bool enable_opencl_throttling_; bool enable_dynamic_shapes_; @@ -35,20 +36,11 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory { std::unique_ptr OpenVINOProviderFactory::CreateProvider() { OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_, - cache_dir_, context_, enable_opencl_throttling_, + cache_dir_, num_streams_, context_, enable_opencl_throttling_, enable_dynamic_shapes_); return std::make_unique(info); } -std::shared_ptr CreateExecutionProviderFactory_OpenVINO( - const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, - const char* cache_dir, void* context, bool enable_opencl_throttling, - bool enable_dynamic_shapes) { - return std::make_shared(device_type, enable_vpu_fast_compile, - device_id, num_of_threads, cache_dir, context, enable_opencl_throttling, - enable_dynamic_shapes); -} - } // namespace onnxruntime namespace onnxruntime { @@ -63,12 +55,81 @@ struct OpenVINO_Provider : Provider { void* GetInfo() override { return &g_info; } std::shared_ptr CreateExecutionProviderFactory(const void* void_params) override { - auto& params = *reinterpret_cast(void_params); - return std::make_shared(params.device_type, params.enable_vpu_fast_compile, - params.device_id, params.num_of_threads, - params.cache_dir, - params.context, params.enable_opencl_throttling, - params.enable_dynamic_shapes); + auto& provider_options_map = *reinterpret_cast(void_params); + + const char* device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision + // with these values at runtime. 
+ bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to + // speed up the model's compilation to VPU device specific format. + const char* device_id = ""; // [device_id]: Selects a particular hardware device for inference. + size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of + // threads with this value at runtime. + const char* cache_dir = ""; // [cache_dir]: specify the path to + // dump and load the blobs for the model caching/kernel caching (GPU) + // feature. If blob files are already present, it will be directly loaded. + int num_streams = 1; // [num_streams]: Option that specifies the number of parallel inference + // requests to be processed on a given `device_type`. Overrides the + // accelerator default value of number of streams with this value at runtime. + bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU + // device (Reduces CPU Utilization when using GPU) + bool enable_dynamic_shapes = false; // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device + void* context = nullptr; + + if (provider_options_map.find("device_type") != provider_options_map.end()) { + device_type = provider_options_map.at("device_type").c_str(); + } + if (provider_options_map.find("device_id") != provider_options_map.end()) { + device_id = provider_options_map.at("device_id").c_str(); + } + if (provider_options_map.find("cache_dir") != provider_options_map.end()) { + cache_dir = provider_options_map.at("cache_dir").c_str(); + } + if (provider_options_map.find("context") != provider_options_map.end()) { + context = (void*)provider_options_map.at("context").c_str(); + } + + if (provider_options_map.find("num_of_threads") != provider_options_map.end()) { + num_of_threads = std::stoi(provider_options_map.at("num_of_threads")); + } + + if (provider_options_map.find("num_streams") != 
provider_options_map.end()) { + num_streams = std::stoi(provider_options_map.at("num_streams")); + } + std::string bool_flag = ""; + if (provider_options_map.find("enable_vpu_fast_compile") != provider_options_map.end()) { + bool_flag = provider_options_map.at("enable_vpu_fast_compile"); + if (bool_flag == "true" || bool_flag == "True") + enable_vpu_fast_compile = true; + else if (bool_flag == "false" || bool_flag == "False") + enable_vpu_fast_compile = false; + bool_flag = ""; + } + + if (provider_options_map.find("enable_opencl_throttling") != provider_options_map.end()) { + bool_flag = provider_options_map.at("enable_opencl_throttling"); + if (bool_flag == "true" || bool_flag == "True") + enable_opencl_throttling = true; + else if (bool_flag == "false" || bool_flag == "False") + enable_opencl_throttling = false; + bool_flag = ""; + } + + if (provider_options_map.find("enable_dynamic_shapes") != provider_options_map.end()) { + bool_flag = provider_options_map.at("enable_dynamic_shapes"); + if (bool_flag == "true" || bool_flag == "True") + enable_dynamic_shapes = true; + else if (bool_flag == "false" || bool_flag == "False") + enable_dynamic_shapes = false; + } + return std::make_shared(device_type, + enable_vpu_fast_compile, + device_id, + num_of_threads, + cache_dir, + num_streams, + context, + enable_opencl_throttling, + enable_dynamic_shapes); } void Initialize() override { diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h b/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h index 5781d3a3ab..4df653b022 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h @@ -6,12 +6,14 @@ #include #include "core/providers/providers.h" +#include "core/framework/provider_options.h" struct OrtOpenVINOProviderOptions; namespace onnxruntime { // defined in provider_bridge_ort.cc struct OpenVINOProviderFactoryCreator { 
+ static std::shared_ptr Create(const ProviderOptions* provider_options_map); static std::shared_ptr Create(const OrtOpenVINOProviderOptions* provider_options); }; } // namespace onnxruntime diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc index 9175f51b12..3914488fc5 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.cc +++ b/onnxruntime/core/providers/openvino/ov_interface.cc @@ -42,7 +42,7 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr& ie_cnn_network, std } } -#if defined(OPENVINO_2023_0) +#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1) OVExeNetwork OVCore::LoadNetwork(const std::string& model, std::string& hw_target, ov::AnyMap& device_config, std::string name) { ov::CompiledModel obj; try { @@ -75,8 +75,12 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr& model, OVRemoteCont #endif std::vector OVCore::GetAvailableDevices() { - auto obj = oe.get_available_devices(); - return obj; + auto available_devices = oe.get_available_devices(); + return available_devices; +} + +void OVCore::SetStreams(const std::string& device_type, int num_streams) { + oe.set_property(device_type, {ov::num_streams(num_streams)}); } OVInferRequest OVExeNetwork::CreateInferRequest() { diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h index 84268ab6dc..ed9583033a 100644 --- a/onnxruntime/core/providers/openvino/ov_interface.h +++ b/onnxruntime/core/providers/openvino/ov_interface.h @@ -5,11 +5,12 @@ #include -#include -#if defined(OPENVINO_2022_1) || (OPENVINO_2022_2) || (OPENVINO_2022_3) || (OPENVINO_2023_0) +#if defined(OPENVINO_2022_1) || (OPENVINO_2022_2) || (OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1) #define OV_API_20 #include "openvino/openvino.hpp" #include "openvino/pass/convert_fp32_to_fp16.hpp" +#else +#include #endif #ifdef IO_BUFFER_ENABLED @@ -26,10 +27,8 @@ class OVCore; class OVInferRequest; class 
OVExeNetwork; -typedef InferenceEngine::Precision OVPrecision; typedef ov::Tensor OVTensor; typedef ov::ProfilingInfo OVProfilingInfo; -typedef ov::AnyMap OVConfig; typedef ov::Model OVNetwork; typedef std::shared_ptr OVInferRequestPtr; typedef std::shared_ptr OVTensorPtr; @@ -45,7 +44,7 @@ class OVCore { public: std::shared_ptr ReadModel(const std::string& model_stream) const; OVExeNetwork LoadNetwork(std::shared_ptr& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name); -#if defined(OPENVINO_2023_0) +#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1) OVExeNetwork LoadNetwork(const std::string& model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name); #endif void SetCache(std::string cache_dir_path); @@ -56,6 +55,7 @@ class OVCore { ov::Core& Get() { return oe; } + void SetStreams(const std::string& device_type, int num_streams); }; class OVExeNetwork { diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc index 251f475525..865e74aa1f 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc @@ -33,8 +33,10 @@ GetCapability::GetCapability(const GraphViewer& graph_viewer_param, std::string data_ops_ = new DataOps(graph_viewer_, V_2022_3, device_type_); } else if (version_param == "V_2023_0") { data_ops_ = new DataOps(graph_viewer_, V_2023_0, device_type_); + } else if (version_param == "V_2023_1") { + data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_); } else { - data_ops_ = new DataOps(graph_viewer_, V_2023_0, device_type_); + data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_); } } @@ -46,6 +48,11 @@ std::vector> GetCapability::Execute() { return result; } + // Check if it is a subgraph + if (graph_viewer_.IsSubgraph() && graph_viewer_.Name() == "tf2onnx") { + return result; + } + // This is a list of initializers 
that nGraph considers as constants. Example weights, reshape shape etc. std::unordered_set ng_required_initializers; diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc index 44bf96788e..70118c94f9 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc @@ -17,8 +17,8 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #endif -#include -#include +// #include +// #include #if defined(_MSC_VER) #pragma warning(default : 4244 4245) #elif __GNUC__ @@ -36,6 +36,7 @@ std::set ops_supported_only_in_model = { "ConstantOfShape", "DequantizeLinear", "Dropout", + "Einsum", "Exp", "Expand", "EyeLike", @@ -127,6 +128,7 @@ std::vector supported_op_mode = { {"Dropout", V_2023_0, {"VPUX"}}, {"Elu", V_2020_4, {"CPU", "GPU"}}, {"Elu", V_2023_0, {"VPUX"}}, + // {"Einsum", V_2023_0, {"CPU", "GPU"}}, {"Equal", V_2020_4, {"CPU", "GPU"}}, {"Equal", V_2023_0, {"VPUX"}}, // Added for whisper decoder model. 
{"Erf", V_2020_4, {"CPU", "GPU"}}, @@ -155,6 +157,7 @@ std::vector supported_op_mode = { {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}}, {"GreaterOrEqual", V_2023_0, {"VPUX"}}, {"GridSample", V_2022_3, {"CPU"}}, + {"GridSample", V_2023_0, {"GPU"}}, {"Identity", V_2020_4, {"CPU", "GPU"}}, {"Identity", V_2023_0, {"VPUX"}}, // NoOP {"If", V_2022_3, {"CPU", "GPU"}}, @@ -196,6 +199,7 @@ std::vector supported_op_mode = { {"Neg", V_2023_0, {"VPUX"}}, {"NonMaxSuppression", V_2021_1, {"CPU", "GPU"}}, {"NonZero", V_2021_1, {"CPU"}}, + {"NonZero", V_2023_0, {"GPU"}}, {"Not", V_2021_1, {"CPU", "GPU"}}, {"Not", V_2020_4, {"CPU", "GPU"}}, {"OneHot", V_2020_4, {"CPU", "GPU"}}, @@ -210,6 +214,7 @@ std::vector supported_op_mode = { {"QuantizeLinear", V_2021_4, {"CPU", "GPU"}}, {"QuantizeLinear", V_2023_0, {"VPUX"}}, {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}}, + {"RandomNormal", V_2023_0, {"CPU", "GPU"}}, {"Range", V_2022_1, {"CPU", "GPU"}}, {"Range", V_2023_0, {"VPUX"}}, {"Reciprocal", V_2020_4, {"CPU", "GPU"}}, @@ -341,6 +346,7 @@ void DataOps::populate_op_mode_supported() { no_dimension_supported_.push_back({"Div", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"DequantizeLinear", V_2021_4, {"All"}}); no_dimension_supported_.push_back({"Equal", V_2022_1, {"CPU"}}); + no_dimension_supported_.push_back({"Equal", V_2023_0, {"GPU"}}); no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Greater", V_2023_0, {"VPUX"}}); @@ -356,6 +362,7 @@ void DataOps::populate_op_mode_supported() { no_dimension_supported_.push_back({"ReduceProd", V_2022_1, {"CPU", "GPU"}}); no_dimension_supported_.push_back({"Reshape", V_2022_1, {"All"}}); no_dimension_supported_.push_back({"Shape", V_2022_1, {"GPU"}}); + no_dimension_supported_.push_back({"Shape", V_2023_0, {"CPU"}}); no_dimension_supported_.push_back({"Squeeze", V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Sub", 
V_2020_4, {"All"}}); no_dimension_supported_.push_back({"Unsqueeze", V_2020_4, {"All"}}); @@ -1022,8 +1029,10 @@ bool DataOps::node_is_supported(const std::mapdim()) { if (utils::HasDimValue(dim) && dim.dim_value() == 0) { - if ((device_id_.find("GPU") != std::string::npos) && ((optype == "Expand") || - (optype == "Slice") || (optype == "Concat") || (optype == "Shape"))) { + if (((device_id_.find("CPU") != std::string::npos) || (device_id_.find("GPU") != std::string::npos)) && + ((optype == "Expand") || (optype == "Equal") || + (optype == "Slice") || (optype == "Concat") || + (optype == "Shape"))) { return; } has_unsupported_dimension = true; diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h index b26d1653be..cc968d02ea 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h +++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h @@ -18,7 +18,8 @@ enum versionNum { V_2022_1, V_2022_2, V_2022_3, - V_2023_0 + V_2023_0, + V_2023_1, }; using VersionNum = enum versionNum; diff --git a/onnxruntime/core/providers/openvino/ov_versions/utils.cc b/onnxruntime/core/providers/openvino/ov_versions/utils.cc index 53b2c3b460..be509b6743 100644 --- a/onnxruntime/core/providers/openvino/ov_versions/utils.cc +++ b/onnxruntime/core/providers/openvino/ov_versions/utils.cc @@ -9,8 +9,15 @@ #pragma GCC diagnostic push #pragma GCC diagnostic ignored "-Wunused-parameter" #endif -#include + +#include "openvino/core/deprecated.hpp" +#define IN_OV_COMPONENT +#define NGRAPH_LEGACY_HEADER_INCLUDED #include + +#undef NGRAPH_LEGACY_HEADER_INCLUDED +#undef IN_OV_COMPONENT + #if defined(_MSC_VER) #pragma warning(default : 4244 4245) #elif __GNUC__ @@ -40,6 +47,7 @@ bool IsOpSupportedOnlyInModel(std::string name) { "Concat", "ConstantOfShape", "Dropout", + "Einsum", "Expand", "EyeLike", "Exp", @@ -88,6 +96,7 @@ int GetOnnxOpSet(const GraphViewer& graph_viewer) { std::map> 
GetNgSupportedOps(const int onnx_opset) { std::map> ng_supported_ops; + OPENVINO_SUPPRESS_DEPRECATED_START ng_supported_ops.emplace(kOnnxDomain, ngraph::onnx_import::get_supported_operators(onnx_opset, kOnnxDomain)); const std::set ng_disabled_ops = {"LSTM"}; // Place-holder for ops not supported. @@ -95,7 +104,7 @@ std::map> GetNgSupportedOps(const int onnx_op for (const auto& disabled_op : ng_disabled_ops) { ng_supported_ops.at(kOnnxDomain).erase(disabled_op); } - + OPENVINO_SUPPRESS_DEPRECATED_END return ng_supported_ops; } diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc index 255c7e36b3..8f0a5aeaa3 100644 --- a/onnxruntime/core/session/provider_bridge_ort.cc +++ b/onnxruntime/core/session/provider_bridge_ort.cc @@ -1426,8 +1426,44 @@ std::shared_ptr MIGraphXProviderFactoryCreator::Creat return s_library_migraphx.Get().CreateExecutionProviderFactory(provider_options); } +// Adapter to convert the legacy OrtOpenVINOProviderOptions to ProviderOptions; boolean flags are stored as the strings "true"/"false" +ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const OrtOpenVINOProviderOptions* legacy_ov_options) { + ProviderOptions ov_options_converted_map; + if (legacy_ov_options->device_type != nullptr) + ov_options_converted_map["device_type"] = legacy_ov_options->device_type; + + ov_options_converted_map["enable_vpu_fast_compile"] = legacy_ov_options->enable_vpu_fast_compile ? "true" : "false"; + + if (legacy_ov_options->device_id != nullptr) + ov_options_converted_map["device_id"] = legacy_ov_options->device_id; + + ov_options_converted_map["num_of_threads"] = std::to_string(legacy_ov_options->num_of_threads); + + if (legacy_ov_options->cache_dir != nullptr) + ov_options_converted_map["cache_dir"] = legacy_ov_options->cache_dir; + + std::stringstream context_string; + + if (legacy_ov_options->context != nullptr) + context_string << legacy_ov_options->context; + ov_options_converted_map["context"] = context_string.str(); + + 
ov_options_converted_map["enable_opencl_throttling"] = legacy_ov_options->enable_opencl_throttling ? "true" : "false"; + ov_options_converted_map["enable_dynamic_shapes"] = legacy_ov_options->enable_dynamic_shapes ? "true" : "false"; + + // Add new provider option below + ov_options_converted_map["num_streams"] = "1"; + return ov_options_converted_map; +} + std::shared_ptr OpenVINOProviderFactoryCreator::Create(const OrtOpenVINOProviderOptions* provider_options) { - return s_library_openvino.Get().CreateExecutionProviderFactory(provider_options); + ProviderOptions ov_options_converted_map = onnxruntime::OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(provider_options); + return s_library_openvino.Get().CreateExecutionProviderFactory(&ov_options_converted_map); +} + +std::shared_ptr OpenVINOProviderFactoryCreator::Create(const ProviderOptions* provider_options_map) { + // std::cout << provider_options_map.at("num_streams") << std::endl; + return s_library_openvino.Get().CreateExecutionProviderFactory(provider_options_map); } std::shared_ptr DnnlProviderFactoryCreator::Create(const OrtDnnlProviderOptions* dnnl_options) { diff --git a/onnxruntime/core/session/provider_registration.cc b/onnxruntime/core/session/provider_registration.cc index 4cea84a590..9326c6eaff 100644 --- a/onnxruntime/core/session/provider_registration.cc +++ b/onnxruntime/core/session/provider_registration.cc @@ -10,6 +10,7 @@ #include "core/session/abi_session_options_impl.h" #include "core/session/onnxruntime_c_api.h" #include "core/session/ort_apis.h" +#include "core/providers/openvino/openvino_provider_factory_creator.h" using namespace onnxruntime; @@ -71,6 +72,12 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider, options->provider_factories.push_back(QNNProviderFactoryCreator::Create(provider_options, &(options->value))); #else status = create_not_supported_status(); +#endif + } else if (strcmp(provider_name, "OpenVINO") == 0) { +#if defined(USE_OPENVINO) + 
options->provider_factories.push_back(OpenVINOProviderFactoryCreator::Create(&provider_options)); +#else + status = create_not_supported_status(); #endif } else if (strcmp(provider_name, "SNPE") == 0) { #if defined(USE_SNPE) @@ -115,7 +122,7 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider, } else { ORT_UNUSED_PARAMETER(options); status = OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, - "Unknown provider name. Currently supported values are 'SNPE', 'XNNPACK', and 'AZURE'"); + "Unknown provider name. Currently supported values are 'OpenVINO', 'SNPE', 'XNNPACK', 'QNN', 'WEBNN' and 'AZURE'"); } return status; diff --git a/onnxruntime/python/onnxruntime_pybind_schema.cc b/onnxruntime/python/onnxruntime_pybind_schema.cc index 61d4feb182..a8c217b0ff 100644 --- a/onnxruntime/python/onnxruntime_pybind_schema.cc +++ b/onnxruntime/python/onnxruntime_pybind_schema.cc @@ -39,8 +39,8 @@ void addGlobalSchemaFunctions(pybind11::module& m) { #endif #ifdef USE_OPENVINO []() { - OrtOpenVINOProviderOptions provider_options; - return onnxruntime::OpenVINOProviderFactoryCreator::Create(&provider_options); + ProviderOptions provider_options_map; + return onnxruntime::OpenVINOProviderFactoryCreator::Create(&provider_options_map); }(), #endif #ifdef USE_TENSORRT diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 826c996c22..5ac20739c4 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -780,56 +780,53 @@ std::unique_ptr CreateExecutionProviderInstance( #endif } else if (type == kOpenVINOExecutionProvider) { #ifdef USE_OPENVINO - OrtOpenVINOProviderOptions params; - params.device_type = openvino_device_type.c_str(); - std::string cache_dir; - + ProviderOptions OV_provider_options_map; auto it = provider_options_map.find(type); if (it != provider_options_map.end()) { for (auto option : it->second) { if (option.first == "device_type") { - 
openvino_device_type = option.second; - params.device_type = openvino_device_type.c_str(); + OV_provider_options_map[option.first] = option.second; + continue; } else if (option.first == "enable_vpu_fast_compile") { - if (option.second == "True") { - params.enable_vpu_fast_compile = true; - } else if (option.second == "False") { - params.enable_vpu_fast_compile = false; - } else { + if (!(option.second == "True" || option.second == "true" || + option.second == "False" || option.second == "false")) { ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second); } - + OV_provider_options_map[option.first] = option.second; } else if (option.first == "enable_opencl_throttling") { - if (option.second == "True") { - params.enable_opencl_throttling = true; - } else if (option.second == "False") { - params.enable_opencl_throttling = false; - } else { + if (!(option.second == "True" || option.second == "true" || + option.second == "False" || option.second == "false")) { ORT_THROW("Invalid value passed for enable_opencl_throttling: ", option.second); } + OV_provider_options_map[option.first] = option.second; } else if (option.first == "enable_dynamic_shapes") { - if (option.second == "True") { - params.enable_dynamic_shapes = true; - } else if (option.second == "False") { - params.enable_dynamic_shapes = false; - } else { + if (!(option.second == "True" || option.second == "true" || + option.second == "False" || option.second == "false")) { ORT_THROW("Invalid value passed for enable_dynamic_shapes: ", option.second); } + OV_provider_options_map[option.first] = option.second; } else if (option.first == "device_id") { - params.device_id = option.second.c_str(); + OV_provider_options_map[option.first] = option.second; + continue; } else if (option.first == "num_of_threads") { - params.num_of_threads = std::stoi(option.second); + OV_provider_options_map[option.first] = option.second; + continue; + } else if (option.first == "num_streams") { + 
OV_provider_options_map[option.first] = option.second; + continue; } else if (option.first == "cache_dir") { - cache_dir = option.second; - params.cache_dir = cache_dir.c_str(); + OV_provider_options_map[option.first] = option.second; + continue; } else if (option.first == "context") { - params.context = (void*)(option.second.c_str()); + OV_provider_options_map[option.first] = option.second; + continue; } else { ORT_THROW("Invalid OpenVINO EP option: ", option.first); } } } - if (std::shared_ptr openvino_provider_factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(¶ms)) { + if (std::shared_ptr openvino_provider_factory = onnxruntime::OpenVINOProviderFactoryCreator::Create( + &OV_provider_options_map)) { auto p = openvino_provider_factory->CreateProvider(); // Reset global variables config to avoid it being accidentally passed on to the next session openvino_device_type.clear(); diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h index 1260e9ea71..18a9079b5c 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.h +++ b/onnxruntime/python/onnxruntime_pybind_state_common.h @@ -440,7 +440,6 @@ std::shared_ptr CreateExecutionProviderFactory_MIGrap std::shared_ptr CreateExecutionProviderFactory_MIGraphX(int device_id); std::shared_ptr CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* params); std::shared_ptr CreateExecutionProviderFactory_Dnnl(const OrtDnnlProviderOptions* params); -std::shared_ptr CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params); #ifdef USE_TVM std::shared_ptr CreateExecutionProviderFactory_Tvm(const tvm::TvmEPOptions& info); std::shared_ptr CreateExecutionProviderFactory_Tvm(const char* params); diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index d283d9df62..454ef5dfb2 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ 
b/onnxruntime/test/perftest/ort_test_session.cc @@ -423,24 +423,12 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device #endif } else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) { #ifdef USE_OPENVINO - std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision - // with these values at runtime. - bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to - // speeds up the model's compilation to VPU device specific format. - std::string device_id = ""; // [device_id]: Selects a particular hardware device for inference. - size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of - // threads with this value at runtime. - std::string cache_dir = ""; // [cache_dir]: specify the path to - // dump and load the blobs for the model caching/kernel caching (GPU) - // feature. If blob files are already present, it will be directly loaded. 
- bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU - // device (Reduces CPU Utilization when using GPU) - bool enable_dynamic_shapes = false; // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device) #ifdef _MSC_VER std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); #else std::string ov_string = performance_test_config.run_config.ep_runtime_config_string; #endif + std::unordered_map ov_options; std::istringstream ss(ov_string); std::string token; while (ss >> token) { @@ -461,69 +449,64 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device "GPU.0_FP16", "GPU.1_FP16", "VPUX_FP16", "VPUX_U8"}; if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) { - device_type = value; + ov_options[key] = value; } else if (value.find("HETERO:") == 0) { - device_type = value; + ov_options[key] = value; } else if (value.find("MULTI:") == 0) { - device_type = value; + ov_options[key] = value; } else if (value.find("AUTO:") == 0) { - device_type = value; + ov_options[key] = value; } else { ORT_THROW( "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. " "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', " - "'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8', or from" + "'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from" " HETERO/MULTI/AUTO options available. 
\n"); } } else if (key == "device_id") { - device_id = value; + ov_options[key] = value; } else if (key == "enable_vpu_fast_compile") { - if (value == "true" || value == "True") { - enable_vpu_fast_compile = true; - } else if (value == "false" || value == "False") { - enable_vpu_fast_compile = false; + if (value == "true" || value == "True" || + value == "false" || value == "False") { + ov_options[key] = value; } else { ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n"); } } else if (key == "enable_opencl_throttling") { - if (value == "true" || value == "True") { - enable_opencl_throttling = true; - } else if (value == "false" || value == "False") { - enable_opencl_throttling = false; + if (value == "true" || value == "True" || + value == "false" || value == "False") { + ov_options[key] = value; } else { ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_opencl_throttling' should be a boolean i.e. true or false. Default value is false.\n"); } } else if (key == "enable_dynamic_shapes") { - if (value == "true" || value == "True") { - enable_dynamic_shapes = true; - } else if (value == "false" || value == "False") { - enable_dynamic_shapes = false; + if (value == "true" || value == "True" || + value == "false" || value == "False") { + ov_options[key] = value; } else { ORT_THROW( "[ERROR] [OpenVINO] The value for the key 'enable_dynamic_shapes' " "should be a boolean i.e. true or false. 
Default value is false.\n"); } } else if (key == "num_of_threads") { - std::stringstream sstream(value); - sstream >> num_of_threads; - if ((int)num_of_threads <= 0) { + if (std::stoi(value) <= 0) { ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_of_threads' should be greater than 0\n"); + } else { + ov_options[key] = value; } } else if (key == "cache_dir") { - cache_dir = value; + ov_options[key] = value; + } else if (key == "num_streams") { + if (std::stoi(value) <= 0 || std::stoi(value) > 8) { /* reject anything outside the 1-8 range */ + ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_streams' should be in the range of 1-8 \n"); + } else { + ov_options[key] = value; + } } else { - ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'cache_dir', 'enable_opencl_throttling|true'] \n"); + ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. 
['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling|true'] \n"); } } - OrtOpenVINOProviderOptions options; - options.device_type = device_type.c_str(); // To set the device_type - options.device_id = device_id.c_str(); // To set the device_id - options.enable_vpu_fast_compile = enable_vpu_fast_compile; // To enable_vpu_fast_compile, default is false - options.num_of_threads = num_of_threads; // To set number of free InferRequests, default is 8 - options.cache_dir = cache_dir.c_str(); // sets the cache_dir, default is "" - options.enable_opencl_throttling = enable_opencl_throttling; // Enables GPU Throttling (Reduces CPU Utilization) - options.enable_dynamic_shapes = enable_dynamic_shapes; // Enables Dynamic Shapes feature - session_options.AppendExecutionProvider_OpenVINO(options); + session_options.AppendExecutionProvider("OpenVINO", ov_options); #else ORT_THROW("OpenVINO is not supported in this build\n"); #endif @@ -579,7 +562,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device ORT_THROW("Supported htp_performance_mode: " + str); } } else { - ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable', + ORT_THROW(R"(Wrong key type entered. 
Choose from options: ['backend_path', 'qnn_context_cache_enable', 'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode'])"); } diff --git a/onnxruntime/test/providers/cpu/math/einsum_test.cc b/onnxruntime/test/providers/cpu/math/einsum_test.cc index b7758fd2fd..05b936a41e 100644 --- a/onnxruntime/test/providers/cpu/math/einsum_test.cc +++ b/onnxruntime/test/providers/cpu/math/einsum_test.cc @@ -22,7 +22,7 @@ TEST(Einsum, ExplicitEinsumAsIdentity_1D_input) { test.AddAttribute("equation", "i->i"); test.AddInput("x", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f}); test.AddOutput("y", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f}); - test.Run(); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); } // Implicit @@ -31,7 +31,7 @@ TEST(Einsum, ImplicitEinsumAsIdentity_1D_input) { test.AddAttribute("equation", "i"); test.AddInput("x", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f}); test.AddOutput("y", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f}); - test.Run(); + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider}); } // Theme: Transpose/Permutation diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index aea59e0667..bc85ae03d0 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -86,10 +86,19 @@ std::unique_ptr MIGraphXExecutionProviderWithOptions(const O return nullptr; } +std::unique_ptr OpenVINOExecutionProviderWithOptions(const OrtOpenVINOProviderOptions* params) { +#ifdef USE_OPENVINO + return OpenVINOProviderFactoryCreator::Create(params)->CreateProvider(); +#else + ORT_UNUSED_PARAMETER(params); +#endif + return nullptr; +} + std::unique_ptr DefaultOpenVINOExecutionProvider() { #ifdef USE_OPENVINO - OrtOpenVINOProviderOptions params; - return OpenVINOProviderFactoryCreator::Create(¶ms)->CreateProvider(); + ProviderOptions provider_options_map; + return 
OpenVINOProviderFactoryCreator::Create(&provider_options_map)->CreateProvider(); #else return nullptr; #endif diff --git a/onnxruntime/test/util/include/default_providers.h b/onnxruntime/test/util/include/default_providers.h index d6c9339af0..1325f7aa43 100644 --- a/onnxruntime/test/util/include/default_providers.h +++ b/onnxruntime/test/util/include/default_providers.h @@ -18,9 +18,6 @@ std::shared_ptr CreateExecutionProviderFactory_MIGrap std::shared_ptr CreateExecutionProviderFactory_Nnapi( uint32_t flags, const optional& partitioning_stop_ops_list); // std::shared_ptr CreateExecutionProviderFactory_Tvm(const char*); -std::shared_ptr CreateExecutionProviderFactory_OpenVINO( - const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, const char* cache_dir); -std::shared_ptr CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params); std::shared_ptr CreateExecutionProviderFactory_Rknpu(); std::shared_ptr CreateExecutionProviderFactory_Rocm(const OrtROCMProviderOptions* provider_options); std::shared_ptr CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params); @@ -45,6 +42,7 @@ std::unique_ptr TensorrtExecutionProviderWithOptions(const O std::unique_ptr TensorrtExecutionProviderWithOptions(const OrtTensorRTProviderOptionsV2* params); std::unique_ptr DefaultMIGraphXExecutionProvider(); std::unique_ptr MIGraphXExecutionProviderWithOptions(const OrtMIGraphXProviderOptions* params); +std::unique_ptr OpenVINOExecutionProviderWithOptions(const OrtOpenVINOProviderOptions* params); std::unique_ptr DefaultOpenVINOExecutionProvider(); std::unique_ptr DefaultNnapiExecutionProvider(); std::unique_ptr DefaultRknpuExecutionProvider();