From 27e778909d75d0d7ff1177f3e86ef8c19db71d5d Mon Sep 17 00:00:00 2001
From: Maajid khan
Date: Thu, 8 Apr 2021 09:29:16 +0530
Subject: [PATCH] [OpenVINO-EP] Enabling the save/load blob feature (#7054)

* Enabling the save/load blob feature for OpenVINO-EP

Signed-off-by: MaajidKhan

* Added changes to enhance the save/load feature
   -> This feature applies only to the MYRIAD device target
   -> Cleaned up the code and added error checks

Signed-off-by: MaajidKhan

* Enabled the feature only for MyriadX and only for Linux

Signed-off-by: MaajidKhan

* Fixed compilation issues on Windows

Signed-off-by: MaajidKhan

* Added changes to fix the const subgraph issue

Signed-off-by: MaajidKhan

* Fixed issues on Windows

Signed-off-by: MaajidKhan

* Added changes for the feature
   -> Removed the default dump-location directory set via CMake
   -> Enabled saving blob dumps at the executable path by default

Signed-off-by: MaajidKhan

* Made the save/load dump path configurable
   -> The save/load blob dump path is now also configurable through the C/Python APIs.
   -> Introduced a flag named blob_dump_path

Signed-off-by: MaajidKhan

* Minor fixes added

Signed-off-by: MaajidKhan

* Fixed Python API issues

Signed-off-by: MaajidKhan

* Using GetEnvironmentVar to get the path

Signed-off-by: MaajidKhan

* Fixed the Python runtime option issue

Signed-off-by: MaajidKhan

* Fixed the import-network issue on Windows

Signed-off-by: MaajidKhan
---
 .../core/session/onnxruntime_c_api.h          |   4 +-
 .../core/providers/openvino/backend_utils.cc  |  64 +++++-
 .../core/providers/openvino/backend_utils.h   |  18 ++
 .../openvino/backends/basic_backend.cc        | 182 +++++++++++++-----
 .../openvino/backends/basic_backend.h         |   1 +
 .../core/providers/openvino/contexts.h        |   5 +
 .../openvino/openvino_execution_provider.cc   |  11 ++
 .../openvino/openvino_execution_provider.h    |   8 +-
 .../openvino/openvino_provider_factory.cc     |  16 +-
 .../python/onnxruntime_pybind_state.cc        |  15 +-
 .../test/perftest/command_args_parser.cc      |   4 +-
 onnxruntime/test/perftest/ort_test_session.cc |  20 +-
 onnxruntime/test/util/default_providers.cc    |   2 +-
 13 files changed, 274 insertions(+), 76 deletions(-)

diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 4cb1b2c746..faca901ce9 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -305,12 +305,14 @@ typedef struct OrtTensorRTProviderOptions {
 ///
 typedef struct OrtOpenVINOProviderOptions {
 #ifdef __cplusplus
-  OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{} {}
+  OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{}, use_compiled_network{}, blob_dump_path{} {}
 #endif
   const char* device_type;                // CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16 or VAD-F_FP32
   unsigned char enable_vpu_fast_compile;  // 0 = false, nonzero = true
   const char* device_id;
   size_t num_of_threads;                  // 0 uses default number of threads
+  unsigned char use_compiled_network;     // 0 = false, nonzero = true
+  const char* blob_dump_path;             // path is set to empty by default
 } OrtOpenVINOProviderOptions;
 
 struct OrtApi;
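For context, the two new fields are consumed through the existing AppendExecutionProvider_OpenVINO path. A minimal C++ sketch of opting in (the model path and dump directory are placeholders; this mirrors the perftest wiring further down rather than adding any new API):

    #include <onnxruntime_cxx_api.h>

    int main() {
      Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "ov_blob_demo");
      Ort::SessionOptions session_options;

      OrtOpenVINOProviderOptions options;
      options.device_type = "MYRIAD_FP16";      // save/load currently applies to MyriadX only
      options.device_id = "";
      options.enable_vpu_fast_compile = 0;
      options.num_of_threads = 8;
      options.use_compiled_network = 1;         // new: enable the save/load blob feature
      options.blob_dump_path = "/tmp/ov_blobs"; // new: override the default dump location

      session_options.AppendExecutionProvider_OpenVINO(options);
      Ort::Session session(env, "model.onnx", session_options);  // placeholder model path
      return 0;
    }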
diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
index 74ebd33f4e..ff5697a124 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -23,15 +23,11 @@ namespace backend_utils {
 
 #ifndef NDEBUG
 bool IsDebugEnabled() {
-#ifdef _WIN32
-  size_t env_name_len = 0;
-  char* env_name = nullptr;
-  bool res = (_dupenv_s(&env_name, &env_name_len, "ORT_OPENVINO_ENABLE_DEBUG") == 0 && env_name != nullptr);
-  free(env_name);
-  return res;
-#else
-  return (std::getenv("ORT_OPENVINO_ENABLE_DEBUG") != nullptr);
-#endif
+  const std::string env_name = onnxruntime::GetEnvironmentVar("ORT_OPENVINO_ENABLE_DEBUG");
+  if (!env_name.empty()) {
+    return true;
+  }
+  return false;
 }
 void DumpOnnxModelProto(const ONNX_NAMESPACE::ModelProto& model_proto, std::string file_name) {
   std::fstream outfile(file_name, std::ios::out | std::ios::trunc | std::ios::binary);
@@ -40,6 +36,56 @@ void DumpOnnxModelProto(const ONNX_NAMESPACE::ModelProto& model_proto, std::stri
 
 #endif
 
+bool UseCompiledNetwork() {
+  const std::string env_name = onnxruntime::GetEnvironmentVar("OV_USE_COMPILED_NETWORK");
+  if (!env_name.empty()) {
+    return true;
+  }
+  return false;
+}
+
+std::string GetCurrentWorkingDir() {
+  std::string curr_dir;
+  ORT_UNUSED_PARAMETER(curr_dir);
+  char buff[FILENAME_MAX];
+  curr_dir = GetCurrentDir(buff, FILENAME_MAX);
+  std::string current_working_dir(buff);
+  return current_working_dir;
+}
+
+bool IsDirExists(const std::string& pathname) {
+  struct stat info;
+  if (stat(pathname.c_str(), &info) != 0) {
+    LOGS_DEFAULT(INFO) << log_tag << "cannot access pathname: " << pathname;
+    return false;
+  } else if (info.st_mode & S_IFDIR) {
+    LOGS_DEFAULT(INFO) << log_tag << "pathname exists: " << pathname;
+    return true;
+  } else {
+    LOGS_DEFAULT(INFO) << log_tag << "pathname: " << pathname << ": doesn't contain the directory 'ov_compiled_blobs' ";
+  }
+  return false;
+}
+
+void CreateDirectory(const std::string& ov_compiled_blobs_dir) {
+  LOGS_DEFAULT(INFO) << log_tag << "'ov_compiled_blobs' directory doesn't exist at the executable path, so creating one";
+#if defined(_WIN32)
+  if (_mkdir(ov_compiled_blobs_dir.c_str()) == 0) {  // Creating a directory
+    LOGS_DEFAULT(INFO) << log_tag << "created a directory named 'ov_compiled_blobs' at the executable path";
+  } else {
+    LOGS_DEFAULT(INFO) << log_tag << "Error creating a directory named 'ov_compiled_blobs' at the executable path";
+    throw std::runtime_error("Could not create the directory");
+  }
+#else
+  if (mkdir(ov_compiled_blobs_dir.c_str(), 0777) == 0) {  // Creating a directory
+    LOGS_DEFAULT(INFO) << log_tag << "created a directory named 'ov_compiled_blobs' at the executable path";
+  } else {
+    LOGS_DEFAULT(INFO) << log_tag << "Error creating a directory named 'ov_compiled_blobs' at the executable path";
+    throw std::runtime_error("Could not create the directory");
+  }
+#endif
+}
+
 struct static_cast_int64 {
   template <class T1>  // T1 models type statically convertible to T
   int64_t operator()(const T1& x) const { return static_cast<int64_t>(x); }
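Condensed, the directory handling that basic_backend.cc (below) builds out of these helpers amounts to the following; ResolveBlobDir is illustrative only, not a function added by this patch:

    #include <string>

    // Illustrative: condensed from the basic_backend.cc logic further down.
    std::string ResolveBlobDir(const std::string& blob_dump_path) {
      using namespace onnxruntime::openvino_ep;
      // Empty path -> dump next to the executable; otherwise honor the user path.
      std::string dir = blob_dump_path.empty()
                            ? backend_utils::GetCurrentWorkingDir() + "/ov_compiled_blobs/"
                            : blob_dump_path + "/ov_compiled_blobs";
      if (!backend_utils::IsDirExists(dir))
        backend_utils::CreateDirectory(dir);  // throws std::runtime_error on failure
      return dir;
    }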
diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h
index bfc197ad52..1c3cd10059 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.h
+++ b/onnxruntime/core/providers/openvino/backend_utils.h
@@ -10,6 +10,16 @@
 #include "contexts.h"
 #include 
+#ifdef _WIN32
+#include <direct.h>
+#define GetCurrentDir _getcwd
+#else
+#include <unistd.h>
+#define GetCurrentDir getcwd
+#endif
+
+#include 
+
 namespace onnxruntime {
 namespace openvino_ep {
 namespace backend_utils {
@@ -19,6 +29,14 @@ const std::string log_tag = "[OpenVINO-EP] ";
 bool IsDebugEnabled();
 #endif
 
+bool UseCompiledNetwork();
+
+std::string GetCurrentWorkingDir();
+
+bool IsDirExists(const std::string& pathname);
+
+void CreateDirectory(const std::string& ov_compiled_blobs_dir);
+
 void SetIODefs(const ONNX_NAMESPACE::ModelProto& model_proto,
                std::shared_ptr<InferenceEngine::CNNNetwork> network,
                std::unordered_map output_names,
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index 88b08fcb25..93f16fa1d6 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -16,6 +16,7 @@
 #include 
 
 #include "basic_backend.h"
+#include "../backend_manager.h"
 
 namespace onnxruntime {
 namespace openvino_ep {
@@ -26,56 +27,135 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
                            GlobalContext& global_context,
                            const SubGraphContext& subgraph_context)
     : global_context_(global_context), subgraph_context_(subgraph_context) {
-  ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-  SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
-
-  InferenceEngine::ExecutableNetwork exe_network;
-
-#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
-  if (const_outputs_map_.size() == subgraph_context_.output_names.size())
-    subgraph_context_.is_constant = true;
-#endif
-
-  // Loading model to the plugin
-  if (subgraph_context_.is_constant)
-    return;
-  std::map<std::string, std::string> config;
-#ifndef NDEBUG
-  if (openvino_ep::backend_utils::IsDebugEnabled()) {
-    config["PERF_COUNT"] = CONFIG_VALUE(YES);
-  }
-#endif
-  if (global_context_.device_type.find("MYRIAD") != std::string::npos) {
-#if defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
-    if (subgraph_context_.set_vpu_config) {
-      config["MYRIAD_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
-    }
-
-    if (global_context_.enable_vpu_fast_compile) {
-      config["MYRIAD_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
-      config["MYRIAD_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
-    }
-#else
-    if (subgraph_context_.set_vpu_config) {
-      config["VPU_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
-    }
-
-    if (global_context_.enable_vpu_fast_compile) {
-      config["VPU_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
-      config["VPU_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
-    }
-#endif
-  }
   std::string& hw_target = (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
-  try {
-    exe_network = global_context_.ie_core.LoadNetwork(*ie_cnn_network_, hw_target, config);
-  } catch (const InferenceEngine::details::InferenceEngineException& e) {
-    ORT_THROW(log_tag + " Exception while Loading Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
-  } catch (...) {
-    ORT_THROW(log_tag + " Exception while Loading Network for graph " + subgraph_context_.subgraph_name);
-  }
-  LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
+  bool vpu_status = false;
+  bool import_blob_status = false;
+  std::string model_blob_name;
+  std::ifstream blob_path;
+  std::string ov_compiled_blobs_dir = "";
+  if (hw_target == "MYRIAD" && global_context_.use_compiled_network == true) {
+    if (!openvino_ep::backend_utils::UseCompiledNetwork()) {
+      std::size_t model_index = global_context_.onnx_model_path_name.find_last_of("/\\");
+      std::string model_name = global_context_.onnx_model_path_name.substr(model_index + 1);
+      std::size_t model_extension_index = model_name.find_last_of(".");
+      if (openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph) {
+        model_blob_name = global_context_.onnx_model_name + "_" + "op_v_" + std::to_string(global_context_.onnx_opset_version) + "_" + model_name.substr(0, model_extension_index) + "_" + hw_target + "_" + subgraph_context_.subgraph_name + "_ov_" + "fully" + ".blob";
+      } else {
+        model_blob_name = global_context_.onnx_model_name + "_" + "op_v_" + std::to_string(global_context_.onnx_opset_version) + "_" + model_name.substr(0, model_extension_index) + "_" + hw_target + "_" + subgraph_context_.subgraph_name + "_ov_" + "partially" + ".blob";
+      }
+      if (global_context_.blob_dump_path == "" || global_context_.blob_dump_path == "\"" || global_context_.blob_dump_path.empty()) {
+        ov_compiled_blobs_dir = openvino_ep::backend_utils::GetCurrentWorkingDir() + "/ov_compiled_blobs/";
+      } else {
+        ov_compiled_blobs_dir = global_context_.blob_dump_path + "/ov_compiled_blobs";
+      }
+      if (openvino_ep::backend_utils::IsDirExists(ov_compiled_blobs_dir)) {
+        LOGS_DEFAULT(INFO) << log_tag << "'ov_compiled_blobs' directory already exists at the executable path";
+      } else {
+        CreateDirectory(ov_compiled_blobs_dir);
+      }
+      blob_path.open(ov_compiled_blobs_dir + "/" + model_blob_name);
+      if (!blob_path.is_open()) {
+        LOGS_DEFAULT(INFO) << log_tag << "Device specific Compiled blob doesn't exist for this model";
+      } else {
+        LOGS_DEFAULT(INFO) << log_tag << "Device specific Compiled blob already exists for this model";
+        vpu_status = true;
+      }
+    }
+  }
+
+  // validate const subgraphs
+  if (!openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph) {
+    ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
+    SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
+#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
+    if (const_outputs_map_.size() == subgraph_context_.output_names.size())
+      subgraph_context_.is_constant = true;
+#endif
+
+    // Loading model to the plugin
+    if (subgraph_context_.is_constant) {
+      LOGS_DEFAULT(INFO) << log_tag << "The subgraph is a const. Directly moving to Infer stage.";
+      return;
+    }
+  }
+
+  if (vpu_status == true || openvino_ep::backend_utils::UseCompiledNetwork()) {
+    const std::string model_blob_path = ov_compiled_blobs_dir + "/" + model_blob_name;
+    const std::string compiled_blob_path = onnxruntime::GetEnvironmentVar("OV_BLOB_PATH");
+    try {
+      if (vpu_status == true) {
+        LOGS_DEFAULT(INFO) << log_tag << "Importing the pre-compiled blob for this model which already exists in the directory 'ov_compiled_blobs'";
+        exe_network_ = global_context_.ie_core.ImportNetwork(model_blob_path, hw_target, {});
+      } else {
+        LOGS_DEFAULT(INFO) << log_tag << "Importing the pre-compiled blob from the path set by the user";
+        if (compiled_blob_path.empty())
+          throw std::runtime_error("The compiled blob path is not set");
+        exe_network_ = global_context_.ie_core.ImportNetwork(compiled_blob_path, hw_target, {});
+      }
+    } catch (InferenceEngine::details::InferenceEngineException& e) {
+      ORT_THROW(log_tag + " Exception while Importing Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
+    } catch (...) {
+      ORT_THROW(log_tag + " Exception while Importing Network for graph: " + subgraph_context_.subgraph_name);
+    }
+    import_blob_status = true;
+    LOGS_DEFAULT(INFO) << log_tag << "Successfully created an executable network from a previously exported network";
+  }
+
+  if ((global_context_.use_compiled_network == true && import_blob_status == false) || vpu_status == false) {
+    if (!openvino_ep::backend_utils::UseCompiledNetwork()) {
+      ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
+      SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
+#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
+      if (const_outputs_map_.size() == subgraph_context_.output_names.size())
+        subgraph_context_.is_constant = true;
+#endif
+
+      // Loading model to the plugin
+      if (subgraph_context_.is_constant)
+        return;
+      std::map<std::string, std::string> config;
+#ifndef NDEBUG
+      if (openvino_ep::backend_utils::IsDebugEnabled()) {
+        config["PERF_COUNT"] = CONFIG_VALUE(YES);
+      }
+#endif
+      if (global_context_.device_type.find("MYRIAD") != std::string::npos) {
+#if defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
+        if (subgraph_context_.set_vpu_config) {
+          config["MYRIAD_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
+        }
+        if (global_context_.enable_vpu_fast_compile) {
+          config["MYRIAD_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
+          config["MYRIAD_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
+        }
+#else
+        if (subgraph_context_.set_vpu_config) {
+          config["VPU_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
+        }
+        if (global_context_.enable_vpu_fast_compile) {
+          config["VPU_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
+          config["VPU_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
+        }
+#endif
+      }
+      try {
+        exe_network_ = global_context_.ie_core.LoadNetwork(*ie_cnn_network_, hw_target, config);
+      } catch (const InferenceEngine::details::InferenceEngineException& e) {
+        ORT_THROW(log_tag + " Exception while Loading Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
+      } catch (...) {
+        ORT_THROW(log_tag + " Exception while Loading Network for graph " + subgraph_context_.subgraph_name);
+      }
+      LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
+      if (global_context_.use_compiled_network && hw_target == "MYRIAD") {
+        LOGS_DEFAULT(INFO) << log_tag << "Dumping the compiled blob for this model into the directory 'ov_compiled_blobs'";
+        std::ofstream compiled_blob_dump{ov_compiled_blobs_dir + "/" + model_blob_name};
+        exe_network_.Export(compiled_blob_dump);
+      }
+    }
+  }
   // The infer_requests_ pool will be initialized with a default value of 8 infer_requests
   // The nireq value can also be configured to any num_of_threads during runtime
   size_t nireq = global_context_.num_of_threads;
@@ -85,13 +165,13 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
       std::cout << "The value of nireq being used is: " << nireq << std::endl;
   }
 #endif
-  inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network, nireq));
+  inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, nireq));
 }
 
 // Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on
 // an Infer Request indexed by infer_req_idx
 void BasicBackend::StartAsyncInference(Ort::CustomOpApi& ort, OrtKernelContext* context,
                                        std::shared_ptr<InferenceEngine::InferRequest> infer_request) {
-  auto graph_input_info = ie_cnn_network_->getInputsInfo();
+  auto graph_input_info = exe_network_.GetInputsInfo();
   size_t index = 0;
   for (auto input_info_iter = graph_input_info.begin();
@@ -132,7 +212,7 @@ void BasicBackend::CompleteAsyncInference(Ort::CustomOpApi& ort, OrtKernelContex
   } catch (...) {
     ORT_THROW(log_tag + " Exception with completing Inference");
   }
-  auto graph_output_info = ie_cnn_network_->getOutputsInfo();
+  auto graph_output_info = exe_network_.GetOutputsInfo();
   for (auto output_info_iter = graph_output_info.begin();
        output_info_iter != graph_output_info.end(); ++output_info_iter) {
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h
index 80f14eeb2b..1083a2ce6b 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.h
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -37,6 +37,7 @@ class BasicBackend : public IBackend {
   SubGraphContext subgraph_context_;
   mutable std::mutex compute_lock_;
   std::shared_ptr<InferenceEngine::CNNNetwork> ie_cnn_network_;
+  InferenceEngine::ExecutableNetwork exe_network_;
   std::map<std::string, std::shared_ptr<ngraph::Node>> const_outputs_map_;
   std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
 };
diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h
index 1049668a7e..6453c41569 100644
--- a/onnxruntime/core/providers/openvino/contexts.h
+++ b/onnxruntime/core/providers/openvino/contexts.h
@@ -13,12 +13,17 @@ struct GlobalContext {
   InferenceEngine::Core ie_core;
   bool is_wholly_supported_graph = false;
   bool enable_vpu_fast_compile = false;
+  bool use_compiled_network = false;
   size_t num_of_threads;
   std::string device_type;
   std::string precision_str;
   std::string device_id;
+  std::string blob_dump_path;
   std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};
   std::vector<std::string> deviceTags = {"0", "1", "2", "3", "4", "5", "6", "7"};
+  std::string onnx_model_name;
+  std::string onnx_model_path_name;
+  int onnx_opset_version;
 };
 
 // Holds context specific to subgraph.
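Taken together, the backend flow is: on a first run with use_compiled_network set, LoadNetwork compiles the model and ExecutableNetwork::Export serializes it; later runs find the blob on disk and call Core::ImportNetwork instead. A stripped-down sketch of those two Inference Engine (2021.x) calls, with a placeholder blob path and a network assumed to come from CreateCNNNetwork:

    #include <fstream>
    #include <inference_engine.hpp>

    void SaveAndLoadSketch(InferenceEngine::CNNNetwork& network) {
      InferenceEngine::Core core;
      // First run: compile for MYRIAD, then persist the blob (placeholder file name).
      InferenceEngine::ExecutableNetwork exe = core.LoadNetwork(network, "MYRIAD", {});
      std::ofstream dump("ov_compiled_blobs/model.blob");
      exe.Export(dump);
      // Later runs: import the blob directly and skip compilation.
      InferenceEngine::ExecutableNetwork imported =
          core.ImportNetwork("ov_compiled_blobs/model.blob", "MYRIAD", {});
    }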
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
index e1cd784b65..b25a1f5ea2 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -18,6 +18,9 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
   openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_;
   openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
   openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
+  openvino_ep::BackendManager::GetGlobalContext().use_compiled_network = info.use_compiled_network_;
+  openvino_ep::BackendManager::GetGlobalContext().blob_dump_path = info.blob_dump_path_;
+
   if ((int)info.num_of_threads_ <= 0) {
     openvino_ep::BackendManager::GetGlobalContext().num_of_threads = 8;
   } else {
@@ -55,6 +58,14 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, const
   ORT_UNUSED_PARAMETER(kernel_registries);
   std::vector<std::unique_ptr<ComputeCapability>> result;
+  openvino_ep::BackendManager::GetGlobalContext().onnx_model_name = graph_viewer.Name();
+#ifdef _WIN32
+  std::wstring onnx_path = graph_viewer.ModelPath().ToPathString();
+  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = std::string(onnx_path.begin(), onnx_path.end());
+#else
+  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = graph_viewer.ModelPath().ToPathString();
+#endif
+  openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain);
 
 #if defined OPENVINO_2020_3
   result = openvino_ep::GetCapability_2020_3(graph_viewer,
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
index d547e00e38..3db730e29c 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -55,9 +55,11 @@ struct OpenVINOExecutionProviderInfo {
   bool enable_vpu_fast_compile_;
   std::string device_id_;
   size_t num_of_threads_;
+  bool use_compiled_network_;
+  std::string blob_dump_path_;
 
-  explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, size_t num_of_threads)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads) {
+  explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, size_t num_of_threads, bool use_compiled_network, std::string blob_dump_path)
+      : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network), blob_dump_path_(blob_dump_path) {
     if (dev_type == "") {
       LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
                          << "No runtime device selection option provided.";
@@ -125,7 +127,7 @@ struct OpenVINOExecutionProviderInfo {
                        << "Choosing Device: " << device_type_ << " , Precision: " << precision_;
   }
   OpenVINOExecutionProviderInfo() {
    OpenVINOExecutionProviderInfo("", false, "", 0, false, "");
   }
 };
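GetCapability now records the graph name, the on-disk model path, and the ONNX opset version in the global context. Together with the device target, the subgraph name, and the coverage suffix ("fully"/"partially"), these compose the blob file name built in basic_backend.cc above. For a hypothetical squeezenet.onnx (graph "main_graph", opset 13, subgraph "subgraph_1", wholly supported on MYRIAD), the dumped file would be named:

    main_graph_op_v_13_squeezenet_MYRIAD_subgraph_1_ov_fully.blob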
diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
index 9ef16dcbbd..6b2b990615 100644
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -8,10 +8,12 @@ namespace onnxruntime {
 struct OpenVINOProviderFactory : IExecutionProviderFactory {
   OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
-                          const char* device_id, size_t num_of_threads)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads) {
+                          const char* device_id, size_t num_of_threads,
+                          bool use_compiled_network, const char* blob_dump_path)
+      : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network) {
     device_type_ = (device_type == nullptr) ? "" : device_type;
     device_id_ = (device_id == nullptr) ? "" : device_id;
+    blob_dump_path_ = (blob_dump_path == nullptr) ? "" : blob_dump_path;
   }
   ~OpenVINOProviderFactory() override {
   }
@@ -23,16 +25,18 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
   bool enable_vpu_fast_compile_;
   std::string device_id_;
   size_t num_of_threads_;
+  bool use_compiled_network_;
+  std::string blob_dump_path_;
 };
 
 std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
-  OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_);
+  OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_, use_compiled_network_, blob_dump_path_);
   return std::make_unique<OpenVINOExecutionProvider>(info);
 }
 
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
-    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads) {
-  return std::make_shared<OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile, device_id, num_of_threads);
+    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path) {
+  return std::make_shared<OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile, device_id, num_of_threads, use_compiled_network, blob_dump_path);
 }
 
 }  // namespace onnxruntime
@@ -50,7 +54,7 @@ struct OpenVINO_Provider : Provider {
   std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const void* void_params) override {
     auto& params = *reinterpret_cast<const OrtOpenVINOProviderOptions*>(void_params);
-    return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile, params.device_id, params.num_of_threads);
+    return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile, params.device_id, params.num_of_threads, params.use_compiled_network, params.blob_dump_path);
   }
 
   void Shutdown() override {
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index a3b0800c0a..64071a89bc 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -665,6 +665,7 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
 #ifdef USE_OPENVINO
       OrtOpenVINOProviderOptions params;
       params.device_type = openvino_device_type.c_str();
+      std::string blob_dump_path;
 
       auto it = provider_options_map.find(type);
       if (it != provider_options_map.end()) {
@@ -681,10 +682,22 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
               ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
             }
+          } else if (option.first == "use_compiled_network") {
+            if (option.second == "True") {
+              params.use_compiled_network = true;
+            } else if (option.second == "False") {
+              params.use_compiled_network = false;
+            } else {
+              ORT_THROW("Invalid value passed for use_compiled_network: ", option.second);
+            }
+
           } else if (option.first == "device_id") {
             params.device_id = option.second.c_str();
           } else if (option.first == "num_of_threads") {
             params.num_of_threads = std::stoi(option.second);
+          } else if (option.first == "blob_dump_path") {
+            blob_dump_path = option.second;
+            params.blob_dump_path = blob_dump_path.c_str();
           } else {
             ORT_THROW("Invalid OpenVINO EP option: ", option.first);
           }
@@ -967,7 +980,7 @@ void addGlobalMethods(py::module& m, Environment& env) {
             onnxruntime::CreateExecutionProviderFactory_Dnnl(1),
 #endif
 #ifdef USE_OPENVINO
-            onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device_type, false, "", 8),
+            onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device_type, false, "", 8, false, ""),
 #endif
 #ifdef USE_TENSORRT
             onnxruntime::CreateExecutionProviderFactory_Tensorrt(
diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc
index 013b646061..c9e8888f6b 100644
--- a/onnxruntime/test/perftest/command_args_parser.cc
+++ b/onnxruntime/test/perftest/command_args_parser.cc
@@ -60,8 +60,10 @@ namespace perftest {
       "\t    [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n"
       "\t    [OpenVINO only] [enable_vpu_fast_compile]: Optionally enabled to speed up the model's compilation on VPU device targets.\n"
       "\t    [OpenVINO only] [num_of_threads]: Overrides the accelerator default value of number of threads with this value at runtime.\n"
+      "\t    [OpenVINO only] [use_compiled_network]: Can be enabled to directly import pre-compiled blobs if they exist. Currently this feature is only supported on the MyriadX (VPU) hardware device target.\n"
+      "\t    [OpenVINO only] [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs for the use_compiled_network (save/load blob) feature. This overrides the default path.\n"
       "\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
-      "\t [Example] [For OpenVINO EP] -e openvino -i 'device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5'\n"
+      "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5 use_compiled_network|true blob_dump_path|\"\"\"\n"
       "\t [TensorRT only] [use_trt_options]: Overrides TensorRT environment variables (if any) with following settings at runtime.\n"
       "\t [TensorRT only] [trt_max_workspace_size]: Set TensorRT maximum workspace size in byte.\n"
       "\t [TensorRT only] [trt_fp16_enable]: Enable TensorRT FP16 precision.\n"
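Extending the help text's own example, a hypothetical perftest invocation that exercises both new keys (the dump directory and model file are placeholders):

    onnxruntime_perf_test -e openvino -i "device_type|MYRIAD_FP16 use_compiled_network|true blob_dump_path|/tmp/ov_blobs" model.onnx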
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index 7e95baf8d1..ed439eb40c 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -162,9 +162,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
   } else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) {
 #ifdef USE_OPENVINO
     std::string device_type = "";          // [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.
-    bool enable_vpu_fast_compile = false;  // [device_id]: Selects a particular hardware device for inference.
-    std::string device_id = "";            // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speeds up the model's compilation to VPU device specific format.
+    bool enable_vpu_fast_compile = false;  // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speed up the model's compilation to VPU device specific format.
+    std::string device_id = "";            // [device_id]: Selects a particular hardware device for inference.
     size_t num_of_threads = 8;             // [num_of_threads]: Overrides the accelerator default value of number of threads with this value at runtime.
+    bool use_compiled_network = false;     // [use_compiled_network]: Can be enabled to directly import pre-compiled blobs if they exist.
+    std::string blob_dump_path = "";       // [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs for the use_compiled_network (save/load blob) feature. This overrides the default path.
 
 #ifdef _MSC_VER
     std::string ov_string = ToMBString(performance_test_config.run_config.ep_runtime_config_string);
@@ -203,14 +205,24 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
         } else {
           ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
         }
+      } else if (key == "use_compiled_network") {
+        if (value == "true" || value == "True") {
+          use_compiled_network = true;
+        } else if (value == "false" || value == "False") {
+          use_compiled_network = false;
+        } else {
+          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'use_compiled_network' should be a boolean i.e. true or false. Default value is false.\n");
+        }
       } else if (key == "num_of_threads") {
         std::stringstream sstream(value);
         sstream >> num_of_threads;
         if ((int)num_of_threads <= 0) {
          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_of_threads' should be greater than 0\n");
         }
+      } else if (key == "blob_dump_path") {
+        blob_dump_path = value;
       } else {
-        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads'] \n");
+        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'use_compiled_network', 'blob_dump_path'] \n");
       }
     }
     OrtOpenVINOProviderOptions options;
@@ -218,6 +230,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
     options.device_id = device_id.c_str();                      // To set the device_id
     options.enable_vpu_fast_compile = enable_vpu_fast_compile;  // To enable_vpu_fast_compile, default is false
     options.num_of_threads = num_of_threads;                    // To set number of free InferRequests, default is 8
+    options.use_compiled_network = use_compiled_network;        // To use_compiled_network, default is false
+    options.blob_dump_path = blob_dump_path.c_str();            // sets the blob_dump_path, default is ""
     session_options.AppendExecutionProvider_OpenVINO(options);
 #else
     ORT_THROW("OpenVINO is not supported in this build\n");
diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc
index 7cb8e4c216..9ea46c991f 100644
--- a/onnxruntime/test/util/default_providers.cc
+++ b/onnxruntime/test/util/default_providers.cc
@@ -18,7 +18,7 @@ namespace onnxruntime {
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
-    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads);
+    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
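Besides the provider options, the backend above also honors two environment variables read through onnxruntime::GetEnvironmentVar: OV_USE_COMPILED_NETWORK (any non-empty value switches the EP to importing a user-supplied blob instead of compiling or dumping locally) and OV_BLOB_PATH (the blob file to import in that mode; the EP throws if it is unset). A hypothetical setup, with placeholder paths:

    export OV_USE_COMPILED_NETWORK=1
    export OV_BLOB_PATH=/tmp/ov_blobs/model.blob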