mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-15 20:50:42 +00:00
[OpenVINO-EP] Enabling save/Load blob feature (#7054)
* Enabling save/Load blob feature for OpenVINO-EP Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com> * Added changes to enhance save/load feature -> This feature applies only to the MYRIAD device target -> Cleaned up the code and added error checks Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com> * Enabled the feature only for MyriadX and only for Linux Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com> * Fixed compilation issues on Windows Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com> * Added changes to fix const subgraph issue Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com> * Fixed issues on Windows Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com> * Added changes for the feature -> Removed default location dir dump using cmake -> Enabled saving blob dumps at the executable path by default Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com> * Made save/load dump path configurable -> The save/load blob dump path is now also configurable using the C/Python APIs. -> Introduced a flag named blob_dump_path Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com> * Minor fixes added Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com> * Fixed Python API issues Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com> * Using GetEnvironmentVar to get the path Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com> * Fixed Python runtime option issue Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com> * Fixes import network issue on Windows Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>
This commit is contained in:
parent
def4cc09c7
commit
27e778909d
13 changed files with 274 additions and 76 deletions
|
|
@ -305,12 +305,14 @@ typedef struct OrtTensorRTProviderOptions {
|
|||
/// </summary>
|
||||
typedef struct OrtOpenVINOProviderOptions {
|
||||
#ifdef __cplusplus
|
||||
OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{} {}
|
||||
OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{}, use_compiled_network{}, blob_dump_path{} {}
|
||||
#endif
|
||||
const char* device_type; // CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16 or VAD-F_FP32
|
||||
unsigned char enable_vpu_fast_compile; // 0 = false, nonzero = true
|
||||
const char* device_id;
|
||||
size_t num_of_threads; // 0 uses default number of threads
|
||||
unsigned char use_compiled_network; // 0 = false, nonzero = true
|
||||
const char* blob_dump_path; // path is set to empty by default
|
||||
} OrtOpenVINOProviderOptions;
|
||||
|
||||
struct OrtApi;
|
||||
|
|
|
|||
|
|
@ -23,15 +23,11 @@ namespace backend_utils {
|
|||
|
||||
#ifndef NDEBUG
|
||||
bool IsDebugEnabled() {
|
||||
#ifdef _WIN32
|
||||
size_t env_name_len = 0;
|
||||
char* env_name = nullptr;
|
||||
bool res = (_dupenv_s(&env_name, &env_name_len, "ORT_OPENVINO_ENABLE_DEBUG") == 0 && env_name != nullptr);
|
||||
free(env_name);
|
||||
return res;
|
||||
#else
|
||||
return (std::getenv("ORT_OPENVINO_ENABLE_DEBUG") != nullptr);
|
||||
#endif
|
||||
const std::string env_name = onnxruntime::GetEnvironmentVar("ORT_OPENVINO_ENABLE_DEBUG");
|
||||
if (!env_name.empty()) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
void DumpOnnxModelProto(const ONNX_NAMESPACE::ModelProto& model_proto, std::string file_name) {
|
||||
std::fstream outfile(file_name, std::ios::out | std::ios::trunc | std::ios::binary);
|
||||
|
|
@ -40,6 +36,56 @@ void DumpOnnxModelProto(const ONNX_NAMESPACE::ModelProto& model_proto, std::stri
|
|||
|
||||
#endif
|
||||
|
||||
bool UseCompiledNetwork() {
|
||||
const std::string env_name = onnxruntime::GetEnvironmentVar("OV_USE_COMPILED_NETWORK");
|
||||
if (!env_name.empty()) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Returns the current working directory of the process.
//
// GetCurrentDir (getcwd / _getcwd) returns a null pointer when the path cannot
// be obtained, for example when it does not fit in the supplied buffer.
// Assigning that null pointer to a std::string is undefined behaviour, and the
// buffer contents are not usable in that case, so the result is checked before
// the buffer is read.
//
// Throws std::runtime_error if the working directory cannot be determined.
std::string GetCurrentWorkingDir() {
  char buff[FILENAME_MAX] = {};
  if (GetCurrentDir(buff, FILENAME_MAX) == nullptr) {
    throw std::runtime_error("Could not determine the current working directory");
  }
  return std::string(buff);
}
|
||||
|
||||
// Checks whether `pathname` refers to an existing directory.
//
// Returns true only when stat() succeeds and the entry is a directory.
// Returns false when stat() fails or when the entry is some other file type.
// Every outcome is logged at INFO level.
bool IsDirExists(const std::string& pathname) {
  struct stat info;
  if (stat(pathname.c_str(), &info) != 0) {
    LOGS_DEFAULT(INFO) << log_tag << "cannot access pathname: " << pathname;
    return false;
  }

  // Compare the whole file-type field rather than testing the S_IFDIR bit on
  // its own: other file types can share that bit (e.g. block devices on Linux,
  // where S_IFBLK is 0060000 and S_IFDIR is 0040000).
  if ((info.st_mode & S_IFMT) == S_IFDIR) {
    LOGS_DEFAULT(INFO) << log_tag << "pathname exists: " << pathname;
    return true;
  }

  LOGS_DEFAULT(INFO) << log_tag << "pathname: " << pathname << ": doesn't contain the directory 'ov_compiled_blobs' ";
  return false;
}
|
||||
|
||||
// Creates the directory `ov_compiled_blobs_dir` and logs the outcome at INFO
// level. Throws std::runtime_error when the directory cannot be created.
void CreateDirectory(const std::string& ov_compiled_blobs_dir) {
  LOGS_DEFAULT(INFO) << log_tag << "'ov_compiled_blobs' directory doesn't exist at the executable path, so creating one";

  // Only the directory-creation call itself differs between platforms.
#if defined(_WIN32)
  const int mkdir_status = _mkdir(ov_compiled_blobs_dir.c_str());
#else
  const int mkdir_status = mkdir(ov_compiled_blobs_dir.c_str(), 0777);
#endif

  if (mkdir_status != 0) {
    LOGS_DEFAULT(INFO) << log_tag << "Error creating a directory named 'ov_compiled_blobs' at the executable path";
    throw std::runtime_error("Could not create the directory");
  }
  LOGS_DEFAULT(INFO) << log_tag << "created a directory named 'ov_compiled_blobs' at the executable path";
}
|
||||
|
||||
struct static_cast_int64 {
|
||||
template <typename T1> // T1 models type statically convertible to T
|
||||
int64_t operator()(const T1& x) const { return static_cast<int64_t>(x); }
|
||||
|
|
|
|||
|
|
@ -10,6 +10,16 @@
|
|||
#include "contexts.h"
|
||||
#include <iomanip>
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <direct.h>
|
||||
#define GetCurrentDir _getcwd
|
||||
#else
|
||||
#include <unistd.h>
|
||||
#define GetCurrentDir getcwd
|
||||
#endif
|
||||
|
||||
#include <sys/stat.h>
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace openvino_ep {
|
||||
namespace backend_utils {
|
||||
|
|
@ -19,6 +29,14 @@ const std::string log_tag = "[OpenVINO-EP] ";
|
|||
bool IsDebugEnabled();
|
||||
#endif
|
||||
|
||||
// Returns true when the OV_USE_COMPILED_NETWORK environment variable is set to a non-empty value.
bool UseCompiledNetwork();
|
||||
|
||||
// Returns the current working directory of the process, obtained through GetCurrentDir (getcwd / _getcwd).
std::string GetCurrentWorkingDir();
|
||||
|
||||
// Returns true if stat() succeeds on `pathname` and its mode marks it as a directory; returns false otherwise.
bool IsDirExists(const std::string& pathname);
|
||||
|
||||
// Creates the directory `ov_compiled_blobs_dir` (mkdir / _mkdir); throws std::runtime_error if creation fails.
void CreateDirectory(const std::string& ov_compiled_blobs_dir);
|
||||
|
||||
void SetIODefs(const ONNX_NAMESPACE::ModelProto& model_proto,
|
||||
std::shared_ptr<InferenceEngine::CNNNetwork> network,
|
||||
std::unordered_map<std::string, int> output_names,
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@
|
|||
#include <ngraph/pass/constant_folding.hpp>
|
||||
|
||||
#include "basic_backend.h"
|
||||
#include "../backend_manager.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace openvino_ep {
|
||||
|
|
@ -26,56 +27,135 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
|
|||
GlobalContext& global_context,
|
||||
const SubGraphContext& subgraph_context)
|
||||
: global_context_(global_context), subgraph_context_(subgraph_context) {
|
||||
ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
|
||||
SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
|
||||
|
||||
InferenceEngine::ExecutableNetwork exe_network;
|
||||
|
||||
#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
|
||||
if (const_outputs_map_.size() == subgraph_context_.output_names.size())
|
||||
subgraph_context_.is_constant = true;
|
||||
#endif
|
||||
|
||||
// Loading model to the plugin
|
||||
if (subgraph_context_.is_constant)
|
||||
return;
|
||||
std::map<std::string, std::string> config;
|
||||
#ifndef NDEBUG
|
||||
if (openvino_ep::backend_utils::IsDebugEnabled()) {
|
||||
config["PERF_COUNT"] = CONFIG_VALUE(YES);
|
||||
}
|
||||
#endif
|
||||
if (global_context_.device_type.find("MYRIAD") != std::string::npos) {
|
||||
#if defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
|
||||
if (subgraph_context_.set_vpu_config) {
|
||||
config["MYRIAD_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
|
||||
}
|
||||
|
||||
if (global_context_.enable_vpu_fast_compile) {
|
||||
config["MYRIAD_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
|
||||
config["MYRIAD_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
|
||||
}
|
||||
#else
|
||||
if (subgraph_context_.set_vpu_config) {
|
||||
config["VPU_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
|
||||
}
|
||||
|
||||
if (global_context_.enable_vpu_fast_compile) {
|
||||
config["VPU_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
|
||||
config["VPU_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
std::string& hw_target = (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
|
||||
try {
|
||||
exe_network = global_context_.ie_core.LoadNetwork(*ie_cnn_network_, hw_target, config);
|
||||
} catch (const InferenceEngine::details::InferenceEngineException& e) {
|
||||
ORT_THROW(log_tag + " Exception while Loading Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
|
||||
} catch (...) {
|
||||
ORT_THROW(log_tag + " Exception while Loading Network for graph " + subgraph_context_.subgraph_name);
|
||||
}
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
|
||||
bool vpu_status = false;
|
||||
bool import_blob_status = false;
|
||||
std::string model_blob_name;
|
||||
std::ifstream blob_path;
|
||||
std::string ov_compiled_blobs_dir = "";
|
||||
|
||||
if(hw_target == "MYRIAD" && global_context_.use_compiled_network == true) {
|
||||
if(!openvino_ep::backend_utils::UseCompiledNetwork()) {
|
||||
std::size_t model_index = global_context_.onnx_model_path_name.find_last_of("/\\");
|
||||
std::string model_name= global_context_.onnx_model_path_name.substr(model_index+1);
|
||||
std::size_t model_extension_index = model_name.find_last_of(".");
|
||||
if(openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph) {
|
||||
model_blob_name = global_context_.onnx_model_name + "_" + "op_v_" + std::to_string(global_context_.onnx_opset_version) + "_" + model_name.substr(0,model_extension_index) + "_" + hw_target + "_" + subgraph_context_.subgraph_name + "_ov_" + "fully" + ".blob";
|
||||
}
|
||||
else {
|
||||
model_blob_name = global_context_.onnx_model_name + "_" + "op_v_" + std::to_string(global_context_.onnx_opset_version) + "_" + model_name.substr(0,model_extension_index) + "_" + hw_target + "_" + subgraph_context_.subgraph_name + "_ov_" + "partially" + ".blob";
|
||||
}
|
||||
if(global_context_.blob_dump_path == "" || global_context_.blob_dump_path == "\"" || global_context_.blob_dump_path.empty()) {
|
||||
ov_compiled_blobs_dir = openvino_ep::backend_utils::GetCurrentWorkingDir() + "/ov_compiled_blobs/";
|
||||
} else {
|
||||
ov_compiled_blobs_dir = global_context_.blob_dump_path + "/ov_compiled_blobs";
|
||||
}
|
||||
if(openvino_ep::backend_utils::IsDirExists(ov_compiled_blobs_dir)) {
|
||||
LOGS_DEFAULT(INFO) << log_tag << "'ov_compiled_blobs' directory already exists at the executable path";
|
||||
}
|
||||
else {
|
||||
CreateDirectory(ov_compiled_blobs_dir);
|
||||
}
|
||||
blob_path.open(ov_compiled_blobs_dir + "/" + model_blob_name);
|
||||
if (!blob_path.is_open()) {
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Device specific Compiled blob doesn't exist for this model";
|
||||
} else {
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Device specific Compiled blob already exists for this model";
|
||||
vpu_status = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//validate const subgraphs
|
||||
if(!openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph) {
|
||||
ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
|
||||
SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
|
||||
#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
|
||||
if (const_outputs_map_.size() == subgraph_context_.output_names.size())
|
||||
subgraph_context_.is_constant = true;
|
||||
#endif
|
||||
|
||||
// Loading model to the plugin
|
||||
if (subgraph_context_.is_constant) {
|
||||
LOGS_DEFAULT(INFO) << log_tag << "The subgraph is a const. Directly moving to Infer stage.";
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
if (vpu_status == true || openvino_ep::backend_utils::UseCompiledNetwork()) {
|
||||
const std::string model_blob_path = ov_compiled_blobs_dir + "/" + model_blob_name;
|
||||
const std::string compiled_blob_path = onnxruntime::GetEnvironmentVar("OV_BLOB_PATH");
|
||||
try {
|
||||
if(vpu_status == true) {
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Importing the pre-compiled blob for this model which already exists in the directory 'ov_compiled_blobs'";
|
||||
exe_network_ = global_context_.ie_core.ImportNetwork(model_blob_path, hw_target, {});
|
||||
} else {
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Importing the pre-compiled blob from the path set by the user";
|
||||
if (compiled_blob_path.empty())
|
||||
throw std::runtime_error("The compiled blob path is not set");
|
||||
exe_network_ = global_context_.ie_core.ImportNetwork(compiled_blob_path, hw_target, {});
|
||||
}
|
||||
} catch (InferenceEngine::details::InferenceEngineException &e) {
|
||||
ORT_THROW(log_tag + " Exception while Importing Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
|
||||
} catch(...) {
|
||||
ORT_THROW(log_tag + " Exception while Importing Network for graph: " + subgraph_context_.subgraph_name);
|
||||
}
|
||||
import_blob_status = true;
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Succesfully Created an executable network from a previously exported network";
|
||||
}
|
||||
|
||||
if ((global_context_.use_compiled_network == true && import_blob_status == false) || vpu_status == false) {
|
||||
if(!openvino_ep::backend_utils::UseCompiledNetwork()) {
|
||||
ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
|
||||
SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
|
||||
#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
|
||||
if (const_outputs_map_.size() == subgraph_context_.output_names.size())
|
||||
subgraph_context_.is_constant = true;
|
||||
#endif
|
||||
|
||||
// Loading model to the plugin
|
||||
if (subgraph_context_.is_constant)
|
||||
return;
|
||||
std::map<std::string, std::string> config;
|
||||
#ifndef NDEBUG
|
||||
if (openvino_ep::backend_utils::IsDebugEnabled()) {
|
||||
config["PERF_COUNT"] = CONFIG_VALUE(YES);
|
||||
}
|
||||
#endif
|
||||
if (global_context_.device_type.find("MYRIAD") != std::string::npos) {
|
||||
#if defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
|
||||
if (subgraph_context_.set_vpu_config) {
|
||||
config["MYRIAD_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
|
||||
}
|
||||
if (global_context_.enable_vpu_fast_compile) {
|
||||
config["MYRIAD_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
|
||||
config["MYRIAD_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
|
||||
}
|
||||
#else
|
||||
if (subgraph_context_.set_vpu_config) {
|
||||
config["VPU_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
|
||||
}
|
||||
if (global_context_.enable_vpu_fast_compile) {
|
||||
config["VPU_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
|
||||
config["VPU_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
try {
|
||||
exe_network_ = global_context_.ie_core.LoadNetwork(*ie_cnn_network_, hw_target, config);
|
||||
} catch (const InferenceEngine::details::InferenceEngineException& e) {
|
||||
ORT_THROW(log_tag + " Exception while Loading Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
|
||||
} catch (...) {
|
||||
ORT_THROW(log_tag + " Exception while Loading Network for graph " + subgraph_context_.subgraph_name);
|
||||
}
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
|
||||
if(global_context_.use_compiled_network && hw_target == "MYRIAD") {
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Dumping the compiled blob for this model into the directory 'ov_compiled_blobs'";
|
||||
std::ofstream compiled_blob_dump{ov_compiled_blobs_dir + "/" + model_blob_name};
|
||||
exe_network_.Export(compiled_blob_dump);
|
||||
}
|
||||
}
|
||||
}
|
||||
//The infer_requests_ pool will be intialized with a default value of 8 infer_request's
|
||||
//The nireq value can also be configured to any num_of_threads during runtime
|
||||
size_t nireq = global_context_.num_of_threads;
|
||||
|
|
@ -85,13 +165,13 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
|
|||
std::cout << "The value of nireq being used is: " << nireq << std::endl;
|
||||
}
|
||||
#endif
|
||||
inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network, nireq));
|
||||
inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, nireq));
|
||||
}
|
||||
|
||||
// Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on
|
||||
// an Infer Request indexed by infer_req_idx
|
||||
void BasicBackend::StartAsyncInference(Ort::CustomOpApi& ort, OrtKernelContext* context, std::shared_ptr<InferenceEngine::InferRequest> infer_request) {
|
||||
auto graph_input_info = ie_cnn_network_->getInputsInfo();
|
||||
auto graph_input_info = exe_network_.GetInputsInfo();
|
||||
|
||||
size_t index = 0;
|
||||
for (auto input_info_iter = graph_input_info.begin();
|
||||
|
|
@ -132,7 +212,7 @@ void BasicBackend::CompleteAsyncInference(Ort::CustomOpApi& ort, OrtKernelContex
|
|||
} catch (...) {
|
||||
ORT_THROW(log_tag + " Exception with completing Inference");
|
||||
}
|
||||
auto graph_output_info = ie_cnn_network_->getOutputsInfo();
|
||||
auto graph_output_info = exe_network_.GetOutputsInfo();
|
||||
|
||||
for (auto output_info_iter = graph_output_info.begin();
|
||||
output_info_iter != graph_output_info.end(); ++output_info_iter) {
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ class BasicBackend : public IBackend {
|
|||
SubGraphContext subgraph_context_;
|
||||
mutable std::mutex compute_lock_;
|
||||
std::shared_ptr<InferenceEngine::CNNNetwork> ie_cnn_network_;
|
||||
InferenceEngine::ExecutableNetwork exe_network_;
|
||||
std::map<std::string, std::shared_ptr<ngraph::Node>> const_outputs_map_;
|
||||
std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -13,12 +13,17 @@ struct GlobalContext {
|
|||
InferenceEngine::Core ie_core;
|
||||
bool is_wholly_supported_graph = false;
|
||||
bool enable_vpu_fast_compile = false;
|
||||
bool use_compiled_network = false;
|
||||
size_t num_of_threads;
|
||||
std::string device_type;
|
||||
std::string precision_str;
|
||||
std::string device_id;
|
||||
std::string blob_dump_path;
|
||||
std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};
|
||||
std::vector<std::string> deviceTags = {"0", "1", "2", "3", "4", "5", "6", "7"};
|
||||
std::string onnx_model_name;
|
||||
std::string onnx_model_path_name;
|
||||
int onnx_opset_version;
|
||||
};
|
||||
|
||||
// Holds context specific to subgraph.
|
||||
|
|
|
|||
|
|
@ -18,6 +18,9 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
|
|||
openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_;
|
||||
openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
|
||||
openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
|
||||
openvino_ep::BackendManager::GetGlobalContext().use_compiled_network = info.use_compiled_network_;
|
||||
openvino_ep::BackendManager::GetGlobalContext().blob_dump_path = info.blob_dump_path_;
|
||||
|
||||
if ((int)info.num_of_threads_ <= 0) {
|
||||
openvino_ep::BackendManager::GetGlobalContext().num_of_threads = 8;
|
||||
} else {
|
||||
|
|
@ -55,6 +58,14 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, const
|
|||
ORT_UNUSED_PARAMETER(kernel_registries);
|
||||
|
||||
std::vector<std::unique_ptr<ComputeCapability>> result;
|
||||
openvino_ep::BackendManager::GetGlobalContext().onnx_model_name = graph_viewer.Name();
|
||||
#ifdef _WIN32
|
||||
std::wstring onnx_path = graph_viewer.ModelPath().ToPathString();
|
||||
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = std::string(onnx_path.begin(), onnx_path.end());
|
||||
#else
|
||||
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = graph_viewer.ModelPath().ToPathString();
|
||||
#endif
|
||||
openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain);
|
||||
|
||||
#if defined OPENVINO_2020_3
|
||||
result = openvino_ep::GetCapability_2020_3(graph_viewer,
|
||||
|
|
|
|||
|
|
@ -55,9 +55,11 @@ struct OpenVINOExecutionProviderInfo {
|
|||
bool enable_vpu_fast_compile_;
|
||||
std::string device_id_;
|
||||
size_t num_of_threads_;
|
||||
bool use_compiled_network_;
|
||||
std::string blob_dump_path_;
|
||||
|
||||
explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, size_t num_of_threads)
|
||||
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads) {
|
||||
explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, size_t num_of_threads, bool use_compiled_network, std::string blob_dump_path)
|
||||
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network), blob_dump_path_(blob_dump_path) {
|
||||
if (dev_type == "") {
|
||||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
|
||||
<< "No runtime device selection option provided.";
|
||||
|
|
@ -125,7 +127,7 @@ struct OpenVINOExecutionProviderInfo {
|
|||
<< "Choosing Device: " << device_type_ << " , Precision: " << precision_;
|
||||
}
|
||||
OpenVINOExecutionProviderInfo() {
|
||||
OpenVINOExecutionProviderInfo("", false, "", 0);
|
||||
OpenVINOExecutionProviderInfo("", false, "", 0, false,"");
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -8,10 +8,12 @@
|
|||
namespace onnxruntime {
|
||||
struct OpenVINOProviderFactory : IExecutionProviderFactory {
|
||||
OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
|
||||
const char* device_id, size_t num_of_threads)
|
||||
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads) {
|
||||
const char* device_id, size_t num_of_threads,
|
||||
bool use_compiled_network, const char* blob_dump_path)
|
||||
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network) {
|
||||
device_type_ = (device_type == nullptr) ? "" : device_type;
|
||||
device_id_ = (device_id == nullptr) ? "" : device_id;
|
||||
blob_dump_path_ = (blob_dump_path == nullptr) ? "" : blob_dump_path;
|
||||
}
|
||||
~OpenVINOProviderFactory() override {
|
||||
}
|
||||
|
|
@ -23,16 +25,18 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
|
|||
bool enable_vpu_fast_compile_;
|
||||
std::string device_id_;
|
||||
size_t num_of_threads_;
|
||||
bool use_compiled_network_;
|
||||
std::string blob_dump_path_;
|
||||
};
|
||||
|
||||
std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
|
||||
OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_);
|
||||
OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_, use_compiled_network_, blob_dump_path_);
|
||||
return std::make_unique<OpenVINOExecutionProvider>(info);
|
||||
}
|
||||
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
|
||||
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads) {
|
||||
return std::make_shared<onnxruntime::OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile, device_id, num_of_threads);
|
||||
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path) {
|
||||
return std::make_shared<onnxruntime::OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile, device_id, num_of_threads, use_compiled_network, blob_dump_path);
|
||||
}
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -50,7 +54,7 @@ struct OpenVINO_Provider : Provider {
|
|||
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const void* void_params) override {
|
||||
auto& params = *reinterpret_cast<const OrtOpenVINOProviderOptions*>(void_params);
|
||||
return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile, params.device_id, params.num_of_threads);
|
||||
return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile, params.device_id, params.num_of_threads, params.use_compiled_network, params.blob_dump_path);
|
||||
}
|
||||
|
||||
void Shutdown() override {
|
||||
|
|
|
|||
|
|
@ -665,6 +665,7 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
|
|||
#ifdef USE_OPENVINO
|
||||
OrtOpenVINOProviderOptions params;
|
||||
params.device_type = openvino_device_type.c_str();
|
||||
std::string blob_dump_path;
|
||||
|
||||
auto it = provider_options_map.find(type);
|
||||
if (it != provider_options_map.end()) {
|
||||
|
|
@ -681,10 +682,22 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
|
|||
ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
|
||||
}
|
||||
|
||||
} else if (option.first == "use_compiled_network") {
|
||||
if (option.second == "True") {
|
||||
params.use_compiled_network = true;
|
||||
} else if (option.second == "False") {
|
||||
params.use_compiled_network = false;
|
||||
} else {
|
||||
ORT_THROW("Invalid value passed for use_compiled_network: ", option.second);
|
||||
}
|
||||
|
||||
} else if (option.first == "device_id") {
|
||||
params.device_id = option.second.c_str();
|
||||
} else if (option.first == "num_of_threads") {
|
||||
params.num_of_threads = std::stoi(option.second);
|
||||
} else if (option.first == "blob_dump_path") {
|
||||
blob_dump_path = option.second;
|
||||
params.blob_dump_path = blob_dump_path.c_str();
|
||||
} else {
|
||||
ORT_THROW("Invalid OpenVINO EP option: ", option.first);
|
||||
}
|
||||
|
|
@ -967,7 +980,7 @@ void addGlobalMethods(py::module& m, Environment& env) {
|
|||
onnxruntime::CreateExecutionProviderFactory_Dnnl(1),
|
||||
#endif
|
||||
#ifdef USE_OPENVINO
|
||||
onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device_type, false, "", 8),
|
||||
onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device_type, false, "", 8, false, ""),
|
||||
#endif
|
||||
#ifdef USE_TENSORRT
|
||||
onnxruntime::CreateExecutionProviderFactory_Tensorrt(
|
||||
|
|
|
|||
|
|
@ -60,8 +60,10 @@ namespace perftest {
|
|||
"\t [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n"
|
||||
"\t [OpenVINO only] [enable_vpu_fast_compile]: Optionally enabled to speeds up the model's compilation on VPU device targets.\n"
|
||||
"\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
|
||||
"\t [OpenVINO only] [use_compiled_network]: Can be enabled to directly import pre-compiled blobs if exists. currently this feature is only supported on MyriadX(VPU) hardware device target.\n"
|
||||
"\t [OpenVINO only] [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs for the use_compiled_network(save/load blob) feature. This overrides the default path.\n"
|
||||
"\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
|
||||
"\t [Example] [For OpenVINO EP] -e openvino -i 'device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5'\n"
|
||||
"\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5 use_compiled_network|true blob_dump_path|\"<path>\"\"\n"
|
||||
"\t [TensorRT only] [use_trt_options]: Overrides TensorRT environment variables (if any) with following settings at runtime.\n"
|
||||
"\t [TensorRT only] [trt_max_workspace_size]: Set TensorRT maximum workspace size in byte.\n"
|
||||
"\t [TensorRT only] [trt_fp16_enable]: Enable TensorRT FP16 precision.\n"
|
||||
|
|
|
|||
|
|
@ -162,9 +162,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
} else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) {
|
||||
#ifdef USE_OPENVINO
|
||||
std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.
|
||||
bool enable_vpu_fast_compile = false; // [device_id]: Selects a particular hardware device for inference.
|
||||
std::string device_id = ""; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speeds up the model's compilation to VPU device specific format.
|
||||
bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speeds up the model's compilation to VPU device specific format.
|
||||
std::string device_id = ""; // [device_id]: Selects a particular hardware device for inference.
|
||||
size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of threads with this value at runtime.
|
||||
bool use_compiled_network = false; // [use_compiled_network]: Can be enabled to directly import pre-compiled blobs if exists.
|
||||
std::string blob_dump_path = ""; // [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs for the use_compiled_network(save/load blob) feature. This overrides the default path.
|
||||
|
||||
#ifdef _MSC_VER
|
||||
std::string ov_string = ToMBString(performance_test_config.run_config.ep_runtime_config_string);
|
||||
|
|
@ -203,14 +205,24 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
} else {
|
||||
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "use_compiled_network") {
|
||||
if(value == "true" || value == "True"){
|
||||
use_compiled_network = true;
|
||||
} else if (value == "false" || value == "False") {
|
||||
use_compiled_network = false;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'use_compiled_network' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "num_of_threads") {
|
||||
std::stringstream sstream(value);
|
||||
sstream >> num_of_threads;
|
||||
if ((int)num_of_threads <=0) {
|
||||
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_of_threads' should be greater than 0\n");
|
||||
}
|
||||
} else if (key == "blob_dump_path") {
|
||||
blob_dump_path = value;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads'] \n");
|
||||
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'use_compiled_network', 'blob_dump_path'] \n");
|
||||
}
|
||||
}
|
||||
OrtOpenVINOProviderOptions options;
|
||||
|
|
@ -218,6 +230,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
options.device_id = device_id.c_str(); // To set the device_id
|
||||
options.enable_vpu_fast_compile = enable_vpu_fast_compile; // To enable_vpu_fast_compile, default is false
|
||||
options.num_of_threads = num_of_threads; // To set number of free InferRequests, default is 8
|
||||
options.use_compiled_network = use_compiled_network; // To use_compiled_network, default is false
|
||||
options.blob_dump_path = blob_dump_path.c_str(); // sets the blob_dump_path, default is ""
|
||||
session_options.AppendExecutionProvider_OpenVINO(options);
|
||||
#else
|
||||
ORT_THROW("OpenVINO is not supported in this build\n");
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@
|
|||
namespace onnxruntime {
|
||||
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
|
||||
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads);
|
||||
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path);
|
||||
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
|
||||
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
|
||||
|
|
|
|||
Loading…
Reference in a new issue