[OpenVINO-EP] Enabling save/load blob feature (#7054)

* Enabling save/load blob feature for OpenVINO-EP

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Added changes to enhance the save/load feature

-> This feature applies only to the MYRIAD device target
-> Cleaned up the code and added error checks

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Enabled the feature only for MyriadX and only on Linux

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Fixed compilation issues on Windows

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Added changes to fix const subgraph issue

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Fixed issues on Windows

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Added changes for the feature

-> Removed the default dump directory location that was set via CMake
-> Blob dumps are now saved at the executable path by default (see the sketch below)
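
A minimal sketch of how the default dump location gets resolved, based on the GetCurrentWorkingDir / IsDirExists / CreateDirectory helpers added in backend_utils.cc below; the helper name ResolveBlobDir and the POSIX-only error handling are illustrative assumptions, not part of the commit.

#include <cstdio>       // FILENAME_MAX
#include <stdexcept>
#include <string>
#include <sys/stat.h>   // stat, mkdir
#include <unistd.h>     // getcwd

// Sketch: pick the blob directory the way the EP does, falling back to the
// current working directory when no blob_dump_path was configured.
std::string ResolveBlobDir(const std::string& user_blob_dump_path) {
  std::string base = user_blob_dump_path;
  if (base.empty()) {
    char buff[FILENAME_MAX];
    if (getcwd(buff, FILENAME_MAX) == nullptr)
      throw std::runtime_error("Could not query the current working directory");
    base = buff;  // executable/working directory is the default
  }
  const std::string dir = base + "/ov_compiled_blobs";
  struct stat info;
  if (stat(dir.c_str(), &info) != 0 || !(info.st_mode & S_IFDIR)) {
    if (mkdir(dir.c_str(), 0777) != 0)  // create the directory once if missing
      throw std::runtime_error("Could not create the directory");
  }
  return dir;
}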

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Made the save/load dump path configurable

-> The save/load blob dump path can now also be configured through the C and Python APIs.

-> Introduced a provider option named blob_dump_path (see the usage sketch after this list)
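
A minimal C++ usage sketch, assuming the C++ API wrapper: it fills the extended OrtOpenVINOProviderOptions fields from the header change below and passes them through AppendExecutionProvider_OpenVINO (as shown in the perf-test diff); the model path and dump directory are placeholders.

#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "ov_blob_demo");
  Ort::SessionOptions session_options;

  OrtOpenVINOProviderOptions options;
  options.device_type = "MYRIAD_FP16";       // save/load blob is MyriadX-only
  options.device_id = "";
  options.enable_vpu_fast_compile = 0;
  options.num_of_threads = 8;
  options.use_compiled_network = 1;          // turn the save/load blob feature on
  options.blob_dump_path = "/tmp/ov_blobs";  // optional override of the default dump dir

  session_options.AppendExecutionProvider_OpenVINO(options);
  Ort::Session session(env, "model.onnx", session_options);  // placeholder model path
  return 0;
}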

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Minor fixes added

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Fixed Python API issues

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Using GetEnvironmentVar to get the path

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Fixed a Python runtime option issue

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Fixed the import network issue on Windows

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

Maajid khan, 2021-04-08 09:29:16 +05:30, committed by GitHub
commit 27e778909d (parent def4cc09c7)
13 changed files with 274 additions and 76 deletions

@ -305,12 +305,14 @@ typedef struct OrtTensorRTProviderOptions {
/// </summary>
typedef struct OrtOpenVINOProviderOptions {
#ifdef __cplusplus
OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{} {}
OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{}, use_compiled_network{}, blob_dump_path{} {}
#endif
const char* device_type; // CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16 or VAD-F_FP32
unsigned char enable_vpu_fast_compile; // 0 = false, nonzero = true
const char* device_id;
size_t num_of_threads; // 0 uses default number of threads
unsigned char use_compiled_network; // 0 = false, nonzero = true
const char* blob_dump_path; // path is set to empty by default
} OrtOpenVINOProviderOptions;
struct OrtApi;

@ -23,15 +23,11 @@ namespace backend_utils {
#ifndef NDEBUG
bool IsDebugEnabled() {
#ifdef _WIN32
size_t env_name_len = 0;
char* env_name = nullptr;
bool res = (_dupenv_s(&env_name, &env_name_len, "ORT_OPENVINO_ENABLE_DEBUG") == 0 && env_name != nullptr);
free(env_name);
return res;
#else
return (std::getenv("ORT_OPENVINO_ENABLE_DEBUG") != nullptr);
#endif
const std::string env_name = onnxruntime::GetEnvironmentVar("ORT_OPENVINO_ENABLE_DEBUG");
if (!env_name.empty()) {
return true;
}
return false;
}
void DumpOnnxModelProto(const ONNX_NAMESPACE::ModelProto& model_proto, std::string file_name) {
std::fstream outfile(file_name, std::ios::out | std::ios::trunc | std::ios::binary);
@ -40,6 +36,56 @@ void DumpOnnxModelProto(const ONNX_NAMESPACE::ModelProto& model_proto, std::stri
#endif
bool UseCompiledNetwork() {
const std::string env_name = onnxruntime::GetEnvironmentVar("OV_USE_COMPILED_NETWORK");
if (!env_name.empty()) {
return true;
}
return false;
}
std::string GetCurrentWorkingDir() {
std::string curr_dir;
ORT_UNUSED_PARAMETER(curr_dir);
char buff[FILENAME_MAX];
curr_dir = GetCurrentDir(buff, FILENAME_MAX);
std::string current_working_dir(buff);
return current_working_dir;
}
bool IsDirExists(const std::string& pathname) {
struct stat info;
if(stat(pathname.c_str(), &info) != 0) {
LOGS_DEFAULT(INFO) << log_tag << "cannot access pathname: " << pathname;
return false;
} else if(info.st_mode & S_IFDIR) {
LOGS_DEFAULT(INFO) << log_tag << "pathname exists: " << pathname;
return true;
} else {
LOGS_DEFAULT(INFO) << log_tag << "pathname: " << pathname << ": doesn't contain the directory 'ov_compiled_blobs' ";
}
return false;
}
void CreateDirectory(const std::string& ov_compiled_blobs_dir) {
LOGS_DEFAULT(INFO) << log_tag << "'ov_compiled_blobs' directory doesn't exist at the executable path, so creating one";
#if defined(_WIN32)
if (_mkdir(ov_compiled_blobs_dir.c_str()) == 0) { // Creating a directory
LOGS_DEFAULT(INFO) << log_tag << "created a directory named 'ov_compiled_blobs' at the executable path";
} else {
LOGS_DEFAULT(INFO) << log_tag << "Error creating a directory named 'ov_compiled_blobs' at the executable path";
throw std::runtime_error("Could not create the directory");
}
#else
if (mkdir(ov_compiled_blobs_dir.c_str(), 0777) == 0) { // Creating a directory
LOGS_DEFAULT(INFO) << log_tag << "created a directory named 'ov_compiled_blobs' at the executable path";
} else {
LOGS_DEFAULT(INFO) << log_tag << "Error creating a directory named 'ov_compiled_blobs' at the executable path";
throw std::runtime_error("Could not create the directory");
}
#endif
}
struct static_cast_int64 {
template <typename T1> // T1 models type statically convertible to T
int64_t operator()(const T1& x) const { return static_cast<int64_t>(x); }

@ -10,6 +10,16 @@
#include "contexts.h"
#include <iomanip>
#ifdef _WIN32
#include <direct.h>
#define GetCurrentDir _getcwd
#else
#include <unistd.h>
#define GetCurrentDir getcwd
#endif
#include <sys/stat.h>
namespace onnxruntime {
namespace openvino_ep {
namespace backend_utils {
@ -19,6 +29,14 @@ const std::string log_tag = "[OpenVINO-EP] ";
bool IsDebugEnabled();
#endif
bool UseCompiledNetwork();
std::string GetCurrentWorkingDir();
bool IsDirExists(const std::string& pathname);
void CreateDirectory(const std::string& ov_compiled_blobs_dir);
void SetIODefs(const ONNX_NAMESPACE::ModelProto& model_proto,
std::shared_ptr<InferenceEngine::CNNNetwork> network,
std::unordered_map<std::string, int> output_names,

@ -16,6 +16,7 @@
#include <ngraph/pass/constant_folding.hpp>
#include "basic_backend.h"
#include "../backend_manager.h"
namespace onnxruntime {
namespace openvino_ep {
@ -26,56 +27,135 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
GlobalContext& global_context,
const SubGraphContext& subgraph_context)
: global_context_(global_context), subgraph_context_(subgraph_context) {
ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
InferenceEngine::ExecutableNetwork exe_network;
#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
if (const_outputs_map_.size() == subgraph_context_.output_names.size())
subgraph_context_.is_constant = true;
#endif
// Loading model to the plugin
if (subgraph_context_.is_constant)
return;
std::map<std::string, std::string> config;
#ifndef NDEBUG
if (openvino_ep::backend_utils::IsDebugEnabled()) {
config["PERF_COUNT"] = CONFIG_VALUE(YES);
}
#endif
if (global_context_.device_type.find("MYRIAD") != std::string::npos) {
#if defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
if (subgraph_context_.set_vpu_config) {
config["MYRIAD_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
}
if (global_context_.enable_vpu_fast_compile) {
config["MYRIAD_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
config["MYRIAD_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
}
#else
if (subgraph_context_.set_vpu_config) {
config["VPU_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
}
if (global_context_.enable_vpu_fast_compile) {
config["VPU_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
config["VPU_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
}
#endif
}
std::string& hw_target = (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
try {
exe_network = global_context_.ie_core.LoadNetwork(*ie_cnn_network_, hw_target, config);
} catch (const InferenceEngine::details::InferenceEngineException& e) {
ORT_THROW(log_tag + " Exception while Loading Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
} catch (...) {
ORT_THROW(log_tag + " Exception while Loading Network for graph " + subgraph_context_.subgraph_name);
}
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
bool vpu_status = false;
bool import_blob_status = false;
std::string model_blob_name;
std::ifstream blob_path;
std::string ov_compiled_blobs_dir = "";
if(hw_target == "MYRIAD" && global_context_.use_compiled_network == true) {
if(!openvino_ep::backend_utils::UseCompiledNetwork()) {
std::size_t model_index = global_context_.onnx_model_path_name.find_last_of("/\\");
std::string model_name= global_context_.onnx_model_path_name.substr(model_index+1);
std::size_t model_extension_index = model_name.find_last_of(".");
if(openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph) {
model_blob_name = global_context_.onnx_model_name + "_" + "op_v_" + std::to_string(global_context_.onnx_opset_version) + "_" + model_name.substr(0,model_extension_index) + "_" + hw_target + "_" + subgraph_context_.subgraph_name + "_ov_" + "fully" + ".blob";
}
else {
model_blob_name = global_context_.onnx_model_name + "_" + "op_v_" + std::to_string(global_context_.onnx_opset_version) + "_" + model_name.substr(0,model_extension_index) + "_" + hw_target + "_" + subgraph_context_.subgraph_name + "_ov_" + "partially" + ".blob";
}
if(global_context_.blob_dump_path == "" || global_context_.blob_dump_path == "\"" || global_context_.blob_dump_path.empty()) {
ov_compiled_blobs_dir = openvino_ep::backend_utils::GetCurrentWorkingDir() + "/ov_compiled_blobs/";
} else {
ov_compiled_blobs_dir = global_context_.blob_dump_path + "/ov_compiled_blobs";
}
if(openvino_ep::backend_utils::IsDirExists(ov_compiled_blobs_dir)) {
LOGS_DEFAULT(INFO) << log_tag << "'ov_compiled_blobs' directory already exists at the executable path";
}
else {
CreateDirectory(ov_compiled_blobs_dir);
}
blob_path.open(ov_compiled_blobs_dir + "/" + model_blob_name);
if (!blob_path.is_open()) {
LOGS_DEFAULT(INFO) << log_tag << "Device specific Compiled blob doesn't exist for this model";
} else {
LOGS_DEFAULT(INFO) << log_tag << "Device specific Compiled blob already exists for this model";
vpu_status = true;
}
}
}
//validate const subgraphs
if(!openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph) {
ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
if (const_outputs_map_.size() == subgraph_context_.output_names.size())
subgraph_context_.is_constant = true;
#endif
// Loading model to the plugin
if (subgraph_context_.is_constant) {
LOGS_DEFAULT(INFO) << log_tag << "The subgraph is a const. Directly moving to Infer stage.";
return;
}
}
if (vpu_status == true || openvino_ep::backend_utils::UseCompiledNetwork()) {
const std::string model_blob_path = ov_compiled_blobs_dir + "/" + model_blob_name;
const std::string compiled_blob_path = onnxruntime::GetEnvironmentVar("OV_BLOB_PATH");
try {
if(vpu_status == true) {
LOGS_DEFAULT(INFO) << log_tag << "Importing the pre-compiled blob for this model which already exists in the directory 'ov_compiled_blobs'";
exe_network_ = global_context_.ie_core.ImportNetwork(model_blob_path, hw_target, {});
} else {
LOGS_DEFAULT(INFO) << log_tag << "Importing the pre-compiled blob from the path set by the user";
if (compiled_blob_path.empty())
throw std::runtime_error("The compiled blob path is not set");
exe_network_ = global_context_.ie_core.ImportNetwork(compiled_blob_path, hw_target, {});
}
} catch (InferenceEngine::details::InferenceEngineException &e) {
ORT_THROW(log_tag + " Exception while Importing Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
} catch(...) {
ORT_THROW(log_tag + " Exception while Importing Network for graph: " + subgraph_context_.subgraph_name);
}
import_blob_status = true;
LOGS_DEFAULT(INFO) << log_tag << "Succesfully Created an executable network from a previously exported network";
}
if ((global_context_.use_compiled_network == true && import_blob_status == false) || vpu_status == false) {
if(!openvino_ep::backend_utils::UseCompiledNetwork()) {
ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
if (const_outputs_map_.size() == subgraph_context_.output_names.size())
subgraph_context_.is_constant = true;
#endif
// Loading model to the plugin
if (subgraph_context_.is_constant)
return;
std::map<std::string, std::string> config;
#ifndef NDEBUG
if (openvino_ep::backend_utils::IsDebugEnabled()) {
config["PERF_COUNT"] = CONFIG_VALUE(YES);
}
#endif
if (global_context_.device_type.find("MYRIAD") != std::string::npos) {
#if defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
if (subgraph_context_.set_vpu_config) {
config["MYRIAD_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
}
if (global_context_.enable_vpu_fast_compile) {
config["MYRIAD_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
config["MYRIAD_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
}
#else
if (subgraph_context_.set_vpu_config) {
config["VPU_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
}
if (global_context_.enable_vpu_fast_compile) {
config["VPU_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
config["VPU_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
}
#endif
}
try {
exe_network_ = global_context_.ie_core.LoadNetwork(*ie_cnn_network_, hw_target, config);
} catch (const InferenceEngine::details::InferenceEngineException& e) {
ORT_THROW(log_tag + " Exception while Loading Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
} catch (...) {
ORT_THROW(log_tag + " Exception while Loading Network for graph " + subgraph_context_.subgraph_name);
}
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
if(global_context_.use_compiled_network && hw_target == "MYRIAD") {
LOGS_DEFAULT(INFO) << log_tag << "Dumping the compiled blob for this model into the directory 'ov_compiled_blobs'";
std::ofstream compiled_blob_dump{ov_compiled_blobs_dir + "/" + model_blob_name};
exe_network_.Export(compiled_blob_dump);
}
}
}
//The infer_requests_ pool will be initialized with a default value of 8 infer_requests
//The nireq value can also be configured to any num_of_threads during runtime
size_t nireq = global_context_.num_of_threads;
@ -85,13 +165,13 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
std::cout << "The value of nireq being used is: " << nireq << std::endl;
}
#endif
inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network, nireq));
inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, nireq));
}
// Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on
// an Infer Request indexed by infer_req_idx
void BasicBackend::StartAsyncInference(Ort::CustomOpApi& ort, OrtKernelContext* context, std::shared_ptr<InferenceEngine::InferRequest> infer_request) {
auto graph_input_info = ie_cnn_network_->getInputsInfo();
auto graph_input_info = exe_network_.GetInputsInfo();
size_t index = 0;
for (auto input_info_iter = graph_input_info.begin();
@ -132,7 +212,7 @@ void BasicBackend::CompleteAsyncInference(Ort::CustomOpApi& ort, OrtKernelContex
} catch (...) {
ORT_THROW(log_tag + " Exception with completing Inference");
}
auto graph_output_info = ie_cnn_network_->getOutputsInfo();
auto graph_output_info = exe_network_.GetOutputsInfo();
for (auto output_info_iter = graph_output_info.begin();
output_info_iter != graph_output_info.end(); ++output_info_iter) {

@ -37,6 +37,7 @@ class BasicBackend : public IBackend {
SubGraphContext subgraph_context_;
mutable std::mutex compute_lock_;
std::shared_ptr<InferenceEngine::CNNNetwork> ie_cnn_network_;
InferenceEngine::ExecutableNetwork exe_network_;
std::map<std::string, std::shared_ptr<ngraph::Node>> const_outputs_map_;
std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
};

@ -13,12 +13,17 @@ struct GlobalContext {
InferenceEngine::Core ie_core;
bool is_wholly_supported_graph = false;
bool enable_vpu_fast_compile = false;
bool use_compiled_network = false;
size_t num_of_threads;
std::string device_type;
std::string precision_str;
std::string device_id;
std::string blob_dump_path;
std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};
std::vector<std::string> deviceTags = {"0", "1", "2", "3", "4", "5", "6", "7"};
std::string onnx_model_name;
std::string onnx_model_path_name;
int onnx_opset_version;
};
// Holds context specific to subgraph.

@ -18,6 +18,9 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_;
openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
openvino_ep::BackendManager::GetGlobalContext().use_compiled_network = info.use_compiled_network_;
openvino_ep::BackendManager::GetGlobalContext().blob_dump_path = info.blob_dump_path_;
if ((int)info.num_of_threads_ <= 0) {
openvino_ep::BackendManager::GetGlobalContext().num_of_threads = 8;
} else {
@ -55,6 +58,14 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, const
ORT_UNUSED_PARAMETER(kernel_registries);
std::vector<std::unique_ptr<ComputeCapability>> result;
openvino_ep::BackendManager::GetGlobalContext().onnx_model_name = graph_viewer.Name();
#ifdef _WIN32
std::wstring onnx_path = graph_viewer.ModelPath().ToPathString();
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = std::string(onnx_path.begin(), onnx_path.end());
#else
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = graph_viewer.ModelPath().ToPathString();
#endif
openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain);
#if defined OPENVINO_2020_3
result = openvino_ep::GetCapability_2020_3(graph_viewer,

@ -55,9 +55,11 @@ struct OpenVINOExecutionProviderInfo {
bool enable_vpu_fast_compile_;
std::string device_id_;
size_t num_of_threads_;
bool use_compiled_network_;
std::string blob_dump_path_;
explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, size_t num_of_threads)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads) {
explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, size_t num_of_threads, bool use_compiled_network, std::string blob_dump_path)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network), blob_dump_path_(blob_dump_path) {
if (dev_type == "") {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
<< "No runtime device selection option provided.";
@ -125,7 +127,7 @@ struct OpenVINOExecutionProviderInfo {
<< "Choosing Device: " << device_type_ << " , Precision: " << precision_;
}
OpenVINOExecutionProviderInfo() {
OpenVINOExecutionProviderInfo("", false, "", 0);
OpenVINOExecutionProviderInfo("", false, "", 0, false,"");
}
};

@ -8,10 +8,12 @@
namespace onnxruntime {
struct OpenVINOProviderFactory : IExecutionProviderFactory {
OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
const char* device_id, size_t num_of_threads)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads) {
const char* device_id, size_t num_of_threads,
bool use_compiled_network, const char* blob_dump_path)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network) {
device_type_ = (device_type == nullptr) ? "" : device_type;
device_id_ = (device_id == nullptr) ? "" : device_id;
blob_dump_path_ = (blob_dump_path == nullptr) ? "" : blob_dump_path;
}
~OpenVINOProviderFactory() override {
}
@ -23,16 +25,18 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
bool enable_vpu_fast_compile_;
std::string device_id_;
size_t num_of_threads_;
bool use_compiled_network_;
std::string blob_dump_path_;
};
std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_);
OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_, use_compiled_network_, blob_dump_path_);
return std::make_unique<OpenVINOExecutionProvider>(info);
}
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads) {
return std::make_shared<onnxruntime::OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile, device_id, num_of_threads);
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path) {
return std::make_shared<onnxruntime::OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile, device_id, num_of_threads, use_compiled_network, blob_dump_path);
}
} // namespace onnxruntime
@ -50,7 +54,7 @@ struct OpenVINO_Provider : Provider {
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const void* void_params) override {
auto& params = *reinterpret_cast<const OrtOpenVINOProviderOptions*>(void_params);
return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile, params.device_id, params.num_of_threads);
return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile, params.device_id, params.num_of_threads, params.use_compiled_network, params.blob_dump_path);
}
void Shutdown() override {

@ -665,6 +665,7 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
#ifdef USE_OPENVINO
OrtOpenVINOProviderOptions params;
params.device_type = openvino_device_type.c_str();
std::string blob_dump_path;
auto it = provider_options_map.find(type);
if (it != provider_options_map.end()) {
@ -681,10 +682,22 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
}
} else if (option.first == "use_compiled_network") {
if (option.second == "True") {
params.use_compiled_network = true;
} else if (option.second == "False") {
params.use_compiled_network = false;
} else {
ORT_THROW("Invalid value passed for use_compiled_network: ", option.second);
}
} else if (option.first == "device_id") {
params.device_id = option.second.c_str();
} else if (option.first == "num_of_threads") {
params.num_of_threads = std::stoi(option.second);
} else if (option.first == "blob_dump_path") {
blob_dump_path = option.second;
params.blob_dump_path = blob_dump_path.c_str();
} else {
ORT_THROW("Invalid OpenVINO EP option: ", option.first);
}
@ -967,7 +980,7 @@ void addGlobalMethods(py::module& m, Environment& env) {
onnxruntime::CreateExecutionProviderFactory_Dnnl(1),
#endif
#ifdef USE_OPENVINO
onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device_type, false, "", 8),
onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device_type, false, "", 8, false, ""),
#endif
#ifdef USE_TENSORRT
onnxruntime::CreateExecutionProviderFactory_Tensorrt(

@ -60,8 +60,10 @@ namespace perftest {
"\t [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n"
"\t [OpenVINO only] [enable_vpu_fast_compile]: Optionally enabled to speeds up the model's compilation on VPU device targets.\n"
"\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
"\t [OpenVINO only] [use_compiled_network]: Can be enabled to directly import pre-compiled blobs if exists. currently this feature is only supported on MyriadX(VPU) hardware device target.\n"
"\t [OpenVINO only] [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs for the use_compiled_network(save/load blob) feature. This overrides the default path.\n"
"\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
"\t [Example] [For OpenVINO EP] -e openvino -i 'device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5'\n"
"\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5 use_compiled_network|true blob_dump_path|\"<path>\"\"\n"
"\t [TensorRT only] [use_trt_options]: Overrides TensorRT environment variables (if any) with following settings at runtime.\n"
"\t [TensorRT only] [trt_max_workspace_size]: Set TensorRT maximum workspace size in byte.\n"
"\t [TensorRT only] [trt_fp16_enable]: Enable TensorRT FP16 precision.\n"

@ -162,9 +162,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
} else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) {
#ifdef USE_OPENVINO
std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.
bool enable_vpu_fast_compile = false; // [device_id]: Selects a particular hardware device for inference.
std::string device_id = ""; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speeds up the model's compilation to VPU device specific format.
bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speed up the model's compilation to VPU device specific format.
std::string device_id = ""; // [device_id]: Selects a particular hardware device for inference.
size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of threads with this value at runtime.
bool use_compiled_network = false; // [use_compiled_network]: Can be enabled to directly import pre-compiled blobs if they exist.
std::string blob_dump_path = ""; // [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs for the use_compiled_network (save/load blob) feature. This overrides the default path.
#ifdef _MSC_VER
std::string ov_string = ToMBString(performance_test_config.run_config.ep_runtime_config_string);
@ -203,14 +205,24 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
} else {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "use_compiled_network") {
if(value == "true" || value == "True"){
use_compiled_network = true;
} else if (value == "false" || value == "False") {
use_compiled_network = false;
} else {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'use_compiled_network' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "num_of_threads") {
std::stringstream sstream(value);
sstream >> num_of_threads;
if ((int)num_of_threads <=0) {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_of_threads' should be greater than 0\n");
}
} else if (key == "blob_dump_path") {
blob_dump_path = value;
} else {
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads'] \n");
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'use_compiled_network', 'blob_dump_path'] \n");
}
}
OrtOpenVINOProviderOptions options;
@ -218,6 +230,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
options.device_id = device_id.c_str(); // To set the device_id
options.enable_vpu_fast_compile = enable_vpu_fast_compile; // To enable_vpu_fast_compile, default is false
options.num_of_threads = num_of_threads; // To set number of free InferRequests, default is 8
options.use_compiled_network = use_compiled_network; // To use_compiled_network, default is false
options.blob_dump_path = blob_dump_path.c_str(); // sets the blob_dump_path, default is ""
session_options.AppendExecutionProvider_OpenVINO(options);
#else
ORT_THROW("OpenVINO is not supported in this build\n");

@ -18,7 +18,7 @@
namespace onnxruntime {
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads);
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);