[OpenVINO-EP] Enabling save/load blob feature (#7054)

* Enabling save/load blob feature for OpenVINO-EP

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Added changes to enhance the save/load feature

-> This feature applies only to the MYRIAD device target
-> Cleaned up the code and added error checks

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Enabled the feature only for MyriadX and only on Linux

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Fixed compilation issues on Windows

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Added changes to fix const subgraph issue

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Fixed issues on Windows

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Added changes for the feature

-> Removed the default dump directory location that was set via CMake
-> Blob dumps are now saved at the executable path by default (see the sketch below)
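
A minimal sketch of how the default dump location gets resolved, based on the GetCurrentWorkingDir / IsDirExists / CreateDirectory helpers added in backend_utils.cc below; the helper name ResolveBlobDir and the POSIX-only error handling are illustrative assumptions, not part of the commit.

#include <cstdio>       // FILENAME_MAX
#include <stdexcept>
#include <string>
#include <sys/stat.h>   // stat, mkdir
#include <unistd.h>     // getcwd

// Sketch: pick the blob directory the way the EP does, falling back to the
// current working directory when no blob_dump_path was configured.
std::string ResolveBlobDir(const std::string& user_blob_dump_path) {
  std::string base = user_blob_dump_path;
  if (base.empty()) {
    char buff[FILENAME_MAX];
    if (getcwd(buff, FILENAME_MAX) == nullptr)
      throw std::runtime_error("Could not query the current working directory");
    base = buff;  // executable/working directory is the default
  }
  const std::string dir = base + "/ov_compiled_blobs";
  struct stat info;
  if (stat(dir.c_str(), &info) != 0 || !(info.st_mode & S_IFDIR)) {
    if (mkdir(dir.c_str(), 0777) != 0)  // create the directory once if missing
      throw std::runtime_error("Could not create the directory");
  }
  return dir;
}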

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Made the save/load dump path configurable

-> The save/load blob dump path can now also be configured through the C and Python APIs.

-> Introduced a provider option named blob_dump_path (see the usage sketch after this list)
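
A minimal C++ usage sketch, assuming the C++ API wrapper: it fills the extended OrtOpenVINOProviderOptions fields from the header change below and passes them through AppendExecutionProvider_OpenVINO (as shown in the perf-test diff); the model path and dump directory are placeholders.

#include <onnxruntime_cxx_api.h>

int main() {
  Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "ov_blob_demo");
  Ort::SessionOptions session_options;

  OrtOpenVINOProviderOptions options;
  options.device_type = "MYRIAD_FP16";       // save/load blob is MyriadX-only
  options.device_id = "";
  options.enable_vpu_fast_compile = 0;
  options.num_of_threads = 8;
  options.use_compiled_network = 1;          // turn the save/load blob feature on
  options.blob_dump_path = "/tmp/ov_blobs";  // optional override of the default dump dir

  session_options.AppendExecutionProvider_OpenVINO(options);
  Ort::Session session(env, "model.onnx", session_options);  // placeholder model path
  return 0;
}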

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Minor fixes added

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Fixed Python API issues

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Using GetEnvironmentVar to get the path

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Fixed a Python runtime option issue

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

* Fixed the import network issue on Windows

Signed-off-by: MaajidKhan <n.maajidkhan@gmail.com>

Maajid khan, 2021-04-08 09:29:16 +05:30, committed by GitHub
commit 27e778909d (parent def4cc09c7)
13 changed files with 274 additions and 76 deletions

@ -305,12 +305,14 @@ typedef struct OrtTensorRTProviderOptions {
/// </summary>
typedef struct OrtOpenVINOProviderOptions {
#ifdef __cplusplus
OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{} {}
OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{}, use_compiled_network{}, blob_dump_path{} {}
#endif
const char* device_type; // CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16 or VAD-F_FP32
unsigned char enable_vpu_fast_compile; // 0 = false, nonzero = true
const char* device_id;
size_t num_of_threads; // 0 uses default number of threads
unsigned char use_compiled_network; // 0 = false, nonzero = true
const char* blob_dump_path; // path is set to empty by default
} OrtOpenVINOProviderOptions;
struct OrtApi;

@ -23,15 +23,11 @@ namespace backend_utils {
#ifndef NDEBUG
bool IsDebugEnabled() {
#ifdef _WIN32
size_t env_name_len = 0;
char* env_name = nullptr;
bool res = (_dupenv_s(&env_name, &env_name_len, "ORT_OPENVINO_ENABLE_DEBUG") == 0 && env_name != nullptr);
free(env_name);
return res;
#else
return (std::getenv("ORT_OPENVINO_ENABLE_DEBUG") != nullptr);
#endif
const std::string env_name = onnxruntime::GetEnvironmentVar("ORT_OPENVINO_ENABLE_DEBUG");
if (!env_name.empty()) {
return true;
}
return false;
}
void DumpOnnxModelProto(const ONNX_NAMESPACE::ModelProto& model_proto, std::string file_name) {
std::fstream outfile(file_name, std::ios::out | std::ios::trunc | std::ios::binary);
@ -40,6 +36,56 @@ void DumpOnnxModelProto(const ONNX_NAMESPACE::ModelProto& model_proto, std::stri
#endif
bool UseCompiledNetwork() {
const std::string env_name = onnxruntime::GetEnvironmentVar("OV_USE_COMPILED_NETWORK");
if (!env_name.empty()) {
return true;
}
return false;
}
std::string GetCurrentWorkingDir() {
std::string curr_dir;
ORT_UNUSED_PARAMETER(curr_dir);
char buff[FILENAME_MAX];
curr_dir = GetCurrentDir(buff, FILENAME_MAX);
std::string current_working_dir(buff);
return current_working_dir;
}
bool IsDirExists(const std::string& pathname) {
struct stat info;
if(stat(pathname.c_str(), &info) != 0) {
LOGS_DEFAULT(INFO) << log_tag << "cannot access pathname: " << pathname;
return false;
} else if(info.st_mode & S_IFDIR) {
LOGS_DEFAULT(INFO) << log_tag << "pathname exists: " << pathname;
return true;
} else {
LOGS_DEFAULT(INFO) << log_tag << "pathname: " << pathname << ": doesn't contain the directory 'ov_compiled_blobs' ";
}
return false;
}
void CreateDirectory(const std::string& ov_compiled_blobs_dir) {
LOGS_DEFAULT(INFO) << log_tag << "'ov_compiled_blobs' directory doesn't exist at the executable path, so creating one";
#if defined(_WIN32)
if (_mkdir(ov_compiled_blobs_dir.c_str()) == 0) { // Creating a directory
LOGS_DEFAULT(INFO) << log_tag << "created a directory named 'ov_compiled_blobs' at the executable path";
} else {
LOGS_DEFAULT(INFO) << log_tag << "Error creating a directory named 'ov_compiled_blobs' at the executable path";
throw std::runtime_error("Could not create the directory");
}
#else
if (mkdir(ov_compiled_blobs_dir.c_str(), 0777) == 0) { // Creating a directory
LOGS_DEFAULT(INFO) << log_tag << "created a directory named 'ov_compiled_blobs' at the executable path";
} else {
LOGS_DEFAULT(INFO) << log_tag << "Error creating a directory named 'ov_compiled_blobs' at the executable path";
throw std::runtime_error("Could not create the directory");
}
#endif
}
struct static_cast_int64 {
template <typename T1> // T1 models type statically convertible to T
int64_t operator()(const T1& x) const { return static_cast<int64_t>(x); }

@ -10,6 +10,16 @@
#include "contexts.h"
#include <iomanip>
#ifdef _WIN32
#include <direct.h>
#define GetCurrentDir _getcwd
#else
#include <unistd.h>
#define GetCurrentDir getcwd
#endif
#include <sys/stat.h>
namespace onnxruntime {
namespace openvino_ep {
namespace backend_utils {
@ -19,6 +29,14 @@ const std::string log_tag = "[OpenVINO-EP] ";
bool IsDebugEnabled();
#endif
bool UseCompiledNetwork();
std::string GetCurrentWorkingDir();
bool IsDirExists(const std::string& pathname);
void CreateDirectory(const std::string& ov_compiled_blobs_dir);
void SetIODefs(const ONNX_NAMESPACE::ModelProto& model_proto,
std::shared_ptr<InferenceEngine::CNNNetwork> network,
std::unordered_map<std::string, int> output_names,

@ -16,6 +16,7 @@
#include <ngraph/pass/constant_folding.hpp>
#include "basic_backend.h"
#include "../backend_manager.h"
namespace onnxruntime {
namespace openvino_ep {
@ -26,56 +27,135 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
GlobalContext& global_context,
const SubGraphContext& subgraph_context)
: global_context_(global_context), subgraph_context_(subgraph_context) {
ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
InferenceEngine::ExecutableNetwork exe_network;
#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
if (const_outputs_map_.size() == subgraph_context_.output_names.size())
subgraph_context_.is_constant = true;
#endif
// Loading model to the plugin
if (subgraph_context_.is_constant)
return;
std::map<std::string, std::string> config;
#ifndef NDEBUG
if (openvino_ep::backend_utils::IsDebugEnabled()) {
config["PERF_COUNT"] = CONFIG_VALUE(YES);
}
#endif
if (global_context_.device_type.find("MYRIAD") != std::string::npos) {
#if defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
if (subgraph_context_.set_vpu_config) {
config["MYRIAD_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
}
if (global_context_.enable_vpu_fast_compile) {
config["MYRIAD_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
config["MYRIAD_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
}
#else
if (subgraph_context_.set_vpu_config) {
config["VPU_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
}
if (global_context_.enable_vpu_fast_compile) {
config["VPU_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
config["VPU_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
}
#endif
}
std::string& hw_target = (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
try {
exe_network = global_context_.ie_core.LoadNetwork(*ie_cnn_network_, hw_target, config);
} catch (const InferenceEngine::details::InferenceEngineException& e) {
ORT_THROW(log_tag + " Exception while Loading Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
} catch (...) {
ORT_THROW(log_tag + " Exception while Loading Network for graph " + subgraph_context_.subgraph_name);
}
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
bool vpu_status = false;
bool import_blob_status = false;
std::string model_blob_name;
std::ifstream blob_path;
std::string ov_compiled_blobs_dir = "";
if(hw_target == "MYRIAD" && global_context_.use_compiled_network == true) {
if(!openvino_ep::backend_utils::UseCompiledNetwork()) {
std::size_t model_index = global_context_.onnx_model_path_name.find_last_of("/\\");
std::string model_name= global_context_.onnx_model_path_name.substr(model_index+1);
std::size_t model_extension_index = model_name.find_last_of(".");
if(openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph) {
model_blob_name = global_context_.onnx_model_name + "_" + "op_v_" + std::to_string(global_context_.onnx_opset_version) + "_" + model_name.substr(0,model_extension_index) + "_" + hw_target + "_" + subgraph_context_.subgraph_name + "_ov_" + "fully" + ".blob";
}
else {
model_blob_name = global_context_.onnx_model_name + "_" + "op_v_" + std::to_string(global_context_.onnx_opset_version) + "_" + model_name.substr(0,model_extension_index) + "_" + hw_target + "_" + subgraph_context_.subgraph_name + "_ov_" + "partially" + ".blob";
}
if(global_context_.blob_dump_path == "" || global_context_.blob_dump_path == "\"" || global_context_.blob_dump_path.empty()) {
ov_compiled_blobs_dir = openvino_ep::backend_utils::GetCurrentWorkingDir() + "/ov_compiled_blobs/";
} else {
ov_compiled_blobs_dir = global_context_.blob_dump_path + "/ov_compiled_blobs";
}
if(openvino_ep::backend_utils::IsDirExists(ov_compiled_blobs_dir)) {
LOGS_DEFAULT(INFO) << log_tag << "'ov_compiled_blobs' directory already exists at the executable path";
}
else {
CreateDirectory(ov_compiled_blobs_dir);
}
blob_path.open(ov_compiled_blobs_dir + "/" + model_blob_name);
if (!blob_path.is_open()) {
LOGS_DEFAULT(INFO) << log_tag << "Device specific Compiled blob doesn't exist for this model";
} else {
LOGS_DEFAULT(INFO) << log_tag << "Device specific Compiled blob already exists for this model";
vpu_status = true;
}
}
}
//validate const subgraphs
if(!openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph) {
ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
if (const_outputs_map_.size() == subgraph_context_.output_names.size())
subgraph_context_.is_constant = true;
#endif
// Loading model to the plugin
if (subgraph_context_.is_constant) {
LOGS_DEFAULT(INFO) << log_tag << "The subgraph is a const. Directly moving to Infer stage.";
return;
}
}
if (vpu_status == true || openvino_ep::backend_utils::UseCompiledNetwork()) {
const std::string model_blob_path = ov_compiled_blobs_dir + "/" + model_blob_name;
const std::string compiled_blob_path = onnxruntime::GetEnvironmentVar("OV_BLOB_PATH");
try {
if(vpu_status == true) {
LOGS_DEFAULT(INFO) << log_tag << "Importing the pre-compiled blob for this model which already exists in the directory 'ov_compiled_blobs'";
exe_network_ = global_context_.ie_core.ImportNetwork(model_blob_path, hw_target, {});
} else {
LOGS_DEFAULT(INFO) << log_tag << "Importing the pre-compiled blob from the path set by the user";
if (compiled_blob_path.empty())
throw std::runtime_error("The compiled blob path is not set");
exe_network_ = global_context_.ie_core.ImportNetwork(compiled_blob_path, hw_target, {});
}
} catch (InferenceEngine::details::InferenceEngineException &e) {
ORT_THROW(log_tag + " Exception while Importing Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
} catch(...) {
ORT_THROW(log_tag + " Exception while Importing Network for graph: " + subgraph_context_.subgraph_name);
}
import_blob_status = true;
LOGS_DEFAULT(INFO) << log_tag << "Succesfully Created an executable network from a previously exported network";
}
if ((global_context_.use_compiled_network == true && import_blob_status == false) || vpu_status == false) {
if(!openvino_ep::backend_utils::UseCompiledNetwork()) {
ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
if (const_outputs_map_.size() == subgraph_context_.output_names.size())
subgraph_context_.is_constant = true;
#endif
// Loading model to the plugin
if (subgraph_context_.is_constant)
return;
std::map<std::string, std::string> config;
#ifndef NDEBUG
if (openvino_ep::backend_utils::IsDebugEnabled()) {
config["PERF_COUNT"] = CONFIG_VALUE(YES);
}
#endif
if (global_context_.device_type.find("MYRIAD") != std::string::npos) {
#if defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
if (subgraph_context_.set_vpu_config) {
config["MYRIAD_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
}
if (global_context_.enable_vpu_fast_compile) {
config["MYRIAD_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
config["MYRIAD_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
}
#else
if (subgraph_context_.set_vpu_config) {
config["VPU_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
}
if (global_context_.enable_vpu_fast_compile) {
config["VPU_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
config["VPU_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
}
#endif
}
try {
exe_network_ = global_context_.ie_core.LoadNetwork(*ie_cnn_network_, hw_target, config);
} catch (const InferenceEngine::details::InferenceEngineException& e) {
ORT_THROW(log_tag + " Exception while Loading Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
} catch (...) {
ORT_THROW(log_tag + " Exception while Loading Network for graph " + subgraph_context_.subgraph_name);
}
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
if(global_context_.use_compiled_network && hw_target == "MYRIAD") {
LOGS_DEFAULT(INFO) << log_tag << "Dumping the compiled blob for this model into the directory 'ov_compiled_blobs'";
std::ofstream compiled_blob_dump{ov_compiled_blobs_dir + "/" + model_blob_name};
exe_network_.Export(compiled_blob_dump);
}
}
}
//The infer_requests_ pool will be initialized with a default value of 8 infer_requests
//The nireq value can also be configured to any num_of_threads during runtime
size_t nireq = global_context_.num_of_threads;
@ -85,13 +165,13 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
std::cout << "The value of nireq being used is: " << nireq << std::endl;
}
#endif
inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network, nireq));
inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, nireq));
}
// Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on
// an Infer Request indexed by infer_req_idx
void BasicBackend::StartAsyncInference(Ort::CustomOpApi& ort, OrtKernelContext* context, std::shared_ptr<InferenceEngine::InferRequest> infer_request) {
auto graph_input_info = ie_cnn_network_->getInputsInfo();
auto graph_input_info = exe_network_.GetInputsInfo();
size_t index = 0;
for (auto input_info_iter = graph_input_info.begin();
@ -132,7 +212,7 @@ void BasicBackend::CompleteAsyncInference(Ort::CustomOpApi& ort, OrtKernelContex
} catch (...) {
ORT_THROW(log_tag + " Exception with completing Inference");
}
auto graph_output_info = ie_cnn_network_->getOutputsInfo();
auto graph_output_info = exe_network_.GetOutputsInfo();
for (auto output_info_iter = graph_output_info.begin();
output_info_iter != graph_output_info.end(); ++output_info_iter) {

@ -37,6 +37,7 @@ class BasicBackend : public IBackend {
SubGraphContext subgraph_context_;
mutable std::mutex compute_lock_;
std::shared_ptr<InferenceEngine::CNNNetwork> ie_cnn_network_;
InferenceEngine::ExecutableNetwork exe_network_;
std::map<std::string, std::shared_ptr<ngraph::Node>> const_outputs_map_;
std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
};

@ -13,12 +13,17 @@ struct GlobalContext {
InferenceEngine::Core ie_core;
bool is_wholly_supported_graph = false;
bool enable_vpu_fast_compile = false;
bool use_compiled_network = false;
size_t num_of_threads;
std::string device_type;
std::string precision_str;
std::string device_id;
std::string blob_dump_path;
std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};
std::vector<std::string> deviceTags = {"0", "1", "2", "3", "4", "5", "6", "7"};
std::string onnx_model_name;
std::string onnx_model_path_name;
int onnx_opset_version;
};
// Holds context specific to subgraph.

@ -18,6 +18,9 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_;
openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
openvino_ep::BackendManager::GetGlobalContext().use_compiled_network = info.use_compiled_network_;
openvino_ep::BackendManager::GetGlobalContext().blob_dump_path = info.blob_dump_path_;
if ((int)info.num_of_threads_ <= 0) {
openvino_ep::BackendManager::GetGlobalContext().num_of_threads = 8;
} else {
@ -55,6 +58,14 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, const
ORT_UNUSED_PARAMETER(kernel_registries);
std::vector<std::unique_ptr<ComputeCapability>> result;
openvino_ep::BackendManager::GetGlobalContext().onnx_model_name = graph_viewer.Name();
#ifdef _WIN32
std::wstring onnx_path = graph_viewer.ModelPath().ToPathString();
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = std::string(onnx_path.begin(), onnx_path.end());
#else
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = graph_viewer.ModelPath().ToPathString();
#endif
openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain);
#if defined OPENVINO_2020_3
result = openvino_ep::GetCapability_2020_3(graph_viewer,

@ -55,9 +55,11 @@ struct OpenVINOExecutionProviderInfo {
bool enable_vpu_fast_compile_;
std::string device_id_;
size_t num_of_threads_;
bool use_compiled_network_;
std::string blob_dump_path_;
explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, size_t num_of_threads)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads) {
explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, size_t num_of_threads, bool use_compiled_network, std::string blob_dump_path)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network), blob_dump_path_(blob_dump_path) {
if (dev_type == "") {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
<< "No runtime device selection option provided.";
@ -125,7 +127,7 @@ struct OpenVINOExecutionProviderInfo {
<< "Choosing Device: " << device_type_ << " , Precision: " << precision_;
}
OpenVINOExecutionProviderInfo() {
OpenVINOExecutionProviderInfo("", false, "", 0);
OpenVINOExecutionProviderInfo("", false, "", 0, false,"");
}
};

@ -8,10 +8,12 @@
namespace onnxruntime {
struct OpenVINOProviderFactory : IExecutionProviderFactory {
OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
const char* device_id, size_t num_of_threads)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads) {
const char* device_id, size_t num_of_threads,
bool use_compiled_network, const char* blob_dump_path)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network) {
device_type_ = (device_type == nullptr) ? "" : device_type;
device_id_ = (device_id == nullptr) ? "" : device_id;
blob_dump_path_ = (blob_dump_path == nullptr) ? "" : blob_dump_path;
}
~OpenVINOProviderFactory() override {
}
@ -23,16 +25,18 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
bool enable_vpu_fast_compile_;
std::string device_id_;
size_t num_of_threads_;
bool use_compiled_network_;
std::string blob_dump_path_;
};
std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_);
OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_, use_compiled_network_, blob_dump_path_);
return std::make_unique<OpenVINOExecutionProvider>(info);
}
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads) {
return std::make_shared<onnxruntime::OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile, device_id, num_of_threads);
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path) {
return std::make_shared<onnxruntime::OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile, device_id, num_of_threads, use_compiled_network, blob_dump_path);
}
} // namespace onnxruntime
@ -50,7 +54,7 @@ struct OpenVINO_Provider : Provider {
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const void* void_params) override {
auto& params = *reinterpret_cast<const OrtOpenVINOProviderOptions*>(void_params);
return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile, params.device_id, params.num_of_threads);
return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile, params.device_id, params.num_of_threads, params.use_compiled_network, params.blob_dump_path);
}
void Shutdown() override {

@ -665,6 +665,7 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
#ifdef USE_OPENVINO
OrtOpenVINOProviderOptions params;
params.device_type = openvino_device_type.c_str();
std::string blob_dump_path;
auto it = provider_options_map.find(type);
if (it != provider_options_map.end()) {
@ -681,10 +682,22 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
}
} else if (option.first == "use_compiled_network") {
if (option.second == "True") {
params.use_compiled_network = true;
} else if (option.second == "False") {
params.use_compiled_network = false;
} else {
ORT_THROW("Invalid value passed for use_compiled_network: ", option.second);
}
} else if (option.first == "device_id") {
params.device_id = option.second.c_str();
} else if (option.first == "num_of_threads") {
params.num_of_threads = std::stoi(option.second);
} else if (option.first == "blob_dump_path") {
blob_dump_path = option.second;
params.blob_dump_path = blob_dump_path.c_str();
} else {
ORT_THROW("Invalid OpenVINO EP option: ", option.first);
}
@ -967,7 +980,7 @@ void addGlobalMethods(py::module& m, Environment& env) {
onnxruntime::CreateExecutionProviderFactory_Dnnl(1),
#endif
#ifdef USE_OPENVINO
onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device_type, false, "", 8),
onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device_type, false, "", 8, false, ""),
#endif
#ifdef USE_TENSORRT
onnxruntime::CreateExecutionProviderFactory_Tensorrt(

@ -60,8 +60,10 @@ namespace perftest {
"\t [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n"
"\t [OpenVINO only] [enable_vpu_fast_compile]: Optionally enabled to speeds up the model's compilation on VPU device targets.\n"
"\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
"\t [OpenVINO only] [use_compiled_network]: Can be enabled to directly import pre-compiled blobs if exists. currently this feature is only supported on MyriadX(VPU) hardware device target.\n"
"\t [OpenVINO only] [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs for the use_compiled_network(save/load blob) feature. This overrides the default path.\n"
"\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
"\t [Example] [For OpenVINO EP] -e openvino -i 'device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5'\n"
"\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5 use_compiled_network|true blob_dump_path|\"<path>\"\"\n"
"\t [TensorRT only] [use_trt_options]: Overrides TensorRT environment variables (if any) with following settings at runtime.\n"
"\t [TensorRT only] [trt_max_workspace_size]: Set TensorRT maximum workspace size in byte.\n"
"\t [TensorRT only] [trt_fp16_enable]: Enable TensorRT FP16 precision.\n"

@ -162,9 +162,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
} else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) {
#ifdef USE_OPENVINO
std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.
bool enable_vpu_fast_compile = false; // [device_id]: Selects a particular hardware device for inference.
std::string device_id = ""; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speeds up the model's compilation to VPU device specific format.
bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speed up the model's compilation to VPU device specific format.
std::string device_id = ""; // [device_id]: Selects a particular hardware device for inference.
size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of threads with this value at runtime.
bool use_compiled_network = false; // [use_compiled_network]: Can be enabled to directly import pre-compiled blobs if they exist.
std::string blob_dump_path = ""; // [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs for the use_compiled_network (save/load blob) feature. This overrides the default path.
#ifdef _MSC_VER
std::string ov_string = ToMBString(performance_test_config.run_config.ep_runtime_config_string);
@ -203,14 +205,24 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
} else {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "use_compiled_network") {
if(value == "true" || value == "True"){
use_compiled_network = true;
} else if (value == "false" || value == "False") {
use_compiled_network = false;
} else {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'use_compiled_network' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "num_of_threads") {
std::stringstream sstream(value);
sstream >> num_of_threads;
if ((int)num_of_threads <=0) {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_of_threads' should be greater than 0\n");
}
} else if (key == "blob_dump_path") {
blob_dump_path = value;
} else {
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads'] \n");
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'use_compiled_network', 'blob_dump_path'] \n");
}
}
OrtOpenVINOProviderOptions options;
@ -218,6 +230,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
options.device_id = device_id.c_str(); // To set the device_id
options.enable_vpu_fast_compile = enable_vpu_fast_compile; // To enable_vpu_fast_compile, default is false
options.num_of_threads = num_of_threads; // To set number of free InferRequests, default is 8
options.use_compiled_network = use_compiled_network; // To use_compiled_network, default is false
options.blob_dump_path = blob_dump_path.c_str(); // sets the blob_dump_path, default is ""
session_options.AppendExecutionProvider_OpenVINO(options);
#else
ORT_THROW("OpenVINO is not supported in this build\n");

@ -18,7 +18,7 @@
namespace onnxruntime {
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads);
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);