From 27e778909d75d0d7ff1177f3e86ef8c19db71d5d Mon Sep 17 00:00:00 2001
From: Maajid khan
Date: Thu, 8 Apr 2021 09:29:16 +0530
Subject: [PATCH] [OpenVINO-EP] Enabling the save/load blob feature (#7054)

* Enabling the save/load blob feature for OpenVINO-EP

Signed-off-by: MaajidKhan

* Added changes to enhance the save/load feature
   -> This feature applies only to the MYRIAD device target
   -> Cleaned up the code and added error checks

Signed-off-by: MaajidKhan

* Enabled the feature only for MyriadX and only for Linux

Signed-off-by: MaajidKhan

* Fixed compilation issues on Windows

Signed-off-by: MaajidKhan

* Added changes to fix the const subgraph issue

Signed-off-by: MaajidKhan

* Fixed issues on Windows

Signed-off-by: MaajidKhan

* Added changes for the feature
   -> Removed the default dump-location directory set via CMake
   -> Enabled saving blob dumps at the executable path by default

Signed-off-by: MaajidKhan

* Made the save/load dump path configurable
   -> The save/load blob dump path is now also configurable through the C/Python APIs.
   -> Introduced a flag named blob_dump_path

Signed-off-by: MaajidKhan

* Minor fixes added

Signed-off-by: MaajidKhan

* Fixed Python API issues

Signed-off-by: MaajidKhan

* Using GetEnvironmentVar to get the path

Signed-off-by: MaajidKhan

* Fixed the Python runtime option issue

Signed-off-by: MaajidKhan

* Fixed the import-network issue on Windows

Signed-off-by: MaajidKhan
---
 .../core/session/onnxruntime_c_api.h          |   4 +-
 .../core/providers/openvino/backend_utils.cc  |  64 +++++-
 .../core/providers/openvino/backend_utils.h   |  18 ++
 .../openvino/backends/basic_backend.cc        | 182 +++++++++++++-----
 .../openvino/backends/basic_backend.h         |   1 +
 .../core/providers/openvino/contexts.h        |   5 +
 .../openvino/openvino_execution_provider.cc   |  11 ++
 .../openvino/openvino_execution_provider.h    |   8 +-
 .../openvino/openvino_provider_factory.cc     |  16 +-
 .../python/onnxruntime_pybind_state.cc        |  15 +-
 .../test/perftest/command_args_parser.cc      |   4 +-
 onnxruntime/test/perftest/ort_test_session.cc |  20 +-
 onnxruntime/test/util/default_providers.cc    |   2 +-
 13 files changed, 274 insertions(+), 76 deletions(-)

diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 4cb1b2c746..faca901ce9 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -305,12 +305,14 @@ typedef struct OrtTensorRTProviderOptions {
 ///
 typedef struct OrtOpenVINOProviderOptions {
 #ifdef __cplusplus
-  OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{} {}
+  OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{}, use_compiled_network{}, blob_dump_path{} {}
 #endif
   const char* device_type;                // CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16 or VAD-F_FP32
   unsigned char enable_vpu_fast_compile;  // 0 = false, nonzero = true
   const char* device_id;
   size_t num_of_threads;                  // 0 uses default number of threads
+  unsigned char use_compiled_network;     // 0 = false, nonzero = true
+  const char* blob_dump_path;             // path is set to empty by default
 } OrtOpenVINOProviderOptions;
 
 struct OrtApi;
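For context, the two new fields are consumed through the existing AppendExecutionProvider_OpenVINO path. A minimal C++ sketch of opting in (the model path and dump directory are placeholders; this mirrors the perftest wiring further down rather than adding any new API):

    #include <onnxruntime_cxx_api.h>

    int main() {
      Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "ov_blob_demo");
      Ort::SessionOptions session_options;

      OrtOpenVINOProviderOptions options;
      options.device_type = "MYRIAD_FP16";      // save/load currently applies to MyriadX only
      options.device_id = "";
      options.enable_vpu_fast_compile = 0;
      options.num_of_threads = 8;
      options.use_compiled_network = 1;         // new: enable the save/load blob feature
      options.blob_dump_path = "/tmp/ov_blobs"; // new: override the default dump location

      session_options.AppendExecutionProvider_OpenVINO(options);
      Ort::Session session(env, "model.onnx", session_options);  // placeholder model path
      return 0;
    }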
diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
index 74ebd33f4e..ff5697a124 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -23,15 +23,11 @@ namespace backend_utils {
 
 #ifndef NDEBUG
 bool IsDebugEnabled() {
-#ifdef _WIN32
-  size_t env_name_len = 0;
-  char* env_name = nullptr;
-  bool res = (_dupenv_s(&env_name, &env_name_len, "ORT_OPENVINO_ENABLE_DEBUG") == 0 && env_name != nullptr);
-  free(env_name);
-  return res;
-#else
-  return (std::getenv("ORT_OPENVINO_ENABLE_DEBUG") != nullptr);
-#endif
+  const std::string env_name = onnxruntime::GetEnvironmentVar("ORT_OPENVINO_ENABLE_DEBUG");
+  if (!env_name.empty()) {
+    return true;
+  }
+  return false;
 }
 void DumpOnnxModelProto(const ONNX_NAMESPACE::ModelProto& model_proto, std::string file_name) {
   std::fstream outfile(file_name, std::ios::out | std::ios::trunc | std::ios::binary);
@@ -40,6 +36,56 @@ void DumpOnnxModelProto(const ONNX_NAMESPACE::ModelProto& model_proto, std::stri
 
 #endif
 
+bool UseCompiledNetwork() {
+  const std::string env_name = onnxruntime::GetEnvironmentVar("OV_USE_COMPILED_NETWORK");
+  if (!env_name.empty()) {
+    return true;
+  }
+  return false;
+}
+
+std::string GetCurrentWorkingDir() {
+  std::string curr_dir;
+  ORT_UNUSED_PARAMETER(curr_dir);
+  char buff[FILENAME_MAX];
+  curr_dir = GetCurrentDir(buff, FILENAME_MAX);
+  std::string current_working_dir(buff);
+  return current_working_dir;
+}
+
+bool IsDirExists(const std::string& pathname) {
+  struct stat info;
+  if (stat(pathname.c_str(), &info) != 0) {
+    LOGS_DEFAULT(INFO) << log_tag << "cannot access pathname: " << pathname;
+    return false;
+  } else if (info.st_mode & S_IFDIR) {
+    LOGS_DEFAULT(INFO) << log_tag << "pathname exists: " << pathname;
+    return true;
+  } else {
+    LOGS_DEFAULT(INFO) << log_tag << "pathname: " << pathname << ": doesn't contain the directory 'ov_compiled_blobs' ";
+  }
+  return false;
+}
+
+void CreateDirectory(const std::string& ov_compiled_blobs_dir) {
+  LOGS_DEFAULT(INFO) << log_tag << "'ov_compiled_blobs' directory doesn't exist at the executable path, so creating one";
+#if defined(_WIN32)
+  if (_mkdir(ov_compiled_blobs_dir.c_str()) == 0) {  // Creating a directory
+    LOGS_DEFAULT(INFO) << log_tag << "created a directory named 'ov_compiled_blobs' at the executable path";
+  } else {
+    LOGS_DEFAULT(INFO) << log_tag << "Error creating a directory named 'ov_compiled_blobs' at the executable path";
+    throw std::runtime_error("Could not create the directory");
+  }
+#else
+  if (mkdir(ov_compiled_blobs_dir.c_str(), 0777) == 0) {  // Creating a directory
+    LOGS_DEFAULT(INFO) << log_tag << "created a directory named 'ov_compiled_blobs' at the executable path";
+  } else {
+    LOGS_DEFAULT(INFO) << log_tag << "Error creating a directory named 'ov_compiled_blobs' at the executable path";
+    throw std::runtime_error("Could not create the directory");
+  }
+#endif
+}
+
 struct static_cast_int64 {
   template <class T1>  // T1 models type statically convertible to T
   int64_t operator()(const T1& x) const { return static_cast<int64_t>(x); }
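Condensed, the directory handling that basic_backend.cc (below) builds out of these helpers amounts to the following; ResolveBlobDir is illustrative only, not a function added by this patch:

    #include <string>

    // Illustrative: condensed from the basic_backend.cc logic further down.
    std::string ResolveBlobDir(const std::string& blob_dump_path) {
      using namespace onnxruntime::openvino_ep;
      // Empty path -> dump next to the executable; otherwise honor the user path.
      std::string dir = blob_dump_path.empty()
                            ? backend_utils::GetCurrentWorkingDir() + "/ov_compiled_blobs/"
                            : blob_dump_path + "/ov_compiled_blobs";
      if (!backend_utils::IsDirExists(dir))
        backend_utils::CreateDirectory(dir);  // throws std::runtime_error on failure
      return dir;
    }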
diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h
index bfc197ad52..1c3cd10059 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.h
+++ b/onnxruntime/core/providers/openvino/backend_utils.h
@@ -10,6 +10,16 @@
 #include "contexts.h"
 #include 
+#ifdef _WIN32
+#include <direct.h>
+#define GetCurrentDir _getcwd
+#else
+#include <unistd.h>
+#define GetCurrentDir getcwd
+#endif
+
+#include 
+
 namespace onnxruntime {
 namespace openvino_ep {
 namespace backend_utils {
@@ -19,6 +29,14 @@ const std::string log_tag = "[OpenVINO-EP] ";
 bool IsDebugEnabled();
 #endif
 
+bool UseCompiledNetwork();
+
+std::string GetCurrentWorkingDir();
+
+bool IsDirExists(const std::string& pathname);
+
+void CreateDirectory(const std::string& ov_compiled_blobs_dir);
+
 void SetIODefs(const ONNX_NAMESPACE::ModelProto& model_proto,
                std::shared_ptr<InferenceEngine::CNNNetwork> network,
                std::unordered_map output_names,
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index 88b08fcb25..93f16fa1d6 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -16,6 +16,7 @@
 #include 
 
 #include "basic_backend.h"
+#include "../backend_manager.h"
 
 namespace onnxruntime {
 namespace openvino_ep {
@@ -26,56 +27,135 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
                            GlobalContext& global_context,
                            const SubGraphContext& subgraph_context)
     : global_context_(global_context), subgraph_context_(subgraph_context) {
-  ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-  SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
-
-  InferenceEngine::ExecutableNetwork exe_network;
-
-#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
-  if (const_outputs_map_.size() == subgraph_context_.output_names.size())
-    subgraph_context_.is_constant = true;
-#endif
-
-  // Loading model to the plugin
-  if (subgraph_context_.is_constant)
-    return;
-  std::map<std::string, std::string> config;
-#ifndef NDEBUG
-  if (openvino_ep::backend_utils::IsDebugEnabled()) {
-    config["PERF_COUNT"] = CONFIG_VALUE(YES);
-  }
-#endif
-  if (global_context_.device_type.find("MYRIAD") != std::string::npos) {
-#if defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
-    if (subgraph_context_.set_vpu_config) {
-      config["MYRIAD_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
-    }
-
-    if (global_context_.enable_vpu_fast_compile) {
-      config["MYRIAD_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
-      config["MYRIAD_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
-    }
-#else
-    if (subgraph_context_.set_vpu_config) {
-      config["VPU_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
-    }
-
-    if (global_context_.enable_vpu_fast_compile) {
-      config["VPU_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
-      config["VPU_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
-    }
-#endif
-  }
   std::string& hw_target = (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
-  try {
-    exe_network = global_context_.ie_core.LoadNetwork(*ie_cnn_network_, hw_target, config);
-  } catch (const InferenceEngine::details::InferenceEngineException& e) {
-    ORT_THROW(log_tag + " Exception while Loading Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
-  } catch (...) {
-    ORT_THROW(log_tag + " Exception while Loading Network for graph " + subgraph_context_.subgraph_name);
-  }
-  LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
+  bool vpu_status = false;
+  bool import_blob_status = false;
+  std::string model_blob_name;
+  std::ifstream blob_path;
+  std::string ov_compiled_blobs_dir = "";
+  if (hw_target == "MYRIAD" && global_context_.use_compiled_network == true) {
+    if (!openvino_ep::backend_utils::UseCompiledNetwork()) {
+      std::size_t model_index = global_context_.onnx_model_path_name.find_last_of("/\\");
+      std::string model_name = global_context_.onnx_model_path_name.substr(model_index + 1);
+      std::size_t model_extension_index = model_name.find_last_of(".");
+      if (openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph) {
+        model_blob_name = global_context_.onnx_model_name + "_" + "op_v_" + std::to_string(global_context_.onnx_opset_version) + "_" + model_name.substr(0, model_extension_index) + "_" + hw_target + "_" + subgraph_context_.subgraph_name + "_ov_" + "fully" + ".blob";
+      } else {
+        model_blob_name = global_context_.onnx_model_name + "_" + "op_v_" + std::to_string(global_context_.onnx_opset_version) + "_" + model_name.substr(0, model_extension_index) + "_" + hw_target + "_" + subgraph_context_.subgraph_name + "_ov_" + "partially" + ".blob";
+      }
+      if (global_context_.blob_dump_path == "" || global_context_.blob_dump_path == "\"" || global_context_.blob_dump_path.empty()) {
+        ov_compiled_blobs_dir = openvino_ep::backend_utils::GetCurrentWorkingDir() + "/ov_compiled_blobs/";
+      } else {
+        ov_compiled_blobs_dir = global_context_.blob_dump_path + "/ov_compiled_blobs";
+      }
+      if (openvino_ep::backend_utils::IsDirExists(ov_compiled_blobs_dir)) {
+        LOGS_DEFAULT(INFO) << log_tag << "'ov_compiled_blobs' directory already exists at the executable path";
+      } else {
+        CreateDirectory(ov_compiled_blobs_dir);
+      }
+      blob_path.open(ov_compiled_blobs_dir + "/" + model_blob_name);
+      if (!blob_path.is_open()) {
+        LOGS_DEFAULT(INFO) << log_tag << "Device specific Compiled blob doesn't exist for this model";
+      } else {
+        LOGS_DEFAULT(INFO) << log_tag << "Device specific Compiled blob already exists for this model";
+        vpu_status = true;
+      }
+    }
+  }
+
+  // validate const subgraphs
+  if (!openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph) {
+    ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
+    SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
+#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
+    if (const_outputs_map_.size() == subgraph_context_.output_names.size())
+      subgraph_context_.is_constant = true;
+#endif
+
+    // Loading model to the plugin
+    if (subgraph_context_.is_constant) {
+      LOGS_DEFAULT(INFO) << log_tag << "The subgraph is a const. Directly moving to Infer stage.";
+      return;
+    }
+  }
+
+  if (vpu_status == true || openvino_ep::backend_utils::UseCompiledNetwork()) {
+    const std::string model_blob_path = ov_compiled_blobs_dir + "/" + model_blob_name;
+    const std::string compiled_blob_path = onnxruntime::GetEnvironmentVar("OV_BLOB_PATH");
+    try {
+      if (vpu_status == true) {
+        LOGS_DEFAULT(INFO) << log_tag << "Importing the pre-compiled blob for this model which already exists in the directory 'ov_compiled_blobs'";
+        exe_network_ = global_context_.ie_core.ImportNetwork(model_blob_path, hw_target, {});
+      } else {
+        LOGS_DEFAULT(INFO) << log_tag << "Importing the pre-compiled blob from the path set by the user";
+        if (compiled_blob_path.empty())
+          throw std::runtime_error("The compiled blob path is not set");
+        exe_network_ = global_context_.ie_core.ImportNetwork(compiled_blob_path, hw_target, {});
+      }
+    } catch (InferenceEngine::details::InferenceEngineException& e) {
+      ORT_THROW(log_tag + " Exception while Importing Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
+    } catch (...) {
+      ORT_THROW(log_tag + " Exception while Importing Network for graph: " + subgraph_context_.subgraph_name);
+    }
+    import_blob_status = true;
+    LOGS_DEFAULT(INFO) << log_tag << "Successfully created an executable network from a previously exported network";
+  }
+
+  if ((global_context_.use_compiled_network == true && import_blob_status == false) || vpu_status == false) {
+    if (!openvino_ep::backend_utils::UseCompiledNetwork()) {
+      ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
+      SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_, global_context_.device_type);
+#if defined(OPENVINO_2020_4) || defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
+      if (const_outputs_map_.size() == subgraph_context_.output_names.size())
+        subgraph_context_.is_constant = true;
+#endif
+
+      // Loading model to the plugin
+      if (subgraph_context_.is_constant)
+        return;
+      std::map<std::string, std::string> config;
+#ifndef NDEBUG
+      if (openvino_ep::backend_utils::IsDebugEnabled()) {
+        config["PERF_COUNT"] = CONFIG_VALUE(YES);
+      }
+#endif
+      if (global_context_.device_type.find("MYRIAD") != std::string::npos) {
+#if defined(OPENVINO_2021_1) || defined(OPENVINO_2021_2) || defined(OPENVINO_2021_3)
+        if (subgraph_context_.set_vpu_config) {
+          config["MYRIAD_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
+        }
+        if (global_context_.enable_vpu_fast_compile) {
+          config["MYRIAD_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
+          config["MYRIAD_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
+        }
+#else
+        if (subgraph_context_.set_vpu_config) {
+          config["VPU_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
+        }
+        if (global_context_.enable_vpu_fast_compile) {
+          config["VPU_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
+          config["VPU_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
+        }
+#endif
+      }
+      try {
+        exe_network_ = global_context_.ie_core.LoadNetwork(*ie_cnn_network_, hw_target, config);
+      } catch (const InferenceEngine::details::InferenceEngineException& e) {
+        ORT_THROW(log_tag + " Exception while Loading Network for graph: " + subgraph_context_.subgraph_name + ": " + e.what());
+      } catch (...) {
+        ORT_THROW(log_tag + " Exception while Loading Network for graph " + subgraph_context_.subgraph_name);
+      }
+      LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
+      if (global_context_.use_compiled_network && hw_target == "MYRIAD") {
+        LOGS_DEFAULT(INFO) << log_tag << "Dumping the compiled blob for this model into the directory 'ov_compiled_blobs'";
+        std::ofstream compiled_blob_dump{ov_compiled_blobs_dir + "/" + model_blob_name};
+        exe_network_.Export(compiled_blob_dump);
+      }
+    }
+  }
   // The infer_requests_ pool will be initialized with a default value of 8 infer_requests
   // The nireq value can also be configured to any num_of_threads during runtime
   size_t nireq = global_context_.num_of_threads;
@@ -85,13 +165,13 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
       std::cout << "The value of nireq being used is: " << nireq << std::endl;
   }
 #endif
-  inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network, nireq));
+  inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, nireq));
 }
 
 // Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on
 // an Infer Request indexed by infer_req_idx
 void BasicBackend::StartAsyncInference(Ort::CustomOpApi& ort, OrtKernelContext* context,
                                        std::shared_ptr<InferenceEngine::InferRequest> infer_request) {
-  auto graph_input_info = ie_cnn_network_->getInputsInfo();
+  auto graph_input_info = exe_network_.GetInputsInfo();
   size_t index = 0;
   for (auto input_info_iter = graph_input_info.begin();
@@ -132,7 +212,7 @@ void BasicBackend::CompleteAsyncInference(Ort::CustomOpApi& ort, OrtKernelContex
   } catch (...) {
     ORT_THROW(log_tag + " Exception with completing Inference");
   }
-  auto graph_output_info = ie_cnn_network_->getOutputsInfo();
+  auto graph_output_info = exe_network_.GetOutputsInfo();
   for (auto output_info_iter = graph_output_info.begin();
        output_info_iter != graph_output_info.end(); ++output_info_iter) {
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h
index 80f14eeb2b..1083a2ce6b 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.h
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -37,6 +37,7 @@ class BasicBackend : public IBackend {
   SubGraphContext subgraph_context_;
   mutable std::mutex compute_lock_;
   std::shared_ptr<InferenceEngine::CNNNetwork> ie_cnn_network_;
+  InferenceEngine::ExecutableNetwork exe_network_;
   std::map<std::string, std::shared_ptr<ngraph::Node>> const_outputs_map_;
   std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
 };
diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h
index 1049668a7e..6453c41569 100644
--- a/onnxruntime/core/providers/openvino/contexts.h
+++ b/onnxruntime/core/providers/openvino/contexts.h
@@ -13,12 +13,17 @@ struct GlobalContext {
   InferenceEngine::Core ie_core;
   bool is_wholly_supported_graph = false;
   bool enable_vpu_fast_compile = false;
+  bool use_compiled_network = false;
   size_t num_of_threads;
   std::string device_type;
   std::string precision_str;
   std::string device_id;
+  std::string blob_dump_path;
   std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};
   std::vector<std::string> deviceTags = {"0", "1", "2", "3", "4", "5", "6", "7"};
+  std::string onnx_model_name;
+  std::string onnx_model_path_name;
+  int onnx_opset_version;
 };
 
 // Holds context specific to subgraph.
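Taken together, the backend flow is: on a first run with use_compiled_network set, LoadNetwork compiles the model and ExecutableNetwork::Export serializes it; later runs find the blob on disk and call Core::ImportNetwork instead. A stripped-down sketch of those two Inference Engine (2021.x) calls, with a placeholder blob path and a network assumed to come from CreateCNNNetwork:

    #include <fstream>
    #include <inference_engine.hpp>

    void SaveAndLoadSketch(InferenceEngine::CNNNetwork& network) {
      InferenceEngine::Core core;
      // First run: compile for MYRIAD, then persist the blob (placeholder file name).
      InferenceEngine::ExecutableNetwork exe = core.LoadNetwork(network, "MYRIAD", {});
      std::ofstream dump("ov_compiled_blobs/model.blob");
      exe.Export(dump);
      // Later runs: import the blob directly and skip compilation.
      InferenceEngine::ExecutableNetwork imported =
          core.ImportNetwork("ov_compiled_blobs/model.blob", "MYRIAD", {});
    }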
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
index e1cd784b65..b25a1f5ea2 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -18,6 +18,9 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
   openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_;
   openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
   openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
+  openvino_ep::BackendManager::GetGlobalContext().use_compiled_network = info.use_compiled_network_;
+  openvino_ep::BackendManager::GetGlobalContext().blob_dump_path = info.blob_dump_path_;
+
   if ((int)info.num_of_threads_ <= 0) {
     openvino_ep::BackendManager::GetGlobalContext().num_of_threads = 8;
   } else {
@@ -55,6 +58,14 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer, const
   ORT_UNUSED_PARAMETER(kernel_registries);
   std::vector<std::unique_ptr<ComputeCapability>> result;
+  openvino_ep::BackendManager::GetGlobalContext().onnx_model_name = graph_viewer.Name();
+#ifdef _WIN32
+  std::wstring onnx_path = graph_viewer.ModelPath().ToPathString();
+  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = std::string(onnx_path.begin(), onnx_path.end());
+#else
+  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = graph_viewer.ModelPath().ToPathString();
+#endif
+  openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain);
 
 #if defined OPENVINO_2020_3
   result = openvino_ep::GetCapability_2020_3(graph_viewer,
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
index d547e00e38..3db730e29c 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -55,9 +55,11 @@ struct OpenVINOExecutionProviderInfo {
   bool enable_vpu_fast_compile_;
   std::string device_id_;
   size_t num_of_threads_;
+  bool use_compiled_network_;
+  std::string blob_dump_path_;
 
-  explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, size_t num_of_threads)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads) {
+  explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, size_t num_of_threads, bool use_compiled_network, std::string blob_dump_path)
+      : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network), blob_dump_path_(blob_dump_path) {
     if (dev_type == "") {
       LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
                          << "No runtime device selection option provided.";
@@ -125,7 +127,7 @@ struct OpenVINOExecutionProviderInfo {
                        << "Choosing Device: " << device_type_ << " , Precision: " << precision_;
   }
   OpenVINOExecutionProviderInfo() {
    OpenVINOExecutionProviderInfo("", false, "", 0, false, "");
   }
 };
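GetCapability now records the graph name, the on-disk model path, and the ONNX opset version in the global context. Together with the device target, the subgraph name, and the coverage suffix ("fully"/"partially"), these compose the blob file name built in basic_backend.cc above. For a hypothetical squeezenet.onnx (graph "main_graph", opset 13, subgraph "subgraph_1", wholly supported on MYRIAD), the dumped file would be named:

    main_graph_op_v_13_squeezenet_MYRIAD_subgraph_1_ov_fully.blob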
diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
index 9ef16dcbbd..6b2b990615 100644
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -8,10 +8,12 @@ namespace onnxruntime {
 struct OpenVINOProviderFactory : IExecutionProviderFactory {
   OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
-                          const char* device_id, size_t num_of_threads)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads) {
+                          const char* device_id, size_t num_of_threads,
+                          bool use_compiled_network, const char* blob_dump_path)
+      : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network) {
     device_type_ = (device_type == nullptr) ? "" : device_type;
     device_id_ = (device_id == nullptr) ? "" : device_id;
+    blob_dump_path_ = (blob_dump_path == nullptr) ? "" : blob_dump_path;
   }
   ~OpenVINOProviderFactory() override {
   }
@@ -23,16 +25,18 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
   bool enable_vpu_fast_compile_;
   std::string device_id_;
   size_t num_of_threads_;
+  bool use_compiled_network_;
+  std::string blob_dump_path_;
 };
 
 std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
-  OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_);
+  OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_, use_compiled_network_, blob_dump_path_);
   return std::make_unique<OpenVINOExecutionProvider>(info);
 }
 
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
-    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads) {
-  return std::make_shared<OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile, device_id, num_of_threads);
+    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path) {
+  return std::make_shared<OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile, device_id, num_of_threads, use_compiled_network, blob_dump_path);
 }
 
 }  // namespace onnxruntime
@@ -50,7 +54,7 @@ struct OpenVINO_Provider : Provider {
   std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const void* void_params) override {
     auto& params = *reinterpret_cast<const OrtOpenVINOProviderOptions*>(void_params);
-    return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile, params.device_id, params.num_of_threads);
+    return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile, params.device_id, params.num_of_threads, params.use_compiled_network, params.blob_dump_path);
   }
 
   void Shutdown() override {
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index a3b0800c0a..64071a89bc 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -665,6 +665,7 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
 #ifdef USE_OPENVINO
       OrtOpenVINOProviderOptions params;
       params.device_type = openvino_device_type.c_str();
+      std::string blob_dump_path;
 
       auto it = provider_options_map.find(type);
       if (it != provider_options_map.end()) {
@@ -681,10 +682,22 @@ static void RegisterExecutionProviders(InferenceSession* sess, const std::vector
               ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
             }
+          } else if (option.first == "use_compiled_network") {
+            if (option.second == "True") {
+              params.use_compiled_network = true;
+            } else if (option.second == "False") {
+              params.use_compiled_network = false;
+            } else {
+              ORT_THROW("Invalid value passed for use_compiled_network: ", option.second);
+            }
+
           } else if (option.first == "device_id") {
             params.device_id = option.second.c_str();
           } else if (option.first == "num_of_threads") {
             params.num_of_threads = std::stoi(option.second);
+          } else if (option.first == "blob_dump_path") {
+            blob_dump_path = option.second;
+            params.blob_dump_path = blob_dump_path.c_str();
           } else {
             ORT_THROW("Invalid OpenVINO EP option: ", option.first);
           }
@@ -967,7 +980,7 @@ void addGlobalMethods(py::module& m, Environment& env) {
             onnxruntime::CreateExecutionProviderFactory_Dnnl(1),
 #endif
 #ifdef USE_OPENVINO
-            onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device_type, false, "", 8),
+            onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device_type, false, "", 8, false, ""),
 #endif
 #ifdef USE_TENSORRT
             onnxruntime::CreateExecutionProviderFactory_Tensorrt(
diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc
index 013b646061..c9e8888f6b 100644
--- a/onnxruntime/test/perftest/command_args_parser.cc
+++ b/onnxruntime/test/perftest/command_args_parser.cc
@@ -60,8 +60,10 @@ namespace perftest {
       "\t    [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n"
       "\t    [OpenVINO only] [enable_vpu_fast_compile]: Optionally enabled to speed up the model's compilation on VPU device targets.\n"
       "\t    [OpenVINO only] [num_of_threads]: Overrides the accelerator default value of number of threads with this value at runtime.\n"
+      "\t    [OpenVINO only] [use_compiled_network]: Can be enabled to directly import pre-compiled blobs if they exist. Currently this feature is only supported on the MyriadX (VPU) hardware device target.\n"
+      "\t    [OpenVINO only] [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs for the use_compiled_network (save/load blob) feature. This overrides the default path.\n"
       "\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
-      "\t [Example] [For OpenVINO EP] -e openvino -i 'device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5'\n"
+      "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5 use_compiled_network|true blob_dump_path|\"\"\"\n"
       "\t [TensorRT only] [use_trt_options]: Overrides TensorRT environment variables (if any) with following settings at runtime.\n"
       "\t [TensorRT only] [trt_max_workspace_size]: Set TensorRT maximum workspace size in byte.\n"
       "\t [TensorRT only] [trt_fp16_enable]: Enable TensorRT FP16 precision.\n"
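Extending the help text's own example, a hypothetical perftest invocation that exercises both new keys (the dump directory and model file are placeholders):

    onnxruntime_perf_test -e openvino -i "device_type|MYRIAD_FP16 use_compiled_network|true blob_dump_path|/tmp/ov_blobs" model.onnx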
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index 7e95baf8d1..ed439eb40c 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -162,9 +162,11 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
   } else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) {
 #ifdef USE_OPENVINO
     std::string device_type = "";          // [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.
-    bool enable_vpu_fast_compile = false;  // [device_id]: Selects a particular hardware device for inference.
-    std::string device_id = "";            // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speeds up the model's compilation to VPU device specific format.
+    bool enable_vpu_fast_compile = false;  // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speed up the model's compilation to VPU device specific format.
+    std::string device_id = "";            // [device_id]: Selects a particular hardware device for inference.
     size_t num_of_threads = 8;             // [num_of_threads]: Overrides the accelerator default value of number of threads with this value at runtime.
+    bool use_compiled_network = false;     // [use_compiled_network]: Can be enabled to directly import pre-compiled blobs if they exist.
+    std::string blob_dump_path = "";       // [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs for the use_compiled_network (save/load blob) feature. This overrides the default path.
 
 #ifdef _MSC_VER
     std::string ov_string = ToMBString(performance_test_config.run_config.ep_runtime_config_string);
@@ -203,14 +205,24 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
         } else {
           ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
         }
+      } else if (key == "use_compiled_network") {
+        if (value == "true" || value == "True") {
+          use_compiled_network = true;
+        } else if (value == "false" || value == "False") {
+          use_compiled_network = false;
+        } else {
+          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'use_compiled_network' should be a boolean i.e. true or false. Default value is false.\n");
+        }
       } else if (key == "num_of_threads") {
         std::stringstream sstream(value);
         sstream >> num_of_threads;
         if ((int)num_of_threads <= 0) {
          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_of_threads' should be greater than 0\n");
         }
+      } else if (key == "blob_dump_path") {
+        blob_dump_path = value;
       } else {
-        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads'] \n");
+        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'use_compiled_network', 'blob_dump_path'] \n");
       }
     }
     OrtOpenVINOProviderOptions options;
@@ -218,6 +230,8 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
     options.device_id = device_id.c_str();                      // To set the device_id
     options.enable_vpu_fast_compile = enable_vpu_fast_compile;  // To enable_vpu_fast_compile, default is false
     options.num_of_threads = num_of_threads;                    // To set number of free InferRequests, default is 8
+    options.use_compiled_network = use_compiled_network;        // To use_compiled_network, default is false
+    options.blob_dump_path = blob_dump_path.c_str();            // sets the blob_dump_path, default is ""
     session_options.AppendExecutionProvider_OpenVINO(options);
 #else
     ORT_THROW("OpenVINO is not supported in this build\n");
diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc
index 7cb8e4c216..9ea46c991f 100644
--- a/onnxruntime/test/util/default_providers.cc
+++ b/onnxruntime/test/util/default_providers.cc
@@ -18,7 +18,7 @@ namespace onnxruntime {
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
-    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads);
+    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
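Besides the provider options, the backend above also honors two environment variables read through onnxruntime::GetEnvironmentVar: OV_USE_COMPILED_NETWORK (any non-empty value switches the EP to importing a user-supplied blob instead of compiling or dumping locally) and OV_BLOB_PATH (the blob file to import in that mode; the EP throws if it is unset). A hypothetical setup, with placeholder paths:

    export OV_USE_COMPILED_NETWORK=1
    export OV_BLOB_PATH=/tmp/ov_blobs/model.blob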