From f7edf0aa57c82315642be3fa248fb1175455d2b7 Mon Sep 17 00:00:00 2001
From: "S. Manohar Karlapalem" <manohar.karlapalem@intel.com>
Date: Mon, 14 Sep 2020 15:46:14 -0700
Subject: [PATCH] [OpenVINO-EP] Enable EP config options for VPU hardware
 (#5119)

* Added config flags for VPU Fast Recompile

* clean-up ifdefs

* Add VPU Fast compile config option

Adds an option that enables Fast compilation of models to VPU
hardware specific format.

* Add config option to choose specific device id for inference

Inference of all subgraphs will be scheduled only on this device
even if other devices of the same type are available.

* Add Python API to list available device IDs

* code cleanup

* Add second C/C++ API with settings string parameter

Adds an additional C/C++ API that allows passing multiple
key-value pairs for settings as a single string. Multiple
settings are delimited by '\n' while the key and value
within a setting are delimited by '|'.

* Append 'Ex' to the extended C/C++ API

* Use set_providers Py API to set config options.

Uses Session.set_providers Python API to set EP runtime config
options as key/val pairs
Deprecated older module function definitions for config settings.
Updates documentation.

* avoid globals for py config options where possible

Co-authored-by: intel <you@example.com>
---
 .../OpenVINO-ExecutionProvider.md             | 70 ++++++++++++++++--
 .../openvino/openvino_provider_factory.h      | 14 +++-
 .../providers/openvino/backend_manager.cc     | 18 ++---
 .../core/providers/openvino/backend_manager.h |  3 +-
 .../core/providers/openvino/backend_utils.cc  |  8 +-
 .../core/providers/openvino/backend_utils.h   |  2 +-
 .../openvino/backends/backend_factory.cc      |  2 +-
 .../openvino/backends/basic_backend.cc        | 19 +++--
 .../openvino/backends/vadm_backend.cc         |  2 +-
 .../core/providers/openvino/contexts.h        |  6 +-
 .../openvino/openvino_execution_provider.cc   | 33 ++++++++-
 .../openvino/openvino_execution_provider.h    | 73 +++++++++----------
 .../openvino/openvino_provider_factory.cc     | 67 ++++++++++++++---
 .../openvino/ov_versions/capabilities.h       |  4 +-
 .../openvino/ov_versions/capability_2020_2.cc | 24 +++---
 .../openvino/ov_versions/capability_2020_4.cc | 28 +++----
 .../python/onnxruntime_pybind_state.cc        | 58 ++++++++++++---
 onnxruntime/test/util/default_providers.cc    |  4 +-
 18 files changed, 305 insertions(+), 130 deletions(-)
diff --git a/docs/execution_providers/OpenVINO-ExecutionProvider.md b/docs/execution_providers/OpenVINO-ExecutionProvider.md
index b14b99821e..92a22bd5d7 100644
--- a/docs/execution_providers/OpenVINO-ExecutionProvider.md
+++ b/docs/execution_providers/OpenVINO-ExecutionProvider.md
@@ -2,37 +2,91 @@
 
 OpenVINO Execution Provider enables deep learning inference on Intel CPUs, Intel integrated GPUs and Intel<sup>®</sup> Movidius<sup>TM</sup> Vision Processing Units (VPUs). Please refer to [this](https://software.intel.com/en-us/openvino-toolkit/hardware) page for details on the Intel hardware supported.
 
-## Build
+### Build
 For build instructions, please see the [BUILD page](../../BUILD.md#openvino).
 
-## Onnxruntime Graph Optimization level
+## Runtime configuration options
+---
+
+OpenVINO EP can be configured with certain options at runtime that control the behavior of the EP. These options can be set as key-value pairs as below:-
+
+### Python API
+Key-Value pairs for config options can be set using the Session.set_providers API as follows:-
+
+```
+session = onnxruntime.InferenceSession(<path_to_model_file>, options)
+session.set_providers(['OpenVINOExecutionProvider'], [{Key1 : Value1, Key2 : Value2, ...}])
+```
+*Note that this causes the InferenceSession to be re-initialized, which may cause model recompilation and hardware re-initialization*
+
+### C/C++ API
+All the options (key-value pairs) need to be concatenated into a single string and passed to the OrtSessionOptionsAppendExecutionProviderEx_OpenVINO() API as shown below:-
+
+```
+std::string settings_str;
+settings_str.append("Key1|Value1\n");
+settings_str.append("Key2|Value2\n");
+settings_str.append("Key3|Value3\n");
+Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProviderEx_OpenVINO(sf, settings_str.c_str()));
+```
+
+### Available configuration options
+The following table lists all the available configuration options and the Key-Value pairs to set them:-
+
+| **Key** | **Key type** | **Allowable Values** | **Value type** | **Description** |
+| --- | --- | --- | --- | --- |
+| device_type | string | CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16, VAD-F_FP32 | string | Overrides the accelerator hardware type and precision with these values at runtime. If this option is not explicitly set, the default hardware and precision specified at build time are used. |
+| device_id   | string | Any valid OpenVINO device ID | string | Selects a particular hardware device for inference. The list of valid OpenVINO device IDs available on a platform can be obtained either through the Python API (`onnxruntime.capi._pybind_state.get_available_openvino_device_ids()`) or through the [OpenVINO C/C++ API](https://docs.openvinotoolkit.org/latest/classInferenceEngine_1_1Core.html#acb212aa879e1234f51b845d2befae41c). If this option is not explicitly set, an arbitrary free device will be automatically selected by the OpenVINO runtime. |
+| enable_vpu_fast_compile | string | True/False | boolean | This option is only available for MYRIAD_FP16 VPU devices. During initialization of the VPU device with the compiled model, Fast-compile may optionally be enabled to speed up the model's compilation to the VPU device specific format. This in turn reduces model initialization time. However, enabling this option may slow down inference because some optimizations are not fully applied, so caution is to be exercised while enabling this option. |
+
+## Other configuration settings
+### Onnxruntime Graph Optimization level
 OpenVINO backend performs both hardware dependent as well as independent optimizations to the graph to infer it with on the target hardware with best possible performance. In most of the cases it has been observed that passing in the graph from the input model as is would lead to best possible optimizations by OpenVINO. For this reason, it is advised to turn off high level optimizations performed by ONNX Runtime before handing the graph over to OpenVINO backend. This can be done using Session options as shown below:-
 
-1. Python API
+### Python API
 ```
 options = onnxruntime.SessionOptions()
 options.graph_optimization_level = onnxruntime.GraphOptimizationLevel.ORT_DISABLE_ALL
 sess = onnxruntime.InferenceSession(<path_to_model_file>, options)
 ```
 
-2. C++ API
+### C/C++ API
 ```
 SessionOptions::SetGraphOptimizationLevel(ORT_DISABLE_ALL);
 ```
 
-## Dynamic device selection
+### Deprecated: Dynamic device type selection
+**Note: This API has been deprecated. Please use the Key-Value mechanism mentioned above to set the 'device_type' option.**
 When ONNX Runtime is built with OpenVINO Execution Provider, a target hardware option needs to be provided. This build time option becomes the default target harware the EP schedules inference on. However, this target may be overriden at runtime to schedule inference on a different hardware as shown below.
 
 Note. This dynamic hardware selection is optional. The EP falls back to the build-time default selection if no dynamic hardware option value is specified.
-1. Python API
+
+### Python API
 ```
 import onnxruntime
 onnxruntime.capi._pybind_state.set_openvino_device("<harware_option>")
 # Create session after this
 ```
-2. C/C++ API
+*This property persists and gets applied to new sessions until it is explicitly unset. To unset, assign an empty string ("").*
+
+### C/C++ API
+
+Append the settings string "device_type|<hardware_option>\n" to the EP settings string. Example shown below for the CPU_FP32 option:
 ```
-Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_OpenVINO(sf, "<hardware_option>"));
+std::string settings_str;
+...
+settings_str.append("device_type|CPU_FP32\n");
+Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProviderEx_OpenVINO(sf, settings_str.c_str()));
+```
+
+
+### C/C++ API
+Append the settings string "device_id|<device_id>\n" to the EP settings string, where <device_id> is the unique identifier of the hardware device.
+```
+std::string settings_str;
+...
+settings_str.append("device_id|<device_id>\n");
+Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProviderEx_OpenVINO(sf, settings_str.c_str()));
 ```
 
 ## ONNX Layers supported using OpenVINO
diff --git a/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h b/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
index d8484d4d3c..3016cefcfb 100644
--- a/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
+++ b/include/onnxruntime/core/providers/openvino/openvino_provider_factory.h
@@ -5,13 +5,23 @@
 
 #ifdef __cplusplus
 extern "C" {
+#else
+#include <stdbool.h>
 #endif
 
 /**
- * \param device_id openvino device id, starts from zero.
+ * \param device_type openvino device type and precision. Could be any of
+ * CPU_FP32, GPU_FP32, GPU_FP16, MYRIAD_FP16, VAD-M_FP16 or VAD-F_FP32.
  */
 ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_OpenVINO,
-    _In_ OrtSessionOptions* options, const char* device_id);
+    _In_ OrtSessionOptions* options, _In_ const char* device_type);
+
+/**
+ * \param settings_str string of Key-Value pairs with '\n' used to delimit
+ * pairs and '|' used to delimit key and value within a pair.
+ */
+ORT_API_STATUS(OrtSessionOptionsAppendExecutionProviderEx_OpenVINO,
+    _In_ OrtSessionOptions* options, _In_ const char* settings_str);
 
 #ifdef __cplusplus
 }
diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 1db82800cb..fc016c648c 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -21,10 +21,8 @@ GlobalContext& BackendManager::GetGlobalContext() {
   return global_context;
 }
 
-BackendManager::BackendManager(const onnxruntime::Node* fused_node, const logging::Logger& logger,
-                               std::string dev_id, std::string prec_str) {
-  subgraph_context_.device_id = dev_id;
-  subgraph_context_.precision_str = prec_str;
+BackendManager::BackendManager(const onnxruntime::Node* fused_node, const logging::Logger& logger) {
+  auto prec_str = GetGlobalContext().precision_str; 
   if (prec_str == "FP32") {
     subgraph_context_.precision = InferenceEngine::Precision::FP32;
   } else if (prec_str == "FP16") {
@@ -51,7 +49,7 @@ BackendManager::BackendManager(const onnxruntime::Node* fused_node, const loggin
 
   auto graph_inputs = fused_node->GetFunctionBody()->Body().GetInputs();
   for (auto input : graph_inputs) {
-    if(subgraph_context_.device_id == "MYRIAD"){
+    if(GetGlobalContext().device_type == "MYRIAD"){
       auto shape = input->Shape();
       if(shape != nullptr){
         if(shape->dim_size() != 4){
@@ -81,7 +79,7 @@ BackendManager::BackendManager(const onnxruntime::Node* fused_node, const loggin
 
   if (ModelHasBatchedInputs(model_proto_) &&
       GetGlobalContext().is_wholly_supported_graph &&
-      subgraph_context_.device_id == "HDDL") {
+      GetGlobalContext().device_type == "HDDL") {
     subgraph_context_.enable_batching = true;
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model can be Batch inferenced \n";
     auto model_copy = ReWriteBatchDimWithOne(model_proto_);
@@ -212,9 +210,9 @@ std::vector<std::vector<int64_t>> GetInputTensorShapes(Ort::CustomOpApi& api,
 }
 
 std::string MakeMapKeyString(std::vector<std::vector<int64_t>>& shapes,
-                             std::string& device_id) {
+                             std::string& device_type) {
   std::string key;
-  key += device_id;
+  key += device_type;
   key += "|";  //separator
   for (auto shape : shapes) {
     for (auto dim : shape) {
@@ -267,9 +265,9 @@ BackendManager::ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_p
 void BackendManager::Compute(Ort::CustomOpApi api, OrtKernelContext* context) {
   if (subgraph_context_.has_dynamic_input_shape) {
     std::vector<std::vector<int64_t>> tensor_shapes = GetInputTensorShapes(api, context);
-    auto key = MakeMapKeyString(tensor_shapes, subgraph_context_.device_id);
+    auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type);
 
-    if(subgraph_context_.device_id == "MYRIAD"){
+    if(GetGlobalContext().device_type == "MYRIAD"){
       
       #if (defined OPENVINO_2020_2) || (defined OPENVINO_2020_3)
       for(size_t i = 0; i < subgraph_context_.input_indexes.size(); i++){
diff --git a/onnxruntime/core/providers/openvino/backend_manager.h b/onnxruntime/core/providers/openvino/backend_manager.h
index da3b957eb6..8a17c4815d 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.h
+++ b/onnxruntime/core/providers/openvino/backend_manager.h
@@ -20,8 +20,7 @@ namespace openvino_ep {
 // Singleton class that manages all the backends
 class BackendManager {
  public:
-  BackendManager(const onnxruntime::Node* fused_node, const logging::Logger& logger,
-                 std::string dev_id, std::string prec_str);
+  BackendManager(const onnxruntime::Node* fused_node, const logging::Logger& logger);
   void Compute(Ort::CustomOpApi api, OrtKernelContext* context);
   void ShutdownBackendManager();
   static GlobalContext& GetGlobalContext();
diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
index 37ae030195..96b7afcb55 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -42,16 +42,13 @@ void DumpOnnxModelProto(const ONNX_NAMESPACE::ModelProto& model_proto, std::stri
 #endif
 
 std::shared_ptr<InferenceEngine::CNNNetwork>
-CreateCNNNetwork(const ONNX_NAMESPACE::ModelProto& model_proto, const SubGraphContext& subgraph_context, std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
+CreateCNNNetwork(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context, std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
 
 
 #if (defined OPENVINO_2020_2) || (defined OPENVINO_2020_3)
   ORT_UNUSED_PARAMETER(const_outputs_map);
 #endif
 
-  InferenceEngine::Precision precision = subgraph_context.precision;
-  std::string device_id = subgraph_context.device_id;
-
   std::istringstream model_stream{model_proto.SerializeAsString()};
   std::shared_ptr<ngraph::Function> ng_function;
 
@@ -70,7 +67,8 @@ CreateCNNNetwork(const ONNX_NAMESPACE::ModelProto& model_proto, const SubGraphCo
     ORT_THROW(log_tag + "[OpenVINO-EP] Unknown exception while importing model to nGraph Func");
   }
 
-  if (device_id == "GPU" && precision == InferenceEngine::Precision::FP16) {
+  if (global_context.device_type == "GPU" &&
+       subgraph_context.precision == InferenceEngine::Precision::FP16) {
     //FP16 transformations
     ngraph::pass::ConvertFP32ToFP16().run_on_function(ng_function);
     ng_function->validate_nodes_and_infer_types();
diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h
index f72190de08..c7d87889fc 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.h
+++ b/onnxruntime/core/providers/openvino/backend_utils.h
@@ -23,7 +23,7 @@ void SetIODefs(const ONNX_NAMESPACE::ModelProto& model_proto,
                std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map);
 
 std::shared_ptr<InferenceEngine::CNNNetwork>
-CreateCNNNetwork(const ONNX_NAMESPACE::ModelProto& model_proto, const SubGraphContext& subgraph_context, std::map<std::string,
+CreateCNNNetwork(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context, std::map<std::string,
                    std::shared_ptr<ngraph::Node>>& const_outputs_map);
 
 int GetFirstAvailableDevice(GlobalContext& global_context);
diff --git a/onnxruntime/core/providers/openvino/backends/backend_factory.cc b/onnxruntime/core/providers/openvino/backends/backend_factory.cc
index c195ebab92..7dd9e7fda9 100644
--- a/onnxruntime/core/providers/openvino/backends/backend_factory.cc
+++ b/onnxruntime/core/providers/openvino/backends/backend_factory.cc
@@ -16,7 +16,7 @@ std::shared_ptr<IBackend>
 BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
                             GlobalContext& global_context,
                             const SubGraphContext& subgraph_context) {
-  std::string type = subgraph_context.device_id;
+  std::string type = global_context.device_type;
   if (type == "CPU" || type == "GPU" || type == "MYRIAD" || type == "HETERO:FPGA,CPU") {
     return std::make_shared<BasicBackend>(model_proto, global_context, subgraph_context);
   } else if (type == "HDDL") {
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index 871ad3b356..3e9a832cdb 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -36,7 +36,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
                            const SubGraphContext& subgraph_context)
     : global_context_(global_context), subgraph_context_(subgraph_context) {
 
-  ie_cnn_network_ = CreateCNNNetwork(model_proto, subgraph_context_, const_outputs_map_);
+  ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
   SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_);
   InferenceEngine::ExecutableNetwork exe_network;
 
@@ -49,11 +49,20 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
   if(subgraph_context_.is_constant)
     return;
   std::map<std::string, std::string> config;
-  if(subgraph_context_.device_id == "MYRIAD" && subgraph_context_.set_vpu_config){
-    config["VPU_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
+  if(global_context_.device_type == "MYRIAD"){
+
+    if(subgraph_context_.set_vpu_config) {
+      config["VPU_DETECT_NETWORK_BATCH"] = CONFIG_VALUE(NO);
+    }
+
+    if(global_context_.enable_vpu_fast_compile) {
+      config["VPU_HW_INJECT_STAGES"] = CONFIG_VALUE(NO);
+      config["VPU_COPY_OPTIMIZATION"] = CONFIG_VALUE(NO);
+    }
   }
+  std::string& hw_target = (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
   try {
-    exe_network = global_context_.ie_core.LoadNetwork(*ie_cnn_network_, subgraph_context_.device_id, config);
+    exe_network = global_context_.ie_core.LoadNetwork(*ie_cnn_network_, hw_target, config);
   } catch (InferenceEngine::details::InferenceEngineException e) {
     ORT_THROW(log_tag + " Exception while Loading Network for graph: " + subgraph_context_.subgraph_name + ": " +  e.what());
   } catch (...) {
@@ -228,4 +237,4 @@ void BasicBackend::Infer(Ort::CustomOpApi& ort, OrtKernelContext* context) {
 }
 
 }  // namespace openvino_ep
-}  // namespace onnxruntime
\ No newline at end of file
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/backends/vadm_backend.cc b/onnxruntime/core/providers/openvino/backends/vadm_backend.cc
index 45becae2a4..d04827600c 100644
--- a/onnxruntime/core/providers/openvino/backends/vadm_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/vadm_backend.cc
@@ -47,7 +47,7 @@ VADMBackend::VADMBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
   // sets number of maximum parallel inferences
   num_inf_reqs_ = 8;
 
-  ie_cnn_network_ = CreateCNNNetwork(model_proto, subgraph_context_, const_outputs_map_);
+  ie_cnn_network_ = CreateCNNNetwork(model_proto, global_context_, subgraph_context_, const_outputs_map_);
 
   SetIODefs(model_proto, ie_cnn_network_, subgraph_context_.output_names, const_outputs_map_);
   std::map<std::string, std::string> config;
diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h
index 9f6b557047..3ddb4b3e98 100644
--- a/onnxruntime/core/providers/openvino/contexts.h
+++ b/onnxruntime/core/providers/openvino/contexts.h
@@ -12,6 +12,10 @@ namespace openvino_ep {
 struct GlobalContext {
   InferenceEngine::Core ie_core;
   bool is_wholly_supported_graph = false;
+  bool enable_vpu_fast_compile = false;
+  std::string device_type;
+  std::string precision_str;
+  std::string device_id;
   std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};
   std::vector<std::string> deviceTags = {"0", "1", "2", "3", "4", "5", "6", "7"};
 };
@@ -29,9 +33,7 @@ struct SubGraphContext {
   std::unordered_map<std::string, int> input_names;
   #endif
   std::unordered_map<std::string, int> output_names;
-  std::string device_id;
   InferenceEngine::Precision precision;
-  std::string precision_str;
 };
 
 }  // namespace openvino_ep
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
index d2fc3cdc01..1bc7c6438f 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -19,7 +19,30 @@ namespace onnxruntime {
 constexpr const char* OpenVINO = "OpenVINO";
 
 OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProviderInfo& info)
-    : IExecutionProvider{onnxruntime::kOpenVINOExecutionProvider}, info_(info) {
+    : IExecutionProvider{onnxruntime::kOpenVINOExecutionProvider} {
+
+  openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_;
+  openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
+  openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
+  if(info.device_id_ != "") {
+    bool device_found = false;
+    auto available_devices = openvino_ep::BackendManager::GetGlobalContext().ie_core.GetAvailableDevices();
+    for(auto device : available_devices) {
+      if(device == info.device_id_) {
+        device_found = true;
+        break;
+      }
+    }
+    if(!device_found) {
+      std::string err_msg = std::string("Device not found : ") + info.device_id_ + "\nChoose one of:\n";
+      for(auto device : available_devices) {
+        err_msg = err_msg + device + "\n";
+      }
+      ORT_THROW(err_msg);
+    }
+  }
+  openvino_ep::BackendManager::GetGlobalContext().device_id = info.device_id_;
+
   AllocatorCreationInfo device_info(
       [](int) {
         return std::make_unique<CPUAllocator>(OrtMemoryInfo(OpenVINO, OrtDeviceAllocator));
@@ -36,9 +59,11 @@ OpenVINOExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_v
   std::vector<std::unique_ptr<ComputeCapability>> result;
 
 #if (defined OPENVINO_2020_2) || (defined OPENVINO_2020_3)
-  result = openvino_ep::GetCapability_2020_2(graph_viewer, info_.device_id_);
+  result = openvino_ep::GetCapability_2020_2(graph_viewer,
+                          openvino_ep::BackendManager::GetGlobalContext().device_type);
 #elif defined OPENVINO_2020_4
-  result = openvino_ep::GetCapability_2020_4(graph_viewer, info_.device_id_);
+  result = openvino_ep::GetCapability_2020_4(graph_viewer,
+                          openvino_ep::BackendManager::GetGlobalContext().device_type);
 #endif
 
   return result;
@@ -49,7 +74,7 @@ common::Status OpenVINOExecutionProvider::Compile(
     std::vector<NodeComputeInfo>& node_compute_funcs) {
   for (const auto& fused_node : fused_nodes) {
     NodeComputeInfo compute_info;
-    std::shared_ptr<openvino_ep::BackendManager> backend_manager = std::make_shared<openvino_ep::BackendManager>(fused_node, *GetLogger(), info_.device_id_, info_.precision_);
+    std::shared_ptr<openvino_ep::BackendManager> backend_manager = std::make_shared<openvino_ep::BackendManager>(fused_node, *GetLogger());
 
     compute_info.create_state_func =
         [backend_manager](ComputeContext* context, FunctionState* state) {
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
index 606b71575f..5e4755ac25 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -15,63 +15,62 @@ namespace onnxruntime {
 
 // Information needed to construct OpenVINO execution providers.
 struct OpenVINOExecutionProviderInfo {
-  std::string device_id_;
+  std::string device_type_;
   std::string precision_;
+  bool enable_vpu_fast_compile_;
+  std::string device_id_;
 
-  explicit OpenVINOExecutionProviderInfo(std::string dev_id) {
-    if (dev_id == "") {
+  explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id)
+            : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id) {
+
+    if (dev_type == "") {
       LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
                          << "No runtime device selection option provided.";
-#ifdef OPENVINO_CONFIG_CPU_FP32
-      device_id_ = "CPU";
+      #if defined OPENVINO_CONFIG_CPU_FP32
+      device_type_ = "CPU";
       precision_ = "FP32";
-#endif
-#ifdef OPENVINO_CONFIG_GPU_FP32
-      device_id_ = "GPU";
+      #elif defined OPENVINO_CONFIG_GPU_FP32
+      device_type_ = "GPU";
       precision_ = "FP32";
-#endif
-#ifdef OPENVINO_CONFIG_GPU_FP16
-      device_id_ = "GPU";
+      #elif defined OPENVINO_CONFIG_GPU_FP16
+      device_type_ = "GPU";
       precision_ = "FP16";
-#endif
-#ifdef OPENVINO_CONFIG_MYRIAD
-      device_id_ = "MYRIAD";
+      #elif defined OPENVINO_CONFIG_MYRIAD
+      device_type_ = "MYRIAD";
       precision_ = "FP16";
-#endif
-#ifdef OPENVINO_CONFIG_VAD_M
-      device_id_ = "HDDL";
+      #elif defined OPENVINO_CONFIG_VAD_M
+      device_type_ = "HDDL";
       precision_ = "FP16";
-#endif
-#ifdef OPENVINO_CONFIG_VAD_F
-      device_id_ = "HETERO:FPGA,CPU";
+      #elif defined OPENVINO_CONFIG_VAD_F
+      device_type_ = "HETERO:FPGA,CPU";
       precision_ = "FP32";
-#endif
-    } else if (dev_id == "CPU_FP32") {
-      device_id_ = "CPU";
+      #endif
+    } else if (dev_type == "CPU_FP32") {
+      device_type_ = "CPU";
       precision_ = "FP32";
-    } else if (dev_id == "GPU_FP32") {
-      device_id_ = "GPU";
+    } else if (dev_type == "GPU_FP32") {
+      device_type_ = "GPU";
       precision_ = "FP32";
-    } else if (dev_id == "GPU_FP16") {
-      device_id_ = "GPU";
+    } else if (dev_type == "GPU_FP16") {
+      device_type_ = "GPU";
       precision_ = "FP16";
-    } else if (dev_id == "MYRIAD_FP16") {
-      device_id_ = "MYRIAD";
+    } else if (dev_type == "MYRIAD_FP16") {
+      device_type_ = "MYRIAD";
       precision_ = "FP16";
-    } else if (dev_id == "VAD-M_FP16") {
-      device_id_ = "HDDL";
+    } else if (dev_type == "VAD-M_FP16") {
+      device_type_ = "HDDL";
       precision_ = "FP16";
-    } else if (dev_id == "VAD-F_FP32") {
-      device_id_ = "HETERO:FPGA,CPU";
+    } else if (dev_type == "VAD-F_FP32") {
+      device_type_ = "HETERO:FPGA,CPU";
       precision_ = "FP32";
     } else {
-      ORT_THROW("Invalid device string: " + dev_id);
+      ORT_THROW("Invalid device string: " + dev_type);
     }
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
-                       << "Choosing Device: " << device_id_ << " , Precision: " << precision_;
+                       << "Choosing Device: " << device_type_ << " , Precision: " << precision_;
   }
   OpenVINOExecutionProviderInfo() {
-    OpenVINOExecutionProviderInfo("");
+    OpenVINOExecutionProviderInfo("", false, "");
   }
 };
 
@@ -102,8 +101,6 @@ class OpenVINOExecutionProvider : public IExecutionProvider {
   const void* GetExecutionHandle() const noexcept override {
     return nullptr;
   }
- private:
-  OpenVINOExecutionProviderInfo info_;
 };
 
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
index 457bf59063..379afad430 100644
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -7,12 +7,11 @@
 
 namespace onnxruntime {
 struct OpenVINOProviderFactory : IExecutionProviderFactory {
-  OpenVINOProviderFactory(const char* device) {
-    if (device == nullptr) {
-      device_ = "";
-    } else {
-      device_ = device;
-    }
+  OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
+                          const char* device_id)
+    : enable_vpu_fast_compile_(enable_vpu_fast_compile) {
+    device_type_ = (device_type == nullptr) ? "" : device_type;
+    device_id_ = (device_id == nullptr) ? "" : device_id;
   }
   ~OpenVINOProviderFactory() override {
   }
@@ -20,24 +19,68 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
   std::unique_ptr<IExecutionProvider> CreateProvider() override;
 
  private:
-  std::string device_;
+  std::string device_type_;
+  bool enable_vpu_fast_compile_;
+  std::string device_id_;
 };
 
 std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
-  OpenVINOExecutionProviderInfo info(device_);
+  OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_);
   return std::make_unique<OpenVINOExecutionProvider>(info);
 }
 
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
-    const char* device_id) {
-  return std::make_shared<onnxruntime::OpenVINOProviderFactory>(device_id);
+    const char* device_type, bool enable_vpu_fast_compile, const char* device_id) {
+  return std::make_shared<onnxruntime::OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile, device_id);
 }
 
 }  // namespace onnxruntime
 
 ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProvider_OpenVINO,
-                    _In_ OrtSessionOptions* options, const char* device_id) {
+                    _In_ OrtSessionOptions* options, _In_ const char* device_type) {
   options->provider_factories.push_back(
-      onnxruntime::CreateExecutionProviderFactory_OpenVINO(device_id));
+      onnxruntime::CreateExecutionProviderFactory_OpenVINO(device_type, false, ""));
+  return nullptr;
+}
+
+// Parses settings_str ('\n'-separated "key|value" pairs) and registers the OpenVINO EP factory.
+ORT_API_STATUS_IMPL(OrtSessionOptionsAppendExecutionProviderEx_OpenVINO,
+                    _In_ OrtSessionOptions* options, _In_ const char* settings_str) {
+  std::string device_type = "";
+  bool enable_vpu_fast_compile = false;
+  std::string device_id = "";
+
+  // Parse settings string; a null pointer is treated as "no settings".
+  std::stringstream iss;
+  if (settings_str != nullptr) {
+    iss << settings_str;
+  }
+  std::string token;
+  while (std::getline(iss, token)) {
+    // Skip lines without a delimiter, with an empty key, or with an empty value.
+    auto pos = token.find("|");
+    if (pos == std::string::npos || pos == 0 || pos + 1 == token.length()) {
+      continue;
+    }
+
+    auto key = token.substr(0, pos);
+    auto value = token.substr(pos + 1);
+
+    // Unrecognized keys are ignored.
+    if (key == "device_type") {
+      device_type = value;
+    } else if (key == "enable_vpu_fast_compile") {
+      if (value == "true" || value == "True") {
+        enable_vpu_fast_compile = true;
+      }
+    } else if (key == "device_id") {
+      device_id = value;
+    }
+  }
+
+  options->provider_factories.push_back(
+      onnxruntime::CreateExecutionProviderFactory_OpenVINO(device_type.c_str(),
+                                                           enable_vpu_fast_compile,
+                                                           device_id.c_str()));
   return nullptr;
 }
diff --git a/onnxruntime/core/providers/openvino/ov_versions/capabilities.h b/onnxruntime/core/providers/openvino/ov_versions/capabilities.h
index c6d8394930..3f9c17ab80 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capabilities.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/capabilities.h
@@ -7,11 +7,11 @@ namespace openvino_ep {
 
 #if (defined OPENVINO_2020_2) || (defined OPENVINO_2020_3)
 std::vector<std::unique_ptr<ComputeCapability>>
-GetCapability_2020_2(const onnxruntime::GraphViewer& graph_viewer, const std::string device_id);
+GetCapability_2020_2(const onnxruntime::GraphViewer& graph_viewer, const std::string device_type);
 
 #elif defined OPENVINO_2020_4
 std::vector<std::unique_ptr<ComputeCapability>>
-GetCapability_2020_4(const onnxruntime::GraphViewer& graph_viewer, const std::string device_id);
+GetCapability_2020_4(const onnxruntime::GraphViewer& graph_viewer, const std::string device_type);
 
 #endif
 
diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability_2020_2.cc b/onnxruntime/core/providers/openvino/ov_versions/capability_2020_2.cc
index 94cb802444..9a875f9bcf 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability_2020_2.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability_2020_2.cc
@@ -413,7 +413,7 @@ static bool IsUnsupportedOpMode(const Node* node, const onnxruntime::GraphViewer
   return false;
 }
 
-static bool IsTypeSupported(const NodeArg* node_arg, bool is_initializer, const std::string& device_id) {
+static bool IsTypeSupported(const NodeArg* node_arg, bool is_initializer, const std::string& device_type) {
   const auto* type_proto = node_arg->TypeAsProto();
   if (!type_proto) {
     return false;
@@ -449,7 +449,7 @@ static bool IsTypeSupported(const NodeArg* node_arg, bool is_initializer, const
         ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32};
     auto dtype = type_proto->tensor_type().elem_type();
 
-    if (device_id == "CPU" || device_id == "MYRIAD" || device_id == "HDDL") {
+    if (device_type == "CPU" || device_type == "MYRIAD" || device_type == "HDDL") {
       if (supported_types_cpu.find(dtype) != supported_types_cpu.end())
         return true;
       else {
@@ -460,7 +460,7 @@ static bool IsTypeSupported(const NodeArg* node_arg, bool is_initializer, const
 #endif
         return false;
       }
-    } else if (device_id == "GPU") {
+    } else if (device_type == "GPU") {
       if (supported_types_gpu.find(dtype) != supported_types_gpu.end())
         return true;
       else {
@@ -478,7 +478,7 @@ static bool IsTypeSupported(const NodeArg* node_arg, bool is_initializer, const
 
 static bool IsNodeSupported(const std::map<std::string, std::set<std::string>>& op_map,
                             const onnxruntime::GraphViewer& graph_viewer,
-                            const NodeIndex node_idx, std::string& device_id) {
+                            const NodeIndex node_idx, std::string& device_type) {
   const auto& node = graph_viewer.GetNode(node_idx);
   const auto& optype = node->OpType();
 
@@ -500,7 +500,7 @@ static bool IsNodeSupported(const std::map<std::string, std::set<std::string>>&
   */
 
   //Check 0
-  if (IsUnsupportedOp(optype, device_id)) {
+  if (IsUnsupportedOp(optype, device_type)) {
 #ifndef NDEBUG
     if (openvino_ep::backend_utils::IsDebugEnabled()) {
       std::cout << "Node is in the unsupported list" << std::endl;
@@ -512,13 +512,13 @@ static bool IsNodeSupported(const std::map<std::string, std::set<std::string>>&
   //Check 1
   bool are_types_supported = true;
 
-  node->ForEachDef([&are_types_supported, &graph_viewer, &device_id](const onnxruntime::NodeArg& node_arg, bool is_input) {
+  node->ForEachDef([&are_types_supported, &graph_viewer, &device_type](const onnxruntime::NodeArg& node_arg, bool is_input) {
     bool is_initializer = false;
     if (is_input) {
       if (graph_viewer.IsConstantInitializer(node_arg.Name(), true))
         is_initializer = true;
     }
-    are_types_supported &= IsTypeSupported(&node_arg, is_initializer, device_id);
+    are_types_supported &= IsTypeSupported(&node_arg, is_initializer, device_type);
   });
 
   if (!are_types_supported) {
@@ -528,7 +528,7 @@ static bool IsNodeSupported(const std::map<std::string, std::set<std::string>>&
   //Check 2
 
   bool has_unsupported_dimension = false;
-  node->ForEachDef([&has_unsupported_dimension, &graph_viewer, &device_id](const onnxruntime::NodeArg& node_arg, bool is_input) {
+  node->ForEachDef([&has_unsupported_dimension, &graph_viewer, &device_type](const onnxruntime::NodeArg& node_arg, bool is_input) {
     if (is_input) {
       if (graph_viewer.IsConstantInitializer(node_arg.Name(), true))
         return;
@@ -603,7 +603,7 @@ GetUnsupportedNodeIndices(const GraphViewer& graph_viewer, std::string device, /
 
 
 std::vector<std::unique_ptr<ComputeCapability>>
-GetCapability_2020_2(const onnxruntime::GraphViewer& graph_viewer, const std::string device_id) {
+GetCapability_2020_2(const onnxruntime::GraphViewer& graph_viewer, const std::string device_type) {
   std::vector<std::unique_ptr<ComputeCapability>> result;
 
   if (graph_viewer.IsSubgraph()) {
@@ -621,7 +621,7 @@ GetCapability_2020_2(const onnxruntime::GraphViewer& graph_viewer, const std::st
   // This is a list of initializers that nGraph considers as constants. Example weights, reshape shape etc.
   std::unordered_set<std::string> ng_required_initializers;
 
-  const auto unsupported_nodes = GetUnsupportedNodeIndices(graph_viewer, device_id, ng_required_initializers);
+  const auto unsupported_nodes = GetUnsupportedNodeIndices(graph_viewer, device_type, ng_required_initializers);
 
   //If all ops are supported, no partitioning is required. Short-circuit and avoid splitting.
   if (unsupported_nodes.empty()) {
@@ -666,7 +666,7 @@ GetCapability_2020_2(const onnxruntime::GraphViewer& graph_viewer, const std::st
     auto connected_clusters = GetConnectedClusters(graph_viewer, ng_clusters);
 
     //Myriad plugin can only load 10 subgraphs
-    if (device_id == "MYRIAD" && connected_clusters.size() > 10) {
+    if (device_type == "MYRIAD" && connected_clusters.size() > 10) {
       std::sort(connected_clusters.begin(), connected_clusters.end(),
                 [](const std::vector<NodeIndex>& v1, const std::vector<NodeIndex>& v2) -> bool {
                   return v1.size() > v2.size();
@@ -675,7 +675,7 @@ GetCapability_2020_2(const onnxruntime::GraphViewer& graph_viewer, const std::st
     int no_of_clusters = 0;
 
     for (const auto& this_cluster : connected_clusters) {
-      if (device_id == "MYRIAD" && no_of_clusters == 10) {
+      if (device_type == "MYRIAD" && no_of_clusters == 10) {
         break;
       }
       std::vector<std::string> cluster_graph_inputs, cluster_inputs, const_inputs, cluster_outputs;
diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability_2020_4.cc b/onnxruntime/core/providers/openvino/ov_versions/capability_2020_4.cc
index 101541b3ff..109d8823c1 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability_2020_4.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability_2020_4.cc
@@ -426,7 +426,7 @@ static bool IsUnsupportedOpMode(const Node* node, const onnxruntime::GraphViewer
   return false;
 }
 
-static bool IsTypeSupported(const NodeArg* node_arg, bool is_initializer, const std::string& device_id) {
+static bool IsTypeSupported(const NodeArg* node_arg, bool is_initializer, const std::string& device_type) {
   const auto* type_proto = node_arg->TypeAsProto();
   if (!type_proto) {
     return false;
@@ -466,7 +466,7 @@ static bool IsTypeSupported(const NodeArg* node_arg, bool is_initializer, const
     };
     auto dtype = type_proto->tensor_type().elem_type();
 
-    if (device_id == "CPU" || device_id == "MYRIAD" || device_id == "HDDL") {
+    if (device_type == "CPU" || device_type == "MYRIAD" || device_type == "HDDL") {
       if (supported_types_cpu.find(dtype) != supported_types_cpu.end())
         return true;
       else {
@@ -477,7 +477,7 @@ static bool IsTypeSupported(const NodeArg* node_arg, bool is_initializer, const
 #endif
         return false;
       }
-    } else if (device_id == "GPU") {
+    } else if (device_type == "GPU") {
       if (supported_types_gpu.find(dtype) != supported_types_gpu.end())
         return true;
       else {
@@ -495,7 +495,7 @@ static bool IsTypeSupported(const NodeArg* node_arg, bool is_initializer, const
 
 static bool IsNodeSupported(const std::map<std::string, std::set<std::string>>& op_map,
                             const onnxruntime::GraphViewer& graph_viewer,
-                            const NodeIndex node_idx, std::string& device_id) {
+                            const NodeIndex node_idx, std::string& device_type) {
   const auto& node = graph_viewer.GetNode(node_idx);
   const auto& optype = node->OpType();
 
@@ -517,7 +517,7 @@ static bool IsNodeSupported(const std::map<std::string, std::set<std::string>>&
   */
 
   //Check 0
-  if (!IsOpSupported(optype, device_id)) {
+  if (!IsOpSupported(optype, device_type)) {
 #ifndef NDEBUG
     if (openvino_ep::backend_utils::IsDebugEnabled()) {
       std::cout << "Node is not in the supported ops list" << std::endl;
@@ -529,13 +529,13 @@ static bool IsNodeSupported(const std::map<std::string, std::set<std::string>>&
   //Check 1
   bool are_types_supported = true;
 
-  node->ForEachDef([&are_types_supported, &graph_viewer, &device_id](const onnxruntime::NodeArg& node_arg, bool is_input) {
+  node->ForEachDef([&are_types_supported, &graph_viewer, &device_type](const onnxruntime::NodeArg& node_arg, bool is_input) {
     bool is_initializer = false;
     if (is_input) {
       if (graph_viewer.IsConstantInitializer(node_arg.Name(), true))
         is_initializer = true;
     }
-    are_types_supported &= IsTypeSupported(&node_arg, is_initializer, device_id);
+    are_types_supported &= IsTypeSupported(&node_arg, is_initializer, device_type);
   });
 
   if (!are_types_supported) {
@@ -545,7 +545,7 @@ static bool IsNodeSupported(const std::map<std::string, std::set<std::string>>&
   //Check 2
 
   bool has_unsupported_dimension = false;
-  node->ForEachDef([&has_unsupported_dimension, &graph_viewer, &device_id, &optype](const onnxruntime::NodeArg& node_arg, bool is_input) {
+  node->ForEachDef([&has_unsupported_dimension, &graph_viewer, &device_type, &optype](const onnxruntime::NodeArg& node_arg, bool is_input) {
     if (is_input) {
       if (graph_viewer.IsConstantInitializer(node_arg.Name(), true))
         return;
@@ -624,7 +624,7 @@ GetUnsupportedNodeIndices(const GraphViewer& graph_viewer, std::string device, /
 
 
 std::vector<std::unique_ptr<ComputeCapability>>
-GetCapability_2020_4(const onnxruntime::GraphViewer& graph_viewer, std::string device_id) {
+GetCapability_2020_4(const onnxruntime::GraphViewer& graph_viewer, std::string device_type) {
 
   std::vector<std::unique_ptr<ComputeCapability>> result;
 
@@ -643,7 +643,7 @@ GetCapability_2020_4(const onnxruntime::GraphViewer& graph_viewer, std::string d
   // This is a list of initializers that nGraph considers as constants. Example weights, reshape shape etc.
   std::unordered_set<std::string> ng_required_initializers;
 
-  const auto unsupported_nodes = GetUnsupportedNodeIndices(graph_viewer, device_id, ng_required_initializers);
+  const auto unsupported_nodes = GetUnsupportedNodeIndices(graph_viewer, device_type, ng_required_initializers);
   #ifndef NDEBUG
     if(openvino_ep::backend_utils::IsDebugEnabled()){
       std::cout << "No of unsupported nodes " << unsupported_nodes.size() << std::endl;
@@ -702,7 +702,7 @@ GetCapability_2020_4(const onnxruntime::GraphViewer& graph_viewer, std::string d
     auto connected_clusters = GetConnectedClusters(graph_viewer, ng_clusters);
 
     //Myriad plugin can only load 10 subgraphs
-    if (device_id == "MYRIAD" && connected_clusters.size() > 10) {
+    if (device_type == "MYRIAD" && connected_clusters.size() > 10) {
       std::sort(connected_clusters.begin(), connected_clusters.end(),
                 [](const std::vector<NodeIndex>& v1, const std::vector<NodeIndex>& v2) -> bool {
                   return v1.size() > v2.size();
@@ -711,7 +711,7 @@ GetCapability_2020_4(const onnxruntime::GraphViewer& graph_viewer, std::string d
     int no_of_clusters = 0;
 
     for (auto this_cluster : connected_clusters) {
-      if (device_id == "MYRIAD" && no_of_clusters == 10) {
+      if (device_type == "MYRIAD" && no_of_clusters == 10) {
         break;
       }
       std::vector<std::string> cluster_graph_inputs, cluster_inputs, const_inputs, cluster_outputs;
@@ -744,7 +744,7 @@ GetCapability_2020_4(const onnxruntime::GraphViewer& graph_viewer, std::string d
             node->OpType() == "Cast" || node->OpType() == "Concat" || node->OpType() == "Gather"
             || node->OpType() == "Div" || node->OpType() == "Sub"){
 
-            if((node->OpType() == "Div" || node->OpType() == "Sub") && device_id != "MYRIAD")
+            if((node->OpType() == "Div" || node->OpType() == "Sub") && device_type != "MYRIAD")
               continue;
             for (const auto& input : node->InputDefs()) {
               auto input_name = input->Name();
@@ -769,7 +769,7 @@ GetCapability_2020_4(const onnxruntime::GraphViewer& graph_viewer, std::string d
           const bool is_data_int32 = input->Type()->find("int32") != std::string::npos;
           auto it = find(cluster_graph_inputs.begin(), cluster_graph_inputs.end(), input_name);
           if(it != cluster_graph_inputs.end()){
-            if(device_id == "MYRIAD" && is_data_int32){
+            if(device_type == "MYRIAD" && is_data_int32){
               omit_subgraph = true;
               break;
             }
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index 8a5ba51849..e00adfc65e 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -23,6 +23,10 @@
 #include "core/session/abi_session_options_impl.h"
 #include "core/platform/env.h"
 
+#if USE_OPENVINO
+#include <inference_engine.hpp>
+#endif
+
 struct OrtStatus {
   OrtErrorCode code;
   char msg[1];  // a null-terminated string
@@ -150,7 +154,7 @@ onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExten
 #endif
 #ifdef USE_OPENVINO
 #include "core/providers/openvino/openvino_provider_factory.h"
-std::string openvino_device;
+std::string openvino_device_type;
 #endif
 #ifdef USE_NUPHAR
 #include "core/providers/nuphar/nuphar_provider_factory.h"
@@ -180,7 +184,9 @@ std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensor
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_NGraph(const char* ng_backend_type);
-std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const char* device);
+std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const char* device_type, 
+                                                                                    bool enable_vpu_fast_compile,
+                                                                                    const char* device_id);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nuphar(bool, const char*);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_VITISAI(const char* backend_type, int device_id);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_ACL(int use_arena);
@@ -556,8 +562,33 @@ void RegisterExecutionProviders(InferenceSession* sess, const std::vector<std::s
 #endif
     } else if (type == kOpenVINOExecutionProvider) {
 #ifdef USE_OPENVINO
-      RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device.c_str()));
-      openvino_device.clear();
+      bool enable_vpu_fast_compile = false;
+      std::string openvino_device_id;
+      auto it = provider_options_map.find(type);
+      if(it != provider_options_map.end()) {
+        for(auto option : it->second) {
+          if(option.first == "device_type") openvino_device_type = option.second;
+          else if (option.first == "enable_vpu_fast_compile") {
+            if(option.second == "True") {
+              enable_vpu_fast_compile = true;
+            } else if (option.second == "False") {
+              enable_vpu_fast_compile = false;
+            } else {
+              ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
+            }
+
+          }
+          else if (option.first == "device_id")  openvino_device_id = option.second;
+          else {
+            ORT_THROW("Invalid OpenVINO EP option: ", option.first);
+          }
+        }
+      }
+      RegisterExecutionProvider(sess, *onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device_type.c_str(),
+                                                                                            enable_vpu_fast_compile,
+                                                                                            openvino_device_id.c_str()));
+      // Reset global variables config to avoid it being accidentally passed on to the next session
+      openvino_device_type.clear();
 #endif
     } else if (type == kNupharExecutionProvider) {
 #if USE_NUPHAR
@@ -687,13 +718,22 @@ void addGlobalMethods(py::module& m, const Environment& env) {
 
 #ifdef USE_OPENVINO
   m.def(
-      "set_openvino_device", [](const std::string& device) { openvino_device = device; },
-      "Set the prefered OpenVINO device(s) to be used. If left unset, all available devices will be used.");
+      "get_available_openvino_device_ids", []() -> std::vector<std::string> {
+        InferenceEngine::Core ie_core;
+        return ie_core.GetAvailableDevices();
+      },
+      "Lists all OpenVINO device ids available.");
+/*
+* The following APIs to set config options are deprecated. Use Session.set_providers() instead.
+*/
+  m.def(
+      "set_openvino_device", [](const std::string& device_type) { openvino_device_type = device_type; },
+      "Set the prefered OpenVINO device type to be used. If left unset, the device type selected during build time will be used.");
   m.def(
       "get_openvino_device", []() -> std::string {
-        return openvino_device;
+        return openvino_device_type;
       },
-      "");
+      "Gets the dynamically selected OpenVINO device type for inference.");
 #endif
 
 #ifdef onnxruntime_PYBIND_EXPORT_OPSCHEMA
@@ -718,7 +758,7 @@ void addGlobalMethods(py::module& m, const Environment& env) {
             onnxruntime::CreateExecutionProviderFactory_NGraph("CPU"),
 #endif
 #ifdef USE_OPENVINO
-            onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device),
+            onnxruntime::CreateExecutionProviderFactory_OpenVINO(openvino_device_type.c_str(), false, ""),  // fast compile off, no device_id
 #endif
 #ifdef USE_TENSORRT
             onnxruntime::CreateExecutionProviderFactory_Tensorrt(0),
diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc
index 50976d4c8b..880ddd9fcc 100644
--- a/onnxruntime/test/util/default_providers.cc
+++ b/onnxruntime/test/util/default_providers.cc
@@ -15,7 +15,7 @@ std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_CUDA(O
                                                                                ArenaExtendStrategy arena_extend_strategy = ArenaExtendStrategy::kNextPowerOfTwo);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(int use_arena);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_NGraph(const char* ng_backend_type);
-std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const char* device_id);
+std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const char* device_type, bool enable_vpu_fast_compile, const char* device_id);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nuphar(bool, const char*);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nnapi();
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Rknpu();
@@ -48,7 +48,7 @@ std::unique_ptr<IExecutionProvider> DefaultMIGraphXExecutionProvider() {
 
 std::unique_ptr<IExecutionProvider> DefaultOpenVINOExecutionProvider() {
 #ifdef USE_OPENVINO
-  return CreateExecutionProviderFactory_OpenVINO("")->CreateProvider();
+  return CreateExecutionProviderFactory_OpenVINO("", false, "")->CreateProvider();
 #else
   return nullptr;
 #endif