Openvino ep ort 5.1 (#17042)

OpenVINO EP ORT 5.1 branch changes for the new API to take in OpenVINO Provider Options, and for compatibility with OV 2023.1.

### Motivation and Context

The change is required for the new API to take in OpenVINO Provider Options and make it seamless.

---------

Signed-off-by: MaajidKhan <n.maajid.khan@intel.com>
Co-authored-by: saurabhintel0 <saurabh1.kale@intel.com>
Co-authored-by: MaajidKhan <n.maajid.khan@intel.com>
Co-authored-by: Suryaprakash Shanmugam <suryaprakash.shanmugam@intel.com>
Co-authored-by: Preetha Veeramalai <preetha.veeramalai@intel.com>
2026-06-21 02:18:09 +00:00 · 2023-08-10 00:20:10 +05:30 · 2023-08-10 00:20:10 +05:30 · 2c5d4dce77
commit 2c5d4dce77
parent 03c3e91b0d
27 changed files with 333 additions and 211 deletions
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -1236,9 +1236,12 @@ if (onnxruntime_USE_OPENVINO)
  elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0")
    set(OPENVINO_VERSION "2023.0")
    add_definitions(-DOPENVINO_2023_0=1)
+  elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.1")
+    set(OPENVINO_VERSION "2023.1")
+    add_definitions(-DOPENVINO_2023_1=1)
  elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino")
-    set(OPENVINO_VERSION "2023.0")
-    add_definitions(-DOPENVINO_2023_0=1)
+    set(OPENVINO_VERSION "2023.1")
+    add_definitions(-DOPENVINO_2023_1=1)
  else()
    message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}")
  endif()
--- a/docs/python/ReadMeOV.rst
+++ b/docs/python/ReadMeOV.rst
@ -7,6 +7,7 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
 - Intel® CPUs
 - Intel® integrated GPUs
 - Intel® discrete GPUs
+ - Intel® integrated VPUs

 Installation
 ------------
@ -15,12 +16,13 @@ Requirements
 ^^^^^^^^^^^^

 - Ubuntu 18.04, 20.04, RHEL(CPU only) or Windows 10 - 64 bit
- Python 3.8, 3.9 or 3.10 for Linux and only Python3.10 for Windows
+- Python 3.8, 3.9 or 3.10 for Linux and only Python 3.10 for Windows

 This package supports:
 - Intel® CPUs
 - Intel® integrated GPUs
 - Intel® discrete GPUs
+ - Intel® integrated VPUs

 ``pip3 install onnxruntime-openvino``

@ -34,7 +36,7 @@ For more details on build and installation please refer to `Build <https://onnxr
 Usage
 ^^^^^

-By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated or discrete GPU. 
+By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated or discrete GPU.
 Invoke `the provider config device type argument <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options>`_ to change the hardware on which inferencing is done.

 For more API calls and environment variables, see  `Usage <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#configuration-options>`_.
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@ -7,9 +7,6 @@
 #include <memory>

 #include "core/providers/shared_library/provider_api.h"
-
-#include <inference_engine.hpp>
-
 #include "contexts.h"
 #include "backend_manager.h"
 #include "ibackend.h"
@ -36,11 +33,11 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
                               const logging::Logger& logger) {
  auto prec_str = GetGlobalContext().precision_str;
  if (prec_str == "FP32") {
-    subgraph_context_.precision = InferenceEngine::Precision::FP32;
+    subgraph_context_.precision = "FP32";
  } else if (prec_str == "FP16") {
-    subgraph_context_.precision = InferenceEngine::Precision::FP16;
+    subgraph_context_.precision = "FP16";
  } else if (prec_str == "U8") {
-    subgraph_context_.precision = InferenceEngine::Precision::U8;
+    subgraph_context_.precision = "U8";
  } else {
    throw std::string("Invalid OpenVINO Precision type: " + prec_str);
  }
@ -78,19 +75,17 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
    if (GetGlobalContext().device_type.find("CPU") != std::string::npos ||
        GetGlobalContext().device_type.find("GPU") != std::string::npos) {
-      if (GetGlobalContext().enable_dynamic_shapes) {
-        LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
-                           << "Creating backend Dynamic Shapes";
-        try {
-          concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
-                                                          GetGlobalContext(),
-                                                          subgraph_context_);
-        } catch (std::string const& msg) {
-          throw msg;
-        }
-        LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
-                           << "Backend created for graph " << subgraph_context_.subgraph_name;
+      LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
+                         << "Creating backend Dynamic Shapes";
+      try {
+        concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
+                                                        GetGlobalContext(),
+                                                        subgraph_context_);
+      } catch (std::string const& msg) {
+        throw msg;
      }
+      LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
+                         << "Backend created for graph " << subgraph_context_.subgraph_name;
    }
  } else {
    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;
@ -257,7 +252,7 @@ void BackendManager::Compute(OrtKernelContext* context) {
  }
 #endif
  bool use_dynamic_backend = true;
-  if (GetGlobalContext().enable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape &&
+  if (subgraph_context_.has_dynamic_input_shape &&
      (GetGlobalContext().device_type.find("CPU") != std::string::npos ||
       GetGlobalContext().device_type.find("GPU") != std::string::npos)) {
    concrete_backend_->Infer(context);
--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@ -8,8 +8,8 @@
 #include <fstream>

 #include "ov_interface.h"
-#include <ngraph/pass/convert_fp32_to_fp16.hpp>
-#include <ngraph/pass/constant_folding.hpp>
+#include "openvino/pass/convert_fp32_to_fp16.hpp"
+#include "openvino/pass/constant_folding.hpp"
 #include "core/providers/shared_library/provider_api.h"
 #include "backend_utils.h"

@ -50,14 +50,14 @@ struct static_cast_int64 {
 std::shared_ptr<OVNetwork>
 CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context,
              const SubGraphContext& subgraph_context,
-              std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
+              std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
  if (IsCILogEnabled()) {
    std::cout << "CreateNgraphFunc" << std::endl;
  }
  const std::string model = model_proto.SerializeAsString();
  try {
    auto cnn_network = global_context.ie_core.ReadModel(model);
-    if ((subgraph_context.precision == InferenceEngine::Precision::FP16) &&
+    if ((subgraph_context.precision == "FP16") &&
        (global_context.device_type.find("VPUX") == std::string::npos)) {
      // FP16 transformations
      ov::pass::ConvertFP32ToFP16 pass_obj;
@ -88,7 +88,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
      size_t index = results.size() - 1;

      for (auto it = results.rbegin(); it != results.rend(); ++it) {
-        if (auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
+        if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
          const_outputs_map[(*it)->get_friendly_name()] = const_node;
          results.erase(results.begin() + index);
        }
@ -96,12 +96,11 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
      }
    }
 #ifndef NDEBUG
-#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0)
+#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1)
    if (IsDebugEnabled()) {
      std::string name = cnn_network->get_friendly_name();
      ov::pass::Serialize serializer(name + ".xml", name + ".bin");
      serializer.run_on_model(cnn_network);
-      ngraph::plot_graph(cnn_network, name + "_executable" + ".dot");
    }
 #endif
 #endif
@ -111,31 +110,6 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
  }
 }

-InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type) {
-  ONNX_NAMESPACE::DataType type_string = ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(onnx_type);
-  if (*type_string == "float" || *type_string == "tensor(float)") {
-    return InferenceEngine::Precision::FP32;
-  } else if (*type_string == "float16" || *type_string == "tensor(float16)") {
-    return InferenceEngine::Precision::FP16;
-  } else if (*type_string == "int32" || *type_string == "tensor(int32)") {
-    return InferenceEngine::Precision::I32;
-  } else if (*type_string == "int16" || *type_string == "tensor(int16)") {
-    return InferenceEngine::Precision::I16;
-  } else if (*type_string == "int8" || *type_string == "tensor(int8)") {
-    return InferenceEngine::Precision::I8;
-  } else if (*type_string == "uint16" || *type_string == "tensor(uint16)") {
-    return InferenceEngine::Precision::U16;
-  } else if (*type_string == "uint8" || *type_string == "tensor(uint8)") {
-    return InferenceEngine::Precision::U8;
-  } else if (*type_string == "bool" || *type_string == "tensor(bool)") {
-    return InferenceEngine::Precision::U8;
-  } else if (*type_string == "int64" || *type_string == "tensor(int64)") {
-    return InferenceEngine::Precision::I32;
-  } else {
-    throw std::string(log_tag + "Unsupported Data type");
-  }
-}
-
 Ort::UnownedValue
 GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
                OVInferRequestPtr infer_request,
@ -166,7 +140,7 @@ Ort::UnownedValue
 GetOutputTensor(Ort::KernelContext& context,
                std::string output_name,
                std::unordered_map<std::string, int> output_names,
-                std::shared_ptr<ngraph::Node> node) {
+                std::shared_ptr<ov::Node> node) {
  // Find position of '/' in the output_name
  int pos = output_name.find("/");
  // Copy the substring from start to pos
@ -210,25 +184,25 @@ int GetFirstAvailableDevice(GlobalContext& global_context) {
  return i;
 }

-void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::UnownedValue& out_tensor) {
+void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor) {
  switch (node->get_element_type()) {
-    case ngraph::element::Type_t::f32: {
+    case ov::element::Type_t::f32: {
      FillOutputHelper<float>(out_tensor, node);
      break;
    }
-    case ngraph::element::Type_t::boolean: {
+    case ov::element::Type_t::boolean: {
      FillOutputHelper<char>(out_tensor, node);
      break;
    }
-    case ngraph::element::Type_t::i32: {
+    case ov::element::Type_t::i32: {
      FillOutputHelper<int32_t>(out_tensor, node);
      break;
    }
-    case ngraph::element::Type_t::i64: {
+    case ov::element::Type_t::i64: {
      FillOutputHelper<int64_t>(out_tensor, node);
      break;
    }
-    case ngraph::element::Type_t::f16: {
+    case ov::element::Type_t::f16: {
      FillOutputHelper<float>(out_tensor, node);
      break;
    }
@ -237,14 +211,22 @@ void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::Unowne
  }
 }

+#if defined(_MSC_VER)
+#pragma warning(disable : 4127)
+#endif
+
 template <typename T>
-void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ngraph::Node> node) {
-  auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>(node);
+void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ov::Node> node) {
+  auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
  auto res = const_node->cast_vector<T>();
  T* tensor_data = out_tensor.GetTensorMutableData<T>();
  std::copy(res.begin(), res.end(), tensor_data);
 }

+#if defined(_MSC_VER)
+#pragma warning(default : 4127)
+#endif
+
 void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx,
                   std::string input_name, Ort::KernelContext& context,
                   const SubGraphContext& subgraph_context) {
--- a/onnxruntime/core/providers/openvino/backend_utils.h
+++ b/onnxruntime/core/providers/openvino/backend_utils.h
@ -32,19 +32,16 @@ bool IsCILogEnabled();

 int GetFirstAvailableDevice(GlobalContext& global_context);

-void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::UnownedValue& out_tensor);
+void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor);

 template <typename T>
-void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ngraph::Node> node);
+void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ov::Node> node);

 Ort::UnownedValue
 GetOutputTensor(Ort::KernelContext& context,
                std::string output_name,
                std::unordered_map<std::string, int> output_names,
-                std::shared_ptr<ngraph::Node> node);
-
-InferenceEngine::Precision
-ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type);
+                std::shared_ptr<ov::Node> node);

 Ort::UnownedValue
 GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
@ -61,7 +58,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,

 std::shared_ptr<OVNetwork>
 CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context,
-              std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map);
+              std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);

 void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
                            std::ostream& stream, std::string deviceName);
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@ -9,7 +9,7 @@

 #include "core/providers/shared_library/provider_api.h"
 #include "../backend_utils.h"
-#include <ngraph/pass/constant_folding.hpp>
+// #include <ngraph/pass/constant_folding.hpp>
 #include "basic_backend.h"
 #include "../backend_manager.h"

@ -37,6 +37,9 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
  // Setting OpenCL queue throttling for GPU
  EnableGPUThrottling(device_config);

+  // Enable streams; default=1 unless overridden by user config
+  EnableStreams();
+
 #ifndef NDEBUG
  if (IsDebugEnabled()) {
    std::string file_name = subgraph_context.subgraph_name + "_static.onnx";
@ -45,6 +48,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
  }
 #endif
  try {
+    std::string dev_prec = global_context.device_type + "_" + global_context_.precision_str;
    if (global_context.is_wholly_supported_graph) {
 #if defined(IO_BUFFER_ENABLED)
      if ((global_context.device_type.find("GPU") != std::string::npos) &&
@ -61,8 +65,8 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
        LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
      }
 #else
-#if defined(OPENVINO_2023_0)
-      if (subgraph_context.precision != InferenceEngine::Precision::FP16 && global_context_.enable_dynamic_shapes == false) {
+#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
+      if (!subgraph_context_.has_dynamic_input_shape && dev_prec != "CPU_FP16") {
        const std::string model = model_proto.SerializeAsString();
        exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
        LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
@ -98,7 +102,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
  inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, nireq));
 }

-bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
+bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
  if (const_outputs_map.size() == subgraph_context_.output_names.size())
    subgraph_context_.is_constant = true;
  if (subgraph_context_.is_constant) {
@ -109,20 +113,23 @@ bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ngraph
 }

 void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
-  // Set inference precision if device_type != AUTO
-  // if (global_context_.device_type.find("GPU_FP16")!= std::string::npos){
-  //   device_config.emplace(ov::hint::inference_precision(global_context_.precision_str));
-  // }
  device_config = {};
+  // Set inference precision based on device precision for OV backend
+  if (global_context_.precision_str.find("FP16") != std::string::npos && global_context_.device_type == "GPU") {
+    device_config.emplace(ov::hint::inference_precision("f16"));
+  }
+  if (global_context_.precision_str.find("FP32") != std::string::npos) {
+    device_config.emplace(ov::hint::inference_precision("f32"));
+  }
 #ifndef NDEBUG
  if (openvino_ep::backend_utils::IsDebugEnabled()) {
    device_config.emplace(ov::enable_profiling(true));
  }
 #endif
-#if defined(OPENVINO_2023_0)
+#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
  if (global_context_.device_type.find("VPUX") != std::string::npos) {
    std::pair<std::string, ov::Any> device_property;
-    device_property = std::make_pair("VPUX_COMPILER_TYPE", "MLIR");
+    device_property = std::make_pair("VPU_COMPILER_TYPE", "MLIR");
    device_config.emplace(ov::device::properties("VPUX", device_property));
  }
 #endif
@ -147,10 +154,17 @@ void BasicBackend::EnableCaching() {
 void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) {
  if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
    LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
-    device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
+    std::pair<std::string, ov::Any> device_property;
+    device_property = std::make_pair("PLUGIN_THROTTLE", "1");
+    device_config.emplace(ov::device::properties("GPU_CONFIG_KEY", device_property));
+    // device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
  }
 }

+void BasicBackend::EnableStreams() {
+  global_context_.ie_core.SetStreams(global_context_.device_type, global_context_.num_streams);
+}
+
 // Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on
 // an Infer Request indexed by infer_req_idx
 void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
@ -177,7 +191,6 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
      }
      size_t batch_slice_idx = 0;
      if (subgraph_context_.has_dynamic_input_shape &&
-          global_context_.enable_dynamic_shapes == true &&
          (global_context_.device_type.find("CPU") != std::string::npos ||
           global_context_.device_type.find("GPU") != std::string::npos)) {
        auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.h
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h
@ -31,10 +31,11 @@ class BasicBackend : public IBackend {
 private:
  bool ImportBlob(std::string hw_target, bool vpu_status);
  void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&);
-  bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map);
+  bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
  void PopulateConfigValue(ov::AnyMap& device_config);
  void EnableCaching();
  void EnableGPUThrottling(ov::AnyMap& device_config);
+  void EnableStreams();
  void StartAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);

 #ifdef IO_BUFFER_ENABLED
@ -48,7 +49,7 @@ class BasicBackend : public IBackend {
  mutable std::mutex compute_lock_;
  std::shared_ptr<OVNetwork> ie_cnn_network_;
  OVExeNetwork exe_network_;
-  std::map<std::string, std::shared_ptr<ngraph::Node>> const_outputs_map_;
+  std::map<std::string, std::shared_ptr<ov::Node>> const_outputs_map_;
  std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
 #if defined IO_BUFFER_ENABLED
  OVRemoteContextPtr remote_context_;
--- a/onnxruntime/core/providers/openvino/contexts.h
+++ b/onnxruntime/core/providers/openvino/contexts.h
@ -20,6 +20,7 @@ struct GlobalContext {
  std::string precision_str;
  std::string device_id;
  std::string cache_dir;
+  int num_streams;
  std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};
  std::vector<std::string> deviceTags = {"0", "1", "2", "3", "4", "5", "6", "7"};
  std::string onnx_model_name;
@ -40,7 +41,7 @@ struct SubGraphContext {
  std::vector<int> input_indexes;
  std::unordered_map<std::string, int> input_names;
  std::unordered_map<std::string, int> output_names;
-  OVPrecision precision;
+  std::string precision;
 };

 }  // namespace openvino_ep
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@ -19,6 +19,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
  openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
  openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
  openvino_ep::BackendManager::GetGlobalContext().cache_dir = info.cache_dir_;
+  openvino_ep::BackendManager::GetGlobalContext().num_streams = info.num_streams_;
  openvino_ep::BackendManager::GetGlobalContext().context = info.context_;
  openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_;
  openvino_ep::BackendManager::GetGlobalContext().enable_dynamic_shapes = info.enable_dynamic_shapes_;
@ -130,6 +131,10 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
  openvino_ep::GetCapability obj(graph_viewer,
                                 openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_0");
  result = obj.Execute();
+#elif defined(OPENVINO_2023_1)
+  openvino_ep::GetCapability obj(graph_viewer,
+                                 openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_1");
+  result = obj.Execute();
 #endif

  return result;
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@ -57,15 +57,16 @@ struct OpenVINOExecutionProviderInfo {
  std::string device_id_;
  size_t num_of_threads_;
  std::string cache_dir_;
+  int num_streams_;
  void* context_;
  bool enable_opencl_throttling_;
  bool enable_dynamic_shapes_;

  explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id,
-                                         size_t num_of_threads, std::string cache_dir,
+                                         size_t num_of_threads, std::string cache_dir, int num_streams,
                                         void* context, bool enable_opencl_throttling,
                                         bool enable_dynamic_shapes)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), cache_dir_(cache_dir), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
+      : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), cache_dir_(cache_dir), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
    if (dev_type == "") {
      LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
                         << "No runtime device selection option provided.";
@ -149,7 +150,7 @@ struct OpenVINOExecutionProviderInfo {
                       << "Choosing Device: " << device_type_ << " , Precision: " << precision_;
  }
  OpenVINOExecutionProviderInfo() {
-    OpenVINOExecutionProviderInfo("", false, "", 0, "", NULL, false, false);
+    OpenVINOExecutionProviderInfo("", false, "", 0, "", 1, NULL, false, false);
  }
 };

--- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@ -3,16 +3,16 @@

 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/openvino/openvino_provider_factory.h"
-#include "openvino_execution_provider.h"
-#include "openvino_provider_factory_creator.h"
+#include "core/providers/openvino/openvino_execution_provider.h"
+#include "core/providers/openvino/openvino_provider_factory_creator.h"

 namespace onnxruntime {
 struct OpenVINOProviderFactory : IExecutionProviderFactory {
  OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
                          const char* device_id, size_t num_of_threads,
-                          const char* cache_dir, void* context,
+                          const char* cache_dir, int num_streams, void* context,
                          bool enable_opencl_throttling, bool enable_dynamic_shapes)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
+      : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
    device_type_ = (device_type == nullptr) ? "" : device_type;
    device_id_ = (device_id == nullptr) ? "" : device_id;
    cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
@ -28,6 +28,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
  std::string device_id_;
  size_t num_of_threads_;
  std::string cache_dir_;
+  int num_streams_;
  void* context_;
  bool enable_opencl_throttling_;
  bool enable_dynamic_shapes_;
@ -35,20 +36,11 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {

 std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
  OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_,
-                                     cache_dir_, context_, enable_opencl_throttling_,
+                                     cache_dir_, num_streams_, context_, enable_opencl_throttling_,
                                     enable_dynamic_shapes_);
  return std::make_unique<OpenVINOExecutionProvider>(info);
 }

-std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
-    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads,
-    const char* cache_dir, void* context, bool enable_opencl_throttling,
-    bool enable_dynamic_shapes) {
-  return std::make_shared<onnxruntime::OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile,
-                                                                device_id, num_of_threads, cache_dir, context, enable_opencl_throttling,
-                                                                enable_dynamic_shapes);
-}
-
 }  // namespace onnxruntime

 namespace onnxruntime {
@ -63,12 +55,81 @@ struct OpenVINO_Provider : Provider {
  void* GetInfo() override { return &g_info; }

  std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const void* void_params) override {
-    auto& params = *reinterpret_cast<const OrtOpenVINOProviderOptions*>(void_params);
-    return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile,
-                                                     params.device_id, params.num_of_threads,
-                                                     params.cache_dir,
-                                                     params.context, params.enable_opencl_throttling,
-                                                     params.enable_dynamic_shapes);
+    auto& provider_options_map = *reinterpret_cast<const ProviderOptions*>(void_params);
+
+    const char* device_type = "";           // [device_type]: Overrides the accelerator hardware type and precision
+                                            //   with these values at runtime.
+    bool enable_vpu_fast_compile = false;   // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to
+                                            // speed up the model's compilation to VPU device-specific format.
+    const char* device_id = "";             // [device_id]: Selects a particular hardware device for inference.
+    size_t num_of_threads = 8;              // [num_of_threads]: Overrides the accelerator default value of number of
+                                            //  threads with this value at runtime.
+    const char* cache_dir = "";             // [cache_dir]: specify the path to
+                                            // dump and load the blobs for the model caching/kernel caching (GPU)
+                                            // feature. If blob files are already present, it will be directly loaded.
+    int num_streams = 1;                    // [num_streams]: Option that specifies the number of parallel inference
+                                            // requests to be processed on a given `device_type`. Overrides the
+                                            // accelerator default value of number of streams with this value at runtime.
+    bool enable_opencl_throttling = false;  // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
+                                            // device (Reduces CPU Utilization when using GPU)
+    bool enable_dynamic_shapes = false;     // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device
+    void* context = nullptr;
+
+    if (provider_options_map.find("device_type") != provider_options_map.end()) {
+      device_type = provider_options_map.at("device_type").c_str();
+    }
+    if (provider_options_map.find("device_id") != provider_options_map.end()) {
+      device_id = provider_options_map.at("device_id").c_str();
+    }
+    if (provider_options_map.find("cache_dir") != provider_options_map.end()) {
+      cache_dir = provider_options_map.at("cache_dir").c_str();
+    }
+    if (provider_options_map.find("context") != provider_options_map.end()) {
+      context = (void*)provider_options_map.at("context").c_str();
+    }
+
+    if (provider_options_map.find("num_of_threads") != provider_options_map.end()) {
+      num_of_threads = std::stoi(provider_options_map.at("num_of_threads"));
+    }
+
+    if (provider_options_map.find("num_streams") != provider_options_map.end()) {
+      num_streams = std::stoi(provider_options_map.at("num_streams"));
+    }
+    std::string bool_flag = "";
+    if (provider_options_map.find("enable_vpu_fast_compile") != provider_options_map.end()) {
+      bool_flag = provider_options_map.at("enable_vpu_fast_compile");
+      if (bool_flag == "true" || bool_flag == "True")
+        enable_vpu_fast_compile = true;
+      else if (bool_flag == "false" || bool_flag == "False")
+        enable_vpu_fast_compile = false;
+      bool_flag = "";
+    }
+
+    if (provider_options_map.find("enable_opencl_throttling") != provider_options_map.end()) {
+      bool_flag = provider_options_map.at("enable_opencl_throttling");
+      if (bool_flag == "true" || bool_flag == "True")
+        enable_opencl_throttling = true;
+      else if (bool_flag == "false" || bool_flag == "False")
+        enable_opencl_throttling = false;
+      bool_flag = "";
+    }
+
+    if (provider_options_map.find("enable_dynamic_shapes") != provider_options_map.end()) {
+      bool_flag = provider_options_map.at("enable_dynamic_shapes");
+      if (bool_flag == "true" || bool_flag == "True")
+        enable_dynamic_shapes = true;
+      else if (bool_flag == "false" || bool_flag == "False")
+        enable_dynamic_shapes = false;
+    }
+    return std::make_shared<OpenVINOProviderFactory>(device_type,
+                                                     enable_vpu_fast_compile,
+                                                     device_id,
+                                                     num_of_threads,
+                                                     cache_dir,
+                                                     num_streams,
+                                                     context,
+                                                     enable_opencl_throttling,
+                                                     enable_dynamic_shapes);
  }

  void Initialize() override {
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h
@ -6,12 +6,14 @@
 #include <memory>

 #include "core/providers/providers.h"
+#include "core/framework/provider_options.h"

 struct OrtOpenVINOProviderOptions;

 namespace onnxruntime {
 // defined in provider_bridge_ort.cc
 struct OpenVINOProviderFactoryCreator {
+  static std::shared_ptr<IExecutionProviderFactory> Create(const ProviderOptions* provider_options_map);
  static std::shared_ptr<IExecutionProviderFactory> Create(const OrtOpenVINOProviderOptions* provider_options);
 };
 }  // namespace onnxruntime
--- a/onnxruntime/core/providers/openvino/ov_interface.cc
+++ b/onnxruntime/core/providers/openvino/ov_interface.cc
@ -42,7 +42,7 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std
  }
 }

-#if defined(OPENVINO_2023_0)
+#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
 OVExeNetwork OVCore::LoadNetwork(const std::string& model, std::string& hw_target, ov::AnyMap& device_config, std::string name) {
  ov::CompiledModel obj;
  try {
@ -75,8 +75,12 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& model, OVRemoteCont
 #endif

 std::vector<std::string> OVCore::GetAvailableDevices() {
-  auto obj = oe.get_available_devices();
-  return obj;
+  auto available_devices = oe.get_available_devices();
+  return available_devices;
+}
+
+void OVCore::SetStreams(const std::string& device_type, int num_streams) {
+  oe.set_property(device_type, {ov::num_streams(num_streams)});
 }

 OVInferRequest OVExeNetwork::CreateInferRequest() {
--- a/onnxruntime/core/providers/openvino/ov_interface.h
+++ b/onnxruntime/core/providers/openvino/ov_interface.h
@ -5,11 +5,12 @@

 #include <vector>

-#include <inference_engine.hpp>
-#if defined(OPENVINO_2022_1) || (OPENVINO_2022_2) || (OPENVINO_2022_3) || (OPENVINO_2023_0)
+#if defined(OPENVINO_2022_1) || (OPENVINO_2022_2) || (OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1)
 #define OV_API_20
 #include "openvino/openvino.hpp"
 #include "openvino/pass/convert_fp32_to_fp16.hpp"
+#else
+#include <inference_engine.hpp>
 #endif

 #ifdef IO_BUFFER_ENABLED
@ -26,10 +27,8 @@ class OVCore;
 class OVInferRequest;
 class OVExeNetwork;

-typedef InferenceEngine::Precision OVPrecision;
 typedef ov::Tensor OVTensor;
 typedef ov::ProfilingInfo OVProfilingInfo;
-typedef ov::AnyMap OVConfig;
 typedef ov::Model OVNetwork;
 typedef std::shared_ptr<OVInferRequest> OVInferRequestPtr;
 typedef std::shared_ptr<OVTensor> OVTensorPtr;
@ -45,7 +44,7 @@ class OVCore {
 public:
  std::shared_ptr<OVNetwork> ReadModel(const std::string& model_stream) const;
  OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name);
-#if defined(OPENVINO_2023_0)
+#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
  OVExeNetwork LoadNetwork(const std::string& model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name);
 #endif
  void SetCache(std::string cache_dir_path);
@ -56,6 +55,7 @@ class OVCore {
  ov::Core& Get() {
    return oe;
  }
+  void SetStreams(const std::string& device_type, int num_streams);
 };

 class OVExeNetwork {
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
@ -33,8 +33,10 @@ GetCapability::GetCapability(const GraphViewer& graph_viewer_param, std::string
    data_ops_ = new DataOps(graph_viewer_, V_2022_3, device_type_);
  } else if (version_param == "V_2023_0") {
    data_ops_ = new DataOps(graph_viewer_, V_2023_0, device_type_);
+  } else if (version_param == "V_2023_1") {
+    data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_);
  } else {
-    data_ops_ = new DataOps(graph_viewer_, V_2023_0, device_type_);
+    data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_);
  }
 }

@ -46,6 +48,11 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
    return result;
  }

+  // Bail out with the current result when this viewer is a subgraph named "tf2onnx".
+  if (graph_viewer_.IsSubgraph() && graph_viewer_.Name() == "tf2onnx") {
+    return result;
+  }
+
  // This is a list of initializers that nGraph considers as constants. Example weights, reshape shape etc.
  std::unordered_set<std::string> ng_required_initializers;

--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@ -17,8 +17,8 @@
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
 #endif
-#include <ngraph/ngraph.hpp>
-#include <ngraph/frontend/onnx_import/onnx.hpp>
+// #include <ngraph/ngraph.hpp>
+// #include <ngraph/frontend/onnx_import/onnx.hpp>
 #if defined(_MSC_VER)
 #pragma warning(default : 4244 4245)
 #elif __GNUC__
@ -36,6 +36,7 @@ std::set<std::string> ops_supported_only_in_model = {
    "ConstantOfShape",
    "DequantizeLinear",
    "Dropout",
+    "Einsum",
    "Exp",
    "Expand",
    "EyeLike",
@ -127,6 +128,7 @@ std::vector<SupportedOp> supported_op_mode = {
    {"Dropout", V_2023_0, {"VPUX"}},
    {"Elu", V_2020_4, {"CPU", "GPU"}},
    {"Elu", V_2023_0, {"VPUX"}},
+    // {"Einsum", V_2023_0, {"CPU", "GPU"}},
    {"Equal", V_2020_4, {"CPU", "GPU"}},
    {"Equal", V_2023_0, {"VPUX"}},  // Added for whisper decoder model.
    {"Erf", V_2020_4, {"CPU", "GPU"}},
@ -155,6 +157,7 @@ std::vector<SupportedOp> supported_op_mode = {
    {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
    {"GreaterOrEqual", V_2023_0, {"VPUX"}},
    {"GridSample", V_2022_3, {"CPU"}},
+    {"GridSample", V_2023_0, {"GPU"}},
    {"Identity", V_2020_4, {"CPU", "GPU"}},
    {"Identity", V_2023_0, {"VPUX"}},  // NoOP
    {"If", V_2022_3, {"CPU", "GPU"}},
@ -196,6 +199,7 @@ std::vector<SupportedOp> supported_op_mode = {
    {"Neg", V_2023_0, {"VPUX"}},
    {"NonMaxSuppression", V_2021_1, {"CPU", "GPU"}},
    {"NonZero", V_2021_1, {"CPU"}},
+    {"NonZero", V_2023_0, {"GPU"}},
    {"Not", V_2021_1, {"CPU", "GPU"}},
    {"Not", V_2020_4, {"CPU", "GPU"}},
    {"OneHot", V_2020_4, {"CPU", "GPU"}},
@ -210,6 +214,7 @@ std::vector<SupportedOp> supported_op_mode = {
    {"QuantizeLinear", V_2021_4, {"CPU", "GPU"}},
    {"QuantizeLinear", V_2023_0, {"VPUX"}},
    {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
+    {"RandomNormal", V_2023_0, {"CPU", "GPU"}},
    {"Range", V_2022_1, {"CPU", "GPU"}},
    {"Range", V_2023_0, {"VPUX"}},
    {"Reciprocal", V_2020_4, {"CPU", "GPU"}},
@ -341,6 +346,7 @@ void DataOps::populate_op_mode_supported() {
  no_dimension_supported_.push_back({"Div", V_2020_4, {"All"}});
  no_dimension_supported_.push_back({"DequantizeLinear", V_2021_4, {"All"}});
  no_dimension_supported_.push_back({"Equal", V_2022_1, {"CPU"}});
+  no_dimension_supported_.push_back({"Equal", V_2023_0, {"GPU"}});
  no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}});
  no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}});
  no_dimension_supported_.push_back({"Greater", V_2023_0, {"VPUX"}});
@ -356,6 +362,7 @@ void DataOps::populate_op_mode_supported() {
  no_dimension_supported_.push_back({"ReduceProd", V_2022_1, {"CPU", "GPU"}});
  no_dimension_supported_.push_back({"Reshape", V_2022_1, {"All"}});
  no_dimension_supported_.push_back({"Shape", V_2022_1, {"GPU"}});
+  no_dimension_supported_.push_back({"Shape", V_2023_0, {"CPU"}});
  no_dimension_supported_.push_back({"Squeeze", V_2020_4, {"All"}});
  no_dimension_supported_.push_back({"Sub", V_2020_4, {"All"}});
  no_dimension_supported_.push_back({"Unsqueeze", V_2020_4, {"All"}});
@ -1022,8 +1029,10 @@ bool DataOps::node_is_supported(const std::map<std::string, std::set<std::string
        // Zero dimension check
        for (const auto& dim : shape->dim()) {
          if (utils::HasDimValue(dim) && dim.dim_value() == 0) {
-            if ((device_id_.find("GPU") != std::string::npos) && ((optype == "Expand") ||
-                                                                  (optype == "Slice") || (optype == "Concat") || (optype == "Shape"))) {
+            if (((device_id_.find("CPU") != std::string::npos) || (device_id_.find("GPU") != std::string::npos)) &&
+                ((optype == "Expand") || (optype == "Equal") ||
+                 (optype == "Slice") || (optype == "Concat") ||
+                 (optype == "Shape"))) {
              return;
            }
            has_unsupported_dimension = true;
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
@ -18,7 +18,8 @@ enum versionNum {
  V_2022_1,
  V_2022_2,
  V_2022_3,
-  V_2023_0
+  V_2023_0,
+  V_2023_1,
 };

 using VersionNum = enum versionNum;
--- a/onnxruntime/core/providers/openvino/ov_versions/utils.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/utils.cc
@ -9,8 +9,15 @@
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
 #endif
-#include <ngraph/ngraph.hpp>
+
+#include "openvino/core/deprecated.hpp"
+#define IN_OV_COMPONENT
+#define NGRAPH_LEGACY_HEADER_INCLUDED
 #include <ngraph/frontend/onnx_import/onnx.hpp>
+
+#undef NGRAPH_LEGACY_HEADER_INCLUDED
+#undef IN_OV_COMPONENT
+
 #if defined(_MSC_VER)
 #pragma warning(default : 4244 4245)
 #elif __GNUC__
@ -40,6 +47,7 @@ bool IsOpSupportedOnlyInModel(std::string name) {
      "Concat",
      "ConstantOfShape",
      "Dropout",
+      "Einsum",
      "Expand",
      "EyeLike",
      "Exp",
@ -88,6 +96,7 @@ int GetOnnxOpSet(const GraphViewer& graph_viewer) {

 std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_opset) {
  std::map<std::string, std::set<std::string>> ng_supported_ops;
+  OPENVINO_SUPPRESS_DEPRECATED_START
  ng_supported_ops.emplace(kOnnxDomain, ngraph::onnx_import::get_supported_operators(onnx_opset, kOnnxDomain));

  const std::set<std::string> ng_disabled_ops = {"LSTM"};  // Place-holder for ops not supported.
@ -95,7 +104,7 @@ std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_op
  for (const auto& disabled_op : ng_disabled_ops) {
    ng_supported_ops.at(kOnnxDomain).erase(disabled_op);
  }
-
+  OPENVINO_SUPPRESS_DEPRECATED_END
  return ng_supported_ops;
 }

--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@ -1426,8 +1426,44 @@ std::shared_ptr<IExecutionProviderFactory> MIGraphXProviderFactoryCreator::Creat
  return s_library_migraphx.Get().CreateExecutionProviderFactory(provider_options);
 }

+// Adapter to convert the legacy OrtOpenVINOProviderOptions struct into the string-keyed ProviderOptions map.
+ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const OrtOpenVINOProviderOptions* legacy_ov_options) {
+  ProviderOptions ov_options_converted_map;
+  if (legacy_ov_options->device_type != nullptr)
+    ov_options_converted_map["device_type"] = legacy_ov_options->device_type;
+
+  // Flags must be spelled out: assigning the raw flag to a std::string would store a single char, not "true"/"false".
+  ov_options_converted_map["enable_vpu_fast_compile"] = legacy_ov_options->enable_vpu_fast_compile ? "true" : "false";
+  if (legacy_ov_options->device_id != nullptr)
+    ov_options_converted_map["device_id"] = legacy_ov_options->device_id;
+
+  ov_options_converted_map["num_of_threads"] = std::to_string(legacy_ov_options->num_of_threads);
+
+  if (legacy_ov_options->cache_dir != nullptr)
+    ov_options_converted_map["cache_dir"] = legacy_ov_options->cache_dir;
+
+  // A non-null context is stored as the text operator<< produces for the pointer; a null one becomes "".
+  std::stringstream context_string;
+  if (legacy_ov_options->context != nullptr)
+    context_string << legacy_ov_options->context;
+  ov_options_converted_map["context"] = context_string.str();
+
+  ov_options_converted_map["enable_opencl_throttling"] = legacy_ov_options->enable_opencl_throttling ? "true" : "false";
+  ov_options_converted_map["enable_dynamic_shapes"] = legacy_ov_options->enable_dynamic_shapes ? "true" : "false";
+
+  // This legacy path always reports a single stream. Add new provider options below.
+  ov_options_converted_map["num_streams"] = "1";
+  return ov_options_converted_map;
+}
+
 std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Create(const OrtOpenVINOProviderOptions* provider_options) {
-  return s_library_openvino.Get().CreateExecutionProviderFactory(provider_options);
+  ProviderOptions ov_options_converted_map = onnxruntime::OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(provider_options);
+  return s_library_openvino.Get().CreateExecutionProviderFactory(&ov_options_converted_map);
+}
+
+std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Create(const ProviderOptions* provider_options_map) {
+  // Hands the caller's key/value option map, unchanged, to the factory obtained through s_library_openvino.
+  return s_library_openvino.Get().CreateExecutionProviderFactory(provider_options_map);
 }

 std::shared_ptr<IExecutionProviderFactory> DnnlProviderFactoryCreator::Create(const OrtDnnlProviderOptions* dnnl_options) {
--- a/onnxruntime/core/session/provider_registration.cc
+++ b/onnxruntime/core/session/provider_registration.cc
@ -10,6 +10,7 @@
 #include "core/session/abi_session_options_impl.h"
 #include "core/session/onnxruntime_c_api.h"
 #include "core/session/ort_apis.h"
+#include "core/providers/openvino/openvino_provider_factory_creator.h"

 using namespace onnxruntime;

@ -71,6 +72,12 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider,
    options->provider_factories.push_back(QNNProviderFactoryCreator::Create(provider_options, &(options->value)));
 #else
    status = create_not_supported_status();
+#endif
+  } else if (strcmp(provider_name, "OpenVINO") == 0) {
+#if defined(USE_OPENVINO)
+    options->provider_factories.push_back(OpenVINOProviderFactoryCreator::Create(&provider_options));
+#else
+    status = create_not_supported_status();
 #endif
  } else if (strcmp(provider_name, "SNPE") == 0) {
 #if defined(USE_SNPE)
@ -115,7 +122,7 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider,
  } else {
    ORT_UNUSED_PARAMETER(options);
    status = OrtApis::CreateStatus(ORT_INVALID_ARGUMENT,
-                                   "Unknown provider name. Currently supported values are 'SNPE', 'XNNPACK', and 'AZURE'");
+                                   "Unknown provider name. Currently supported values are 'OpenVINO', 'SNPE', 'XNNPACK', 'QNN', 'WEBNN' and 'AZURE'");
  }

  return status;
--- a/onnxruntime/python/onnxruntime_pybind_schema.cc
+++ b/onnxruntime/python/onnxruntime_pybind_schema.cc
@ -39,8 +39,8 @@ void addGlobalSchemaFunctions(pybind11::module& m) {
 #endif
 #ifdef USE_OPENVINO
            []() {
-              OrtOpenVINOProviderOptions provider_options;
-              return onnxruntime::OpenVINOProviderFactoryCreator::Create(&provider_options);
+              ProviderOptions provider_options_map;
+              return onnxruntime::OpenVINOProviderFactoryCreator::Create(&provider_options_map);
            }(),
 #endif
 #ifdef USE_TENSORRT
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@ -780,56 +780,53 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
 #endif
  } else if (type == kOpenVINOExecutionProvider) {
 #ifdef USE_OPENVINO
-    OrtOpenVINOProviderOptions params;
-    params.device_type = openvino_device_type.c_str();
-    std::string cache_dir;
-
+    ProviderOptions OV_provider_options_map;
    auto it = provider_options_map.find(type);
    if (it != provider_options_map.end()) {
      for (auto option : it->second) {
        if (option.first == "device_type") {
-          openvino_device_type = option.second;
-          params.device_type = openvino_device_type.c_str();
+          OV_provider_options_map[option.first] = option.second;
+          continue;
        } else if (option.first == "enable_vpu_fast_compile") {
-          if (option.second == "True") {
-            params.enable_vpu_fast_compile = true;
-          } else if (option.second == "False") {
-            params.enable_vpu_fast_compile = false;
-          } else {
+          if (!(option.second == "True" || option.second == "true" ||
+                option.second == "False" || option.second == "false")) {
            ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
          }
-
+          OV_provider_options_map[option.first] = option.second;
        } else if (option.first == "enable_opencl_throttling") {
-          if (option.second == "True") {
-            params.enable_opencl_throttling = true;
-          } else if (option.second == "False") {
-            params.enable_opencl_throttling = false;
-          } else {
+          if (!(option.second == "True" || option.second == "true" ||
+                option.second == "False" || option.second == "false")) {
            ORT_THROW("Invalid value passed for enable_opencl_throttling: ", option.second);
          }
+          OV_provider_options_map[option.first] = option.second;
        } else if (option.first == "enable_dynamic_shapes") {
-          if (option.second == "True") {
-            params.enable_dynamic_shapes = true;
-          } else if (option.second == "False") {
-            params.enable_dynamic_shapes = false;
-          } else {
+          if (!(option.second == "True" || option.second == "true" ||
+                option.second == "False" || option.second == "false")) {
            ORT_THROW("Invalid value passed for enable_dynamic_shapes: ", option.second);
          }
+          OV_provider_options_map[option.first] = option.second;
        } else if (option.first == "device_id") {
-          params.device_id = option.second.c_str();
+          OV_provider_options_map[option.first] = option.second;
+          continue;
        } else if (option.first == "num_of_threads") {
-          params.num_of_threads = std::stoi(option.second);
+          OV_provider_options_map[option.first] = option.second;
+          continue;
+        } else if (option.first == "num_streams") {
+          OV_provider_options_map[option.first] = option.second;
+          continue;
        } else if (option.first == "cache_dir") {
-          cache_dir = option.second;
-          params.cache_dir = cache_dir.c_str();
+          OV_provider_options_map[option.first] = option.second;
+          continue;
        } else if (option.first == "context") {
-          params.context = (void*)(option.second.c_str());
+          OV_provider_options_map[option.first] = option.second;
+          continue;
        } else {
          ORT_THROW("Invalid OpenVINO EP option: ", option.first);
        }
      }
    }
-    if (std::shared_ptr<IExecutionProviderFactory> openvino_provider_factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(&params)) {
+    if (std::shared_ptr<IExecutionProviderFactory> openvino_provider_factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(
+            &OV_provider_options_map)) {
      auto p = openvino_provider_factory->CreateProvider();
      // Reset global variables config to avoid it being accidentally passed on to the next session
      openvino_device_type.clear();
--- a/onnxruntime/python/onnxruntime_pybind_state_common.h
+++ b/onnxruntime/python/onnxruntime_pybind_state_common.h
@ -440,7 +440,6 @@ std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGrap
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* params);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(const OrtDnnlProviderOptions* params);
-std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
 #ifdef USE_TVM
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tvm(const tvm::TvmEPOptions& info);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tvm(const char* params);
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@ -423,24 +423,12 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
 #endif
  } else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) {
 #ifdef USE_OPENVINO
-    std::string device_type = "";           // [device_type]: Overrides the accelerator hardware type and precision
-                                            //   with these values at runtime.
-    bool enable_vpu_fast_compile = false;   // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to
-                                            // speeds up the model's compilation to VPU device specific format.
-    std::string device_id = "";             // [device_id]: Selects a particular hardware device for inference.
-    size_t num_of_threads = 8;              // [num_of_threads]: Overrides the accelerator default value of number of
-                                            //  threads with this value at runtime.
-    std::string cache_dir = "";             // [cache_dir]: specify the path to
-                                            // dump and load the blobs for the model caching/kernel caching (GPU)
-                                            // feature. If blob files are already present, it will be directly loaded.
-    bool enable_opencl_throttling = false;  // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
-                                            // device (Reduces CPU Utilization when using GPU)
-    bool enable_dynamic_shapes = false;     // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device)
 #ifdef _MSC_VER
    std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
 #else
    std::string ov_string = performance_test_config.run_config.ep_runtime_config_string;
 #endif
+    std::unordered_map<std::string, std::string> ov_options;
    std::istringstream ss(ov_string);
    std::string token;
    while (ss >> token) {
@ -461,69 +449,64 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
                                                           "GPU.0_FP16", "GPU.1_FP16",
                                                           "VPUX_FP16", "VPUX_U8"};
        if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) {
-          device_type = value;
+          ov_options[key] = value;
        } else if (value.find("HETERO:") == 0) {
-          device_type = value;
+          ov_options[key] = value;
        } else if (value.find("MULTI:") == 0) {
-          device_type = value;
+          ov_options[key] = value;
        } else if (value.find("AUTO:") == 0) {
-          device_type = value;
+          ov_options[key] = value;
        } else {
          ORT_THROW(
              "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
              "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
-              "'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8', or from"
+              "'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from"
              " HETERO/MULTI/AUTO options available. \n");
        }
      } else if (key == "device_id") {
-        device_id = value;
+        ov_options[key] = value;
      } else if (key == "enable_vpu_fast_compile") {
-        if (value == "true" || value == "True") {
-          enable_vpu_fast_compile = true;
-        } else if (value == "false" || value == "False") {
-          enable_vpu_fast_compile = false;
+        if (value == "true" || value == "True" ||
+            value == "false" || value == "False") {
+          ov_options[key] = value;
        } else {
          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
        }
      } else if (key == "enable_opencl_throttling") {
-        if (value == "true" || value == "True") {
-          enable_opencl_throttling = true;
-        } else if (value == "false" || value == "False") {
-          enable_opencl_throttling = false;
+        if (value == "true" || value == "True" ||
+            value == "false" || value == "False") {
+          ov_options[key] = value;
        } else {
          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_opencl_throttling' should be a boolean i.e. true or false. Default value is false.\n");
        }
      } else if (key == "enable_dynamic_shapes") {
-        if (value == "true" || value == "True") {
-          enable_dynamic_shapes = true;
-        } else if (value == "false" || value == "False") {
-          enable_dynamic_shapes = false;
+        if (value == "true" || value == "True" ||
+            value == "false" || value == "False") {
+          ov_options[key] = value;
        } else {
          ORT_THROW(
              "[ERROR] [OpenVINO] The value for the key 'enable_dynamic_shapes' "
              "should be a boolean i.e. true or false. Default value is false.\n");
        }
      } else if (key == "num_of_threads") {
-        std::stringstream sstream(value);
-        sstream >> num_of_threads;
-        if ((int)num_of_threads <= 0) {
+        if (std::stoi(value) <= 0) {
          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_of_threads' should be greater than 0\n");
+        } else {
+          ov_options[key] = value;
        }
      } else if (key == "cache_dir") {
-        cache_dir = value;
+        ov_options[key] = value;
+      } else if (key == "num_streams") {
+        const int requested_streams = std::stoi(value);  // parse once instead of twice
+        if (requested_streams <= 0 || requested_streams > 8) {  // was `&&`, which no value can satisfy
+          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_streams' should be in the range of 1-8 \n");
+        }
+        ov_options[key] = value;
      } else {
-        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'cache_dir', 'enable_opencl_throttling|true'] \n");
+        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling', 'enable_dynamic_shapes'] \n");
      }
    }
-    OrtOpenVINOProviderOptions options;
-    options.device_type = device_type.c_str();                    // To set the device_type
-    options.device_id = device_id.c_str();                        // To set the device_id
-    options.enable_vpu_fast_compile = enable_vpu_fast_compile;    // To enable_vpu_fast_compile, default is false
-    options.num_of_threads = num_of_threads;                      // To set number of free InferRequests, default is 8
-    options.cache_dir = cache_dir.c_str();                        // sets the cache_dir, default is ""
-    options.enable_opencl_throttling = enable_opencl_throttling;  // Enables GPU Throttling (Reduces CPU Utilization)
-    options.enable_dynamic_shapes = enable_dynamic_shapes;        // Enables Dynamic Shapes feature
-    session_options.AppendExecutionProvider_OpenVINO(options);
+    session_options.AppendExecutionProvider("OpenVINO", ov_options);
 #else
    ORT_THROW("OpenVINO is not supported in this build\n");
 #endif
@ -579,7 +562,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
          ORT_THROW("Supported htp_performance_mode: " + str);
        }
      } else {
-        ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable', 
+        ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable',
 'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode'])");
      }

--- a/onnxruntime/test/providers/cpu/math/einsum_test.cc
+++ b/onnxruntime/test/providers/cpu/math/einsum_test.cc
@ -22,7 +22,7 @@ TEST(Einsum, ExplicitEinsumAsIdentity_1D_input) {
  test.AddAttribute<std::string>("equation", "i->i");
  test.AddInput<float>("x", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
  test.AddOutput<float>("y", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
-  test.Run();
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
 }

 // Implicit
@ -31,7 +31,7 @@ TEST(Einsum, ImplicitEinsumAsIdentity_1D_input) {
  test.AddAttribute<std::string>("equation", "i");
  test.AddInput<float>("x", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
  test.AddOutput<float>("y", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
-  test.Run();
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
 }

 // Theme: Transpose/Permutation
--- a/onnxruntime/test/util/default_providers.cc
+++ b/onnxruntime/test/util/default_providers.cc
@ -86,10 +86,19 @@ std::unique_ptr<IExecutionProvider> MIGraphXExecutionProviderWithOptions(const O
  return nullptr;
 }

+std::unique_ptr<IExecutionProvider> OpenVINOExecutionProviderWithOptions(const OrtOpenVINOProviderOptions* params) {
+#ifdef USE_OPENVINO
+  return OpenVINOProviderFactoryCreator::Create(params)->CreateProvider();
+#else
+  ORT_UNUSED_PARAMETER(params);
+#endif
+  return nullptr;
+}
+
 std::unique_ptr<IExecutionProvider> DefaultOpenVINOExecutionProvider() {
 #ifdef USE_OPENVINO
-  OrtOpenVINOProviderOptions params;
-  return OpenVINOProviderFactoryCreator::Create(&params)->CreateProvider();
+  ProviderOptions provider_options_map;
+  return OpenVINOProviderFactoryCreator::Create(&provider_options_map)->CreateProvider();
 #else
  return nullptr;
 #endif
--- a/onnxruntime/test/util/include/default_providers.h
+++ b/onnxruntime/test/util/include/default_providers.h
@ -18,9 +18,6 @@ std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGrap
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nnapi(
    uint32_t flags, const optional<std::string>& partitioning_stop_ops_list);
 // std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tvm(const char*);
-std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
-    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, const char* cache_dir);
-std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Rknpu();
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Rocm(const OrtROCMProviderOptions* provider_options);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params);
@ -45,6 +42,7 @@ std::unique_ptr<IExecutionProvider> TensorrtExecutionProviderWithOptions(const O
 std::unique_ptr<IExecutionProvider> TensorrtExecutionProviderWithOptions(const OrtTensorRTProviderOptionsV2* params);
 std::unique_ptr<IExecutionProvider> DefaultMIGraphXExecutionProvider();
 std::unique_ptr<IExecutionProvider> MIGraphXExecutionProviderWithOptions(const OrtMIGraphXProviderOptions* params);
+std::unique_ptr<IExecutionProvider> OpenVINOExecutionProviderWithOptions(const OrtOpenVINOProviderOptions* params);
 std::unique_ptr<IExecutionProvider> DefaultOpenVINOExecutionProvider();
 std::unique_ptr<IExecutionProvider> DefaultNnapiExecutionProvider();
 std::unique_ptr<IExecutionProvider> DefaultRknpuExecutionProvider();