From 2c5d4dce773ef9cdfb7e344046aaba9940d2a6ae Mon Sep 17 00:00:00 2001
From: sfatimar <sahar.fatima@intel.com>
Date: Thu, 10 Aug 2023 00:20:10 +0530
Subject: [PATCH] Openvino ep ort 5.1 (#17042)

OpenVINO EP ORT 5.1 Branch
Adds changes for the new API that takes in OpenVINO Provider Options
and adds compatibility with OpenVINO 2023.1


### Motivation and Context
The change is required so that the new API can take in OpenVINO Provider
Options seamlessly.

---------

Signed-off-by: MaajidKhan <n.maajid.khan@intel.com>
Co-authored-by: saurabhintel0 <saurabh1.kale@intel.com>
Co-authored-by: MaajidKhan <n.maajid.khan@intel.com>
Co-authored-by: Suryaprakash Shanmugam <suryaprakash.shanmugam@intel.com>
Co-authored-by: Preetha Veeramalai <preetha.veeramalai@intel.com>
---
 cmake/CMakeLists.txt                          |   7 +-
 docs/python/ReadMeOV.rst                      |   6 +-
 .../providers/openvino/backend_manager.cc     |  33 +++---
 .../core/providers/openvino/backend_utils.cc  |  64 ++++-------
 .../core/providers/openvino/backend_utils.h   |  11 +-
 .../openvino/backends/basic_backend.cc        |  37 ++++---
 .../openvino/backends/basic_backend.h         |   5 +-
 .../core/providers/openvino/contexts.h        |   3 +-
 .../openvino/openvino_execution_provider.cc   |   5 +
 .../openvino/openvino_execution_provider.h    |   7 +-
 .../openvino/openvino_provider_factory.cc     | 101 ++++++++++++++----
 .../openvino_provider_factory_creator.h       |   2 +
 .../core/providers/openvino/ov_interface.cc   |  10 +-
 .../core/providers/openvino/ov_interface.h    |  10 +-
 .../openvino/ov_versions/capability.cc        |   9 +-
 .../openvino/ov_versions/data_ops.cc          |  17 ++-
 .../providers/openvino/ov_versions/data_ops.h |   3 +-
 .../providers/openvino/ov_versions/utils.cc   |  13 ++-
 .../core/session/provider_bridge_ort.cc       |  38 ++++++-
 .../core/session/provider_registration.cc     |   9 +-
 .../python/onnxruntime_pybind_schema.cc       |   4 +-
 .../python/onnxruntime_pybind_state.cc        |  53 +++++----
 .../python/onnxruntime_pybind_state_common.h  |   1 -
 onnxruntime/test/perftest/ort_test_session.cc |  75 +++++--------
 .../test/providers/cpu/math/einsum_test.cc    |   4 +-
 onnxruntime/test/util/default_providers.cc    |  13 ++-
 .../test/util/include/default_providers.h     |   4 +-
 27 files changed, 333 insertions(+), 211 deletions(-)

diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index d33e8bd4f6..2f80e6ecec 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -1236,9 +1236,12 @@ if (onnxruntime_USE_OPENVINO)
   elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0")
     set(OPENVINO_VERSION "2023.0")
     add_definitions(-DOPENVINO_2023_0=1)
+  elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.1")
+    set(OPENVINO_VERSION "2023.1")
+    add_definitions(-DOPENVINO_2023_1=1)
   elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino")
-    set(OPENVINO_VERSION "2023.0")
-    add_definitions(-DOPENVINO_2023_0=1)
+    set(OPENVINO_VERSION "2023.1")
+    add_definitions(-DOPENVINO_2023_1=1)
   else()
     message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}")
   endif()
diff --git a/docs/python/ReadMeOV.rst b/docs/python/ReadMeOV.rst
index a19aa0e86d..f12c01d278 100644
--- a/docs/python/ReadMeOV.rst
+++ b/docs/python/ReadMeOV.rst
@@ -7,6 +7,7 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
  - Intel® CPUs
  - Intel® integrated GPUs
  - Intel® discrete GPUs
+ - Intel® integrated VPUs
 
 Installation
 ------------
@@ -15,12 +16,13 @@ Requirements
 ^^^^^^^^^^^^
 
 - Ubuntu 18.04, 20.04, RHEL(CPU only) or Windows 10 - 64 bit
-- Python 3.8, 3.9 or 3.10 for Linux and only Python3.10 for Windows
+- Python 3.8, 3.9 or 3.10 for Linux and only Python 3.10 for Windows
 
 This package supports:
  - Intel® CPUs
  - Intel® integrated GPUs
  - Intel® discrete GPUs
+ - Intel® integrated VPUs
 
 ``pip3 install onnxruntime-openvino``
 
@@ -34,7 +36,7 @@ For more details on build and installation please refer to `Build <https://onnxr
 Usage
 ^^^^^
 
-By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated or discrete GPU. 
+By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated or discrete GPU.
 Invoke `the provider config device type argument <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options>`_ to change the hardware on which inferencing is done.
 
 For more API calls and environment variables, see  `Usage <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#configuration-options>`_.
diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 5969aaeb44..78467b646b 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -7,9 +7,6 @@
 #include <memory>
 
 #include "core/providers/shared_library/provider_api.h"
-
-#include <inference_engine.hpp>
-
 #include "contexts.h"
 #include "backend_manager.h"
 #include "ibackend.h"
@@ -36,11 +33,11 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
                                const logging::Logger& logger) {
   auto prec_str = GetGlobalContext().precision_str;
   if (prec_str == "FP32") {
-    subgraph_context_.precision = InferenceEngine::Precision::FP32;
+    subgraph_context_.precision = "FP32";
   } else if (prec_str == "FP16") {
-    subgraph_context_.precision = InferenceEngine::Precision::FP16;
+    subgraph_context_.precision = "FP16";
   } else if (prec_str == "U8") {
-    subgraph_context_.precision = InferenceEngine::Precision::U8;
+    subgraph_context_.precision = "U8";
   } else {
     throw std::string("Invalid OpenVINO Precision type: " + prec_str);
   }
@@ -78,19 +75,17 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
     if (GetGlobalContext().device_type.find("CPU") != std::string::npos ||
         GetGlobalContext().device_type.find("GPU") != std::string::npos) {
-      if (GetGlobalContext().enable_dynamic_shapes) {
-        LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
-                           << "Creating backend Dynamic Shapes";
-        try {
-          concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
-                                                          GetGlobalContext(),
-                                                          subgraph_context_);
-        } catch (std::string const& msg) {
-          throw msg;
-        }
-        LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
-                           << "Backend created for graph " << subgraph_context_.subgraph_name;
+      LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
+                         << "Creating backend Dynamic Shapes";
+      try {
+        concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
+                                                        GetGlobalContext(),
+                                                        subgraph_context_);
+      } catch (std::string const& msg) {
+        throw msg;
       }
+      LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
+                         << "Backend created for graph " << subgraph_context_.subgraph_name;
     }
   } else {
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;
@@ -257,7 +252,7 @@ void BackendManager::Compute(OrtKernelContext* context) {
   }
 #endif
   bool use_dynamic_backend = true;
-  if (GetGlobalContext().enable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape &&
+  if (subgraph_context_.has_dynamic_input_shape &&
       (GetGlobalContext().device_type.find("CPU") != std::string::npos ||
        GetGlobalContext().device_type.find("GPU") != std::string::npos)) {
     concrete_backend_->Infer(context);
diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
index c5ebdb4131..d49968cdb7 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -8,8 +8,8 @@
 #include <fstream>
 
 #include "ov_interface.h"
-#include <ngraph/pass/convert_fp32_to_fp16.hpp>
-#include <ngraph/pass/constant_folding.hpp>
+#include "openvino/pass/convert_fp32_to_fp16.hpp"
+#include "openvino/pass/constant_folding.hpp"
 #include "core/providers/shared_library/provider_api.h"
 #include "backend_utils.h"
 
@@ -50,14 +50,14 @@ struct static_cast_int64 {
 std::shared_ptr<OVNetwork>
 CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context,
               const SubGraphContext& subgraph_context,
-              std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
+              std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
   if (IsCILogEnabled()) {
     std::cout << "CreateNgraphFunc" << std::endl;
   }
   const std::string model = model_proto.SerializeAsString();
   try {
     auto cnn_network = global_context.ie_core.ReadModel(model);
-    if ((subgraph_context.precision == InferenceEngine::Precision::FP16) &&
+    if ((subgraph_context.precision == "FP16") &&
         (global_context.device_type.find("VPUX") == std::string::npos)) {
       // FP16 transformations
       ov::pass::ConvertFP32ToFP16 pass_obj;
@@ -88,7 +88,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
       size_t index = results.size() - 1;
 
       for (auto it = results.rbegin(); it != results.rend(); ++it) {
-        if (auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
+        if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
           const_outputs_map[(*it)->get_friendly_name()] = const_node;
           results.erase(results.begin() + index);
         }
@@ -96,12 +96,11 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
       }
     }
 #ifndef NDEBUG
-#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0)
+#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1)
     if (IsDebugEnabled()) {
       std::string name = cnn_network->get_friendly_name();
       ov::pass::Serialize serializer(name + ".xml", name + ".bin");
       serializer.run_on_model(cnn_network);
-      ngraph::plot_graph(cnn_network, name + "_executable" + ".dot");
     }
 #endif
 #endif
@@ -111,31 +110,6 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
   }
 }
 
-InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type) {
-  ONNX_NAMESPACE::DataType type_string = ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(onnx_type);
-  if (*type_string == "float" || *type_string == "tensor(float)") {
-    return InferenceEngine::Precision::FP32;
-  } else if (*type_string == "float16" || *type_string == "tensor(float16)") {
-    return InferenceEngine::Precision::FP16;
-  } else if (*type_string == "int32" || *type_string == "tensor(int32)") {
-    return InferenceEngine::Precision::I32;
-  } else if (*type_string == "int16" || *type_string == "tensor(int16)") {
-    return InferenceEngine::Precision::I16;
-  } else if (*type_string == "int8" || *type_string == "tensor(int8)") {
-    return InferenceEngine::Precision::I8;
-  } else if (*type_string == "uint16" || *type_string == "tensor(uint16)") {
-    return InferenceEngine::Precision::U16;
-  } else if (*type_string == "uint8" || *type_string == "tensor(uint8)") {
-    return InferenceEngine::Precision::U8;
-  } else if (*type_string == "bool" || *type_string == "tensor(bool)") {
-    return InferenceEngine::Precision::U8;
-  } else if (*type_string == "int64" || *type_string == "tensor(int64)") {
-    return InferenceEngine::Precision::I32;
-  } else {
-    throw std::string(log_tag + "Unsupported Data type");
-  }
-}
-
 Ort::UnownedValue
 GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
                 OVInferRequestPtr infer_request,
@@ -166,7 +140,7 @@ Ort::UnownedValue
 GetOutputTensor(Ort::KernelContext& context,
                 std::string output_name,
                 std::unordered_map<std::string, int> output_names,
-                std::shared_ptr<ngraph::Node> node) {
+                std::shared_ptr<ov::Node> node) {
   // Find position of '/' in the output_name
   int pos = output_name.find("/");
   // Copy the substring from start to pos
@@ -210,25 +184,25 @@ int GetFirstAvailableDevice(GlobalContext& global_context) {
   return i;
 }
 
-void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::UnownedValue& out_tensor) {
+void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor) {
   switch (node->get_element_type()) {
-    case ngraph::element::Type_t::f32: {
+    case ov::element::Type_t::f32: {
       FillOutputHelper<float>(out_tensor, node);
       break;
     }
-    case ngraph::element::Type_t::boolean: {
+    case ov::element::Type_t::boolean: {
       FillOutputHelper<char>(out_tensor, node);
       break;
     }
-    case ngraph::element::Type_t::i32: {
+    case ov::element::Type_t::i32: {
       FillOutputHelper<int32_t>(out_tensor, node);
       break;
     }
-    case ngraph::element::Type_t::i64: {
+    case ov::element::Type_t::i64: {
       FillOutputHelper<int64_t>(out_tensor, node);
       break;
     }
-    case ngraph::element::Type_t::f16: {
+    case ov::element::Type_t::f16: {
       FillOutputHelper<float>(out_tensor, node);
       break;
     }
@@ -237,14 +211,22 @@ void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::Unowne
   }
 }
 
+#if defined(_MSC_VER)
+#pragma warning(disable : 4127)
+#endif
+
 template <typename T>
-void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ngraph::Node> node) {
-  auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>(node);
+void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ov::Node> node) {
+  auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
   auto res = const_node->cast_vector<T>();
   T* tensor_data = out_tensor.GetTensorMutableData<T>();
   std::copy(res.begin(), res.end(), tensor_data);
 }
 
+#if defined(_MSC_VER)
+#pragma warning(default : 4127)
+#endif
+
 void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx,
                    std::string input_name, Ort::KernelContext& context,
                    const SubGraphContext& subgraph_context) {
diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h
index e0fdc6f55a..de78a150fe 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.h
+++ b/onnxruntime/core/providers/openvino/backend_utils.h
@@ -32,19 +32,16 @@ bool IsCILogEnabled();
 
 int GetFirstAvailableDevice(GlobalContext& global_context);
 
-void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::UnownedValue& out_tensor);
+void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor);
 
 template <typename T>
-void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ngraph::Node> node);
+void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ov::Node> node);
 
 Ort::UnownedValue
 GetOutputTensor(Ort::KernelContext& context,
                 std::string output_name,
                 std::unordered_map<std::string, int> output_names,
-                std::shared_ptr<ngraph::Node> node);
-
-InferenceEngine::Precision
-ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type);
+                std::shared_ptr<ov::Node> node);
 
 Ort::UnownedValue
 GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
@@ -61,7 +58,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
 
 std::shared_ptr<OVNetwork>
 CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context,
-              std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map);
+              std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
 
 void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
                             std::ostream& stream, std::string deviceName);
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index 31ec8db03b..f9517d7942 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -9,7 +9,7 @@
 
 #include "core/providers/shared_library/provider_api.h"
 #include "../backend_utils.h"
-#include <ngraph/pass/constant_folding.hpp>
+// #include <ngraph/pass/constant_folding.hpp>
 #include "basic_backend.h"
 #include "../backend_manager.h"
 
@@ -37,6 +37,9 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
   // Setting OpenCL queue throttling for GPU
   EnableGPUThrottling(device_config);
 
+  // Enable streams; default=1 unless overridden by user config
+  EnableStreams();
+
 #ifndef NDEBUG
   if (IsDebugEnabled()) {
     std::string file_name = subgraph_context.subgraph_name + "_static.onnx";
@@ -45,6 +48,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
   }
 #endif
   try {
+    std::string dev_prec = global_context.device_type + "_" + global_context_.precision_str;
     if (global_context.is_wholly_supported_graph) {
 #if defined(IO_BUFFER_ENABLED)
       if ((global_context.device_type.find("GPU") != std::string::npos) &&
@@ -61,8 +65,8 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
         LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
       }
 #else
-#if defined(OPENVINO_2023_0)
-      if (subgraph_context.precision != InferenceEngine::Precision::FP16 && global_context_.enable_dynamic_shapes == false) {
+#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
+      if (!subgraph_context_.has_dynamic_input_shape && dev_prec != "CPU_FP16") {
         const std::string model = model_proto.SerializeAsString();
         exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
         LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
@@ -98,7 +102,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
   inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, nireq));
 }
 
-bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
+bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
   if (const_outputs_map.size() == subgraph_context_.output_names.size())
     subgraph_context_.is_constant = true;
   if (subgraph_context_.is_constant) {
@@ -109,20 +113,23 @@ bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ngraph
 }
 
 void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
-  // Set inference precision if device_type != AUTO
-  // if (global_context_.device_type.find("GPU_FP16")!= std::string::npos){
-  //   device_config.emplace(ov::hint::inference_precision(global_context_.precision_str));
-  // }
   device_config = {};
+  // Set inference precision based on device precision for OV backend
+  if (global_context_.precision_str.find("FP16") != std::string::npos && global_context_.device_type == "GPU") {
+    device_config.emplace(ov::hint::inference_precision("f16"));
+  }
+  if (global_context_.precision_str.find("FP32") != std::string::npos) {
+    device_config.emplace(ov::hint::inference_precision("f32"));
+  }
 #ifndef NDEBUG
   if (openvino_ep::backend_utils::IsDebugEnabled()) {
     device_config.emplace(ov::enable_profiling(true));
   }
 #endif
-#if defined(OPENVINO_2023_0)
+#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
   if (global_context_.device_type.find("VPUX") != std::string::npos) {
     std::pair<std::string, ov::Any> device_property;
-    device_property = std::make_pair("VPUX_COMPILER_TYPE", "MLIR");
+    device_property = std::make_pair("VPU_COMPILER_TYPE", "MLIR");
     device_config.emplace(ov::device::properties("VPUX", device_property));
   }
 #endif
@@ -147,10 +154,17 @@ void BasicBackend::EnableCaching() {
 void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) {
   if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
     LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
-    device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
+    std::pair<std::string, ov::Any> device_property;
+    device_property = std::make_pair("PLUGIN_THROTTLE", "1");
+    device_config.emplace(ov::device::properties("GPU_CONFIG_KEY", device_property));
+    // device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
   }
 }
 
+void BasicBackend::EnableStreams() {
+  global_context_.ie_core.SetStreams(global_context_.device_type, global_context_.num_streams);
+}
+
 // Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on
 // an Infer Request indexed by infer_req_idx
 void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
@@ -177,7 +191,6 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
       }
       size_t batch_slice_idx = 0;
       if (subgraph_context_.has_dynamic_input_shape &&
-          global_context_.enable_dynamic_shapes == true &&
           (global_context_.device_type.find("CPU") != std::string::npos ||
            global_context_.device_type.find("GPU") != std::string::npos)) {
         auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h
index 8cdb758fe7..2f1d603640 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.h
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -31,10 +31,11 @@ class BasicBackend : public IBackend {
  private:
   bool ImportBlob(std::string hw_target, bool vpu_status);
   void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&);
-  bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map);
+  bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
   void PopulateConfigValue(ov::AnyMap& device_config);
   void EnableCaching();
   void EnableGPUThrottling(ov::AnyMap& device_config);
+  void EnableStreams();
   void StartAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);
 
 #ifdef IO_BUFFER_ENABLED
@@ -48,7 +49,7 @@ class BasicBackend : public IBackend {
   mutable std::mutex compute_lock_;
   std::shared_ptr<OVNetwork> ie_cnn_network_;
   OVExeNetwork exe_network_;
-  std::map<std::string, std::shared_ptr<ngraph::Node>> const_outputs_map_;
+  std::map<std::string, std::shared_ptr<ov::Node>> const_outputs_map_;
   std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
 #if defined IO_BUFFER_ENABLED
   OVRemoteContextPtr remote_context_;
diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h
index a6011590fa..b61dcf8ca4 100644
--- a/onnxruntime/core/providers/openvino/contexts.h
+++ b/onnxruntime/core/providers/openvino/contexts.h
@@ -20,6 +20,7 @@ struct GlobalContext {
   std::string precision_str;
   std::string device_id;
   std::string cache_dir;
+  int num_streams;
   std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};
   std::vector<std::string> deviceTags = {"0", "1", "2", "3", "4", "5", "6", "7"};
   std::string onnx_model_name;
@@ -40,7 +41,7 @@ struct SubGraphContext {
   std::vector<int> input_indexes;
   std::unordered_map<std::string, int> input_names;
   std::unordered_map<std::string, int> output_names;
-  OVPrecision precision;
+  std::string precision;
 };
 
 }  // namespace openvino_ep
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
index 6a4b039683..9908099262 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -19,6 +19,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
   openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
   openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
   openvino_ep::BackendManager::GetGlobalContext().cache_dir = info.cache_dir_;
+  openvino_ep::BackendManager::GetGlobalContext().num_streams = info.num_streams_;
   openvino_ep::BackendManager::GetGlobalContext().context = info.context_;
   openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_;
   openvino_ep::BackendManager::GetGlobalContext().enable_dynamic_shapes = info.enable_dynamic_shapes_;
@@ -130,6 +131,10 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
   openvino_ep::GetCapability obj(graph_viewer,
                                  openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_0");
   result = obj.Execute();
+#elif defined(OPENVINO_2023_1)
+  openvino_ep::GetCapability obj(graph_viewer,
+                                 openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_1");
+  result = obj.Execute();
 #endif
 
   return result;
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
index 5eae9c78c9..a4fc09362f 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -57,15 +57,16 @@ struct OpenVINOExecutionProviderInfo {
   std::string device_id_;
   size_t num_of_threads_;
   std::string cache_dir_;
+  int num_streams_;
   void* context_;
   bool enable_opencl_throttling_;
   bool enable_dynamic_shapes_;
 
   explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id,
-                                         size_t num_of_threads, std::string cache_dir,
+                                         size_t num_of_threads, std::string cache_dir, int num_streams,
                                          void* context, bool enable_opencl_throttling,
                                          bool enable_dynamic_shapes)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), cache_dir_(cache_dir), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
+      : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), cache_dir_(cache_dir), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
     if (dev_type == "") {
       LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
                          << "No runtime device selection option provided.";
@@ -149,7 +150,7 @@ struct OpenVINOExecutionProviderInfo {
                        << "Choosing Device: " << device_type_ << " , Precision: " << precision_;
   }
   OpenVINOExecutionProviderInfo() {
-    OpenVINOExecutionProviderInfo("", false, "", 0, "", NULL, false, false);
+    OpenVINOExecutionProviderInfo("", false, "", 0, "", 1, NULL, false, false);
   }
 };
 
diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
index d118b37f8a..463f985d8c 100644
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -3,16 +3,16 @@
 
 #include "core/providers/shared_library/provider_api.h"
 #include "core/providers/openvino/openvino_provider_factory.h"
-#include "openvino_execution_provider.h"
-#include "openvino_provider_factory_creator.h"
+#include "core/providers/openvino/openvino_execution_provider.h"
+#include "core/providers/openvino/openvino_provider_factory_creator.h"
 
 namespace onnxruntime {
 struct OpenVINOProviderFactory : IExecutionProviderFactory {
   OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
                           const char* device_id, size_t num_of_threads,
-                          const char* cache_dir, void* context,
+                          const char* cache_dir, int num_streams, void* context,
                           bool enable_opencl_throttling, bool enable_dynamic_shapes)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
+      : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
     device_type_ = (device_type == nullptr) ? "" : device_type;
     device_id_ = (device_id == nullptr) ? "" : device_id;
     cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
@@ -28,6 +28,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
   std::string device_id_;
   size_t num_of_threads_;
   std::string cache_dir_;
+  int num_streams_;
   void* context_;
   bool enable_opencl_throttling_;
   bool enable_dynamic_shapes_;
@@ -35,20 +36,11 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
 
 std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
   OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_,
-                                     cache_dir_, context_, enable_opencl_throttling_,
+                                     cache_dir_, num_streams_, context_, enable_opencl_throttling_,
                                      enable_dynamic_shapes_);
   return std::make_unique<OpenVINOExecutionProvider>(info);
 }
 
-std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
-    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads,
-    const char* cache_dir, void* context, bool enable_opencl_throttling,
-    bool enable_dynamic_shapes) {
-  return std::make_shared<onnxruntime::OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile,
-                                                                device_id, num_of_threads, cache_dir, context, enable_opencl_throttling,
-                                                                enable_dynamic_shapes);
-}
-
 }  // namespace onnxruntime
 
 namespace onnxruntime {
@@ -63,12 +55,81 @@ struct OpenVINO_Provider : Provider {
   void* GetInfo() override { return &g_info; }
 
   std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const void* void_params) override {
-    auto& params = *reinterpret_cast<const OrtOpenVINOProviderOptions*>(void_params);
-    return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile,
-                                                     params.device_id, params.num_of_threads,
-                                                     params.cache_dir,
-                                                     params.context, params.enable_opencl_throttling,
-                                                     params.enable_dynamic_shapes);
+    // void_params points at a ProviderOptions (string -> string) map. Every key is optional; a
+    // missing key keeps the default declared below.
+    const auto& provider_options_map = *reinterpret_cast<const ProviderOptions*>(void_params);
+
+    const char* device_type = "";           // [device_type]: Overrides the accelerator hardware type and precision
+                                            //   with these values at runtime.
+    bool enable_vpu_fast_compile = false;   // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to
+                                            // speed up the model's compilation to VPU device specific format.
+    const char* device_id = "";             // [device_id]: Selects a particular hardware device for inference.
+    size_t num_of_threads = 8;              // [num_of_threads]: Overrides the accelerator default value of number of
+                                            //  threads with this value at runtime.
+    const char* cache_dir = "";             // [cache_dir]: specify the path to
+                                            // dump and load the blobs for the model caching/kernel caching (GPU)
+                                            // feature. If blob files are already present, it will be directly loaded.
+    int num_streams = 1;                    // [num_streams]: Option that specifies the number of parallel inference
+                                            // requests to be processed on a given `device_type`. Overrides the
+                                            // accelerator default value of number of streams with this value at runtime.
+    bool enable_opencl_throttling = false;  // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
+                                            // device (Reduces CPU Utilization when using GPU)
+    bool enable_dynamic_shapes = false;     // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device.
+    void* context = nullptr;                // [context]: Opaque pointer forwarded to the factory; it arrives in the
+                                            // map as an address written in hexadecimal text.
+
+    // Returns the value stored under `key`, or nullptr when the key is absent.
+    const auto lookup = [&provider_options_map](const char* key) -> const std::string* {
+      const auto it = provider_options_map.find(key);
+      return it == provider_options_map.end() ? nullptr : &it->second;
+    };
+    // Only "true"/"True"/"false"/"False" are recognised; any other text keeps `fallback`.
+    const auto parse_bool = [&lookup](const char* key, bool fallback) {
+      const std::string* value = lookup(key);
+      if (value == nullptr) return fallback;
+      if (*value == "true" || *value == "True") return true;
+      if (*value == "false" || *value == "False") return false;
+      return fallback;
+    };
+
+    // The c_str() pointers refer to strings owned by the caller's map; they are only read while
+    // the factory is being constructed at the end of this function.
+    if (const std::string* value = lookup("device_type")) {
+      device_type = value->c_str();
+    }
+    if (const std::string* value = lookup("device_id")) {
+      device_id = value->c_str();
+    }
+    if (const std::string* value = lookup("cache_dir")) {
+      cache_dir = value->c_str();
+    }
+
+    // std::stoi throws on text that is not a number; the exception is left to the caller.
+    if (const std::string* value = lookup("num_of_threads")) {
+      num_of_threads = std::stoi(*value);
+    }
+    if (const std::string* value = lookup("num_streams")) {
+      num_streams = std::stoi(*value);
+    }
+
+    // "context" holds a pointer value rendered as hexadecimal text (with or without a 0x prefix).
+    // Decode it back to the pointer rather than taking the address of the option string's
+    // characters, which would not be the caller's object. An empty value means "no context".
+    if (const std::string* value = lookup("context")) {
+      if (!value->empty()) {
+        context = reinterpret_cast<void*>(static_cast<size_t>(std::stoull(*value, nullptr, 16)));
+      }
+    }
+
+    enable_vpu_fast_compile = parse_bool("enable_vpu_fast_compile", enable_vpu_fast_compile);
+    enable_opencl_throttling = parse_bool("enable_opencl_throttling", enable_opencl_throttling);
+    enable_dynamic_shapes = parse_bool("enable_dynamic_shapes", enable_dynamic_shapes);
+
+    return std::make_shared<OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile,
+                                                     device_id, num_of_threads,
+                                                     cache_dir, num_streams,
+                                                     context, enable_opencl_throttling,
+                                                     enable_dynamic_shapes);
   }
 
   void Initialize() override {
diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h b/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h
index 5781d3a3ab..4df653b022 100644
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory_creator.h
@@ -6,12 +6,14 @@
 #include <memory>
 
 #include "core/providers/providers.h"
+#include "core/framework/provider_options.h"
 
 struct OrtOpenVINOProviderOptions;
 
 namespace onnxruntime {
 // defined in provider_bridge_ort.cc
 struct OpenVINOProviderFactoryCreator {
+  static std::shared_ptr<IExecutionProviderFactory> Create(const ProviderOptions* provider_options_map);
   static std::shared_ptr<IExecutionProviderFactory> Create(const OrtOpenVINOProviderOptions* provider_options);
 };
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc
index 9175f51b12..3914488fc5 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.cc
+++ b/onnxruntime/core/providers/openvino/ov_interface.cc
@@ -42,7 +42,7 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std
   }
 }
 
-#if defined(OPENVINO_2023_0)
+#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
 OVExeNetwork OVCore::LoadNetwork(const std::string& model, std::string& hw_target, ov::AnyMap& device_config, std::string name) {
   ov::CompiledModel obj;
   try {
@@ -75,8 +75,12 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& model, OVRemoteCont
 #endif
 
 std::vector<std::string> OVCore::GetAvailableDevices() {
-  auto obj = oe.get_available_devices();
-  return obj;
+  return oe.get_available_devices();  // forwarded straight from the wrapped ov::Core
+}
+
+// Applies num_streams as the ov::num_streams property of device_type on the wrapped core.
+void OVCore::SetStreams(const std::string& device_type, int num_streams) {
+  oe.set_property(device_type, {ov::num_streams(num_streams)});
 }
 
 OVInferRequest OVExeNetwork::CreateInferRequest() {
diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h
index 84268ab6dc..ed9583033a 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.h
+++ b/onnxruntime/core/providers/openvino/ov_interface.h
@@ -5,11 +5,12 @@
 
 #include <vector>
 
-#include <inference_engine.hpp>
-#if defined(OPENVINO_2022_1) || (OPENVINO_2022_2) || (OPENVINO_2022_3) || (OPENVINO_2023_0)
+#if defined(OPENVINO_2022_1) || (OPENVINO_2022_2) || (OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1)
 #define OV_API_20
 #include "openvino/openvino.hpp"
 #include "openvino/pass/convert_fp32_to_fp16.hpp"
+#else
+#include <inference_engine.hpp>
 #endif
 
 #ifdef IO_BUFFER_ENABLED
@@ -26,10 +27,8 @@ class OVCore;
 class OVInferRequest;
 class OVExeNetwork;
 
-typedef InferenceEngine::Precision OVPrecision;
 typedef ov::Tensor OVTensor;
 typedef ov::ProfilingInfo OVProfilingInfo;
-typedef ov::AnyMap OVConfig;
 typedef ov::Model OVNetwork;
 typedef std::shared_ptr<OVInferRequest> OVInferRequestPtr;
 typedef std::shared_ptr<OVTensor> OVTensorPtr;
@@ -45,7 +44,7 @@ class OVCore {
  public:
   std::shared_ptr<OVNetwork> ReadModel(const std::string& model_stream) const;
   OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name);
-#if defined(OPENVINO_2023_0)
+#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
   OVExeNetwork LoadNetwork(const std::string& model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name);
 #endif
   void SetCache(std::string cache_dir_path);
@@ -56,6 +55,7 @@ class OVCore {
   ov::Core& Get() {
     return oe;
   }
+  void SetStreams(const std::string& device_type, int num_streams);
 };
 
 class OVExeNetwork {
diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
index 251f475525..865e74aa1f 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -33,8 +33,10 @@ GetCapability::GetCapability(const GraphViewer& graph_viewer_param, std::string
     data_ops_ = new DataOps(graph_viewer_, V_2022_3, device_type_);
   } else if (version_param == "V_2023_0") {
     data_ops_ = new DataOps(graph_viewer_, V_2023_0, device_type_);
+  } else if (version_param == "V_2023_1") {
+    data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_);
   } else {
-    data_ops_ = new DataOps(graph_viewer_, V_2023_0, device_type_);
+    data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_);
   }
 }
 
@@ -46,6 +48,11 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
     return result;
   }
 
+  // Check if it is a subgraph
+  if (graph_viewer_.IsSubgraph() && graph_viewer_.Name() == "tf2onnx") {
+    return result;
+  }
+
   // This is a list of initializers that nGraph considers as constants. Example weights, reshape shape etc.
   std::unordered_set<std::string> ng_required_initializers;
 
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index 44bf96788e..70118c94f9 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -17,8 +17,8 @@
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
 #endif
-#include <ngraph/ngraph.hpp>
-#include <ngraph/frontend/onnx_import/onnx.hpp>
+// #include <ngraph/ngraph.hpp>
+// #include <ngraph/frontend/onnx_import/onnx.hpp>
 #if defined(_MSC_VER)
 #pragma warning(default : 4244 4245)
 #elif __GNUC__
@@ -36,6 +36,7 @@ std::set<std::string> ops_supported_only_in_model = {
     "ConstantOfShape",
     "DequantizeLinear",
     "Dropout",
+    "Einsum",
     "Exp",
     "Expand",
     "EyeLike",
@@ -127,6 +128,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"Dropout", V_2023_0, {"VPUX"}},
     {"Elu", V_2020_4, {"CPU", "GPU"}},
     {"Elu", V_2023_0, {"VPUX"}},
+    // {"Einsum", V_2023_0, {"CPU", "GPU"}},
     {"Equal", V_2020_4, {"CPU", "GPU"}},
     {"Equal", V_2023_0, {"VPUX"}},  // Added for whisper decoder model.
     {"Erf", V_2020_4, {"CPU", "GPU"}},
@@ -155,6 +157,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
     {"GreaterOrEqual", V_2023_0, {"VPUX"}},
     {"GridSample", V_2022_3, {"CPU"}},
+    {"GridSample", V_2023_0, {"GPU"}},
     {"Identity", V_2020_4, {"CPU", "GPU"}},
     {"Identity", V_2023_0, {"VPUX"}},  // NoOP
     {"If", V_2022_3, {"CPU", "GPU"}},
@@ -196,6 +199,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"Neg", V_2023_0, {"VPUX"}},
     {"NonMaxSuppression", V_2021_1, {"CPU", "GPU"}},
     {"NonZero", V_2021_1, {"CPU"}},
+    {"NonZero", V_2023_0, {"GPU"}},
     {"Not", V_2021_1, {"CPU", "GPU"}},
     {"Not", V_2020_4, {"CPU", "GPU"}},
     {"OneHot", V_2020_4, {"CPU", "GPU"}},
@@ -210,6 +214,7 @@ std::vector<SupportedOp> supported_op_mode = {
     {"QuantizeLinear", V_2021_4, {"CPU", "GPU"}},
     {"QuantizeLinear", V_2023_0, {"VPUX"}},
     {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
+    {"RandomNormal", V_2023_0, {"CPU", "GPU"}},
     {"Range", V_2022_1, {"CPU", "GPU"}},
     {"Range", V_2023_0, {"VPUX"}},
     {"Reciprocal", V_2020_4, {"CPU", "GPU"}},
@@ -341,6 +346,7 @@ void DataOps::populate_op_mode_supported() {
   no_dimension_supported_.push_back({"Div", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"DequantizeLinear", V_2021_4, {"All"}});
   no_dimension_supported_.push_back({"Equal", V_2022_1, {"CPU"}});
+  no_dimension_supported_.push_back({"Equal", V_2023_0, {"GPU"}});
   no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"Greater", V_2023_0, {"VPUX"}});
@@ -356,6 +362,7 @@ void DataOps::populate_op_mode_supported() {
   no_dimension_supported_.push_back({"ReduceProd", V_2022_1, {"CPU", "GPU"}});
   no_dimension_supported_.push_back({"Reshape", V_2022_1, {"All"}});
   no_dimension_supported_.push_back({"Shape", V_2022_1, {"GPU"}});
+  no_dimension_supported_.push_back({"Shape", V_2023_0, {"CPU"}});
   no_dimension_supported_.push_back({"Squeeze", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"Sub", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"Unsqueeze", V_2020_4, {"All"}});
@@ -1022,8 +1029,10 @@ bool DataOps::node_is_supported(const std::map<std::string, std::set<std::string
         // Zero dimension check
         for (const auto& dim : shape->dim()) {
           if (utils::HasDimValue(dim) && dim.dim_value() == 0) {
-            if ((device_id_.find("GPU") != std::string::npos) && ((optype == "Expand") ||
-                                                                  (optype == "Slice") || (optype == "Concat") || (optype == "Shape"))) {
+            if (((device_id_.find("CPU") != std::string::npos) || (device_id_.find("GPU") != std::string::npos)) &&
+                ((optype == "Expand") || (optype == "Equal") ||
+                 (optype == "Slice") || (optype == "Concat") ||
+                 (optype == "Shape"))) {
               return;
             }
             has_unsupported_dimension = true;
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
index b26d1653be..cc968d02ea 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
@@ -18,7 +18,8 @@ enum versionNum {
   V_2022_1,
   V_2022_2,
   V_2022_3,
-  V_2023_0
+  V_2023_0,
+  V_2023_1,
 };
 
 using VersionNum = enum versionNum;
diff --git a/onnxruntime/core/providers/openvino/ov_versions/utils.cc b/onnxruntime/core/providers/openvino/ov_versions/utils.cc
index 53b2c3b460..be509b6743 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/utils.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/utils.cc
@@ -9,8 +9,15 @@
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wunused-parameter"
 #endif
-#include <ngraph/ngraph.hpp>
+
+#include "openvino/core/deprecated.hpp"
+#define IN_OV_COMPONENT
+#define NGRAPH_LEGACY_HEADER_INCLUDED
 #include <ngraph/frontend/onnx_import/onnx.hpp>
+
+#undef NGRAPH_LEGACY_HEADER_INCLUDED
+#undef IN_OV_COMPONENT
+
 #if defined(_MSC_VER)
 #pragma warning(default : 4244 4245)
 #elif __GNUC__
@@ -40,6 +47,7 @@ bool IsOpSupportedOnlyInModel(std::string name) {
       "Concat",
       "ConstantOfShape",
       "Dropout",
+      "Einsum",
       "Expand",
       "EyeLike",
       "Exp",
@@ -88,6 +96,7 @@ int GetOnnxOpSet(const GraphViewer& graph_viewer) {
 
 std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_opset) {
   std::map<std::string, std::set<std::string>> ng_supported_ops;
+  OPENVINO_SUPPRESS_DEPRECATED_START
   ng_supported_ops.emplace(kOnnxDomain, ngraph::onnx_import::get_supported_operators(onnx_opset, kOnnxDomain));
 
   const std::set<std::string> ng_disabled_ops = {"LSTM"};  // Place-holder for ops not supported.
@@ -95,7 +104,7 @@ std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_op
   for (const auto& disabled_op : ng_disabled_ops) {
     ng_supported_ops.at(kOnnxDomain).erase(disabled_op);
   }
-
+  OPENVINO_SUPPRESS_DEPRECATED_END
   return ng_supported_ops;
 }
 
diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc
index 255c7e36b3..8f0a5aeaa3 100644
--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@@ -1426,8 +1426,44 @@ std::shared_ptr<IExecutionProviderFactory> MIGraphXProviderFactoryCreator::Creat
   return s_library_migraphx.Get().CreateExecutionProviderFactory(provider_options);
 }
 
+// Adapter to convert the legacy OrtOpenVINOProviderOptions to ProviderOptions. Flags are written as
+// "true"/"false", the spellings the OpenVINO provider factory looks for in the map.
+ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const OrtOpenVINOProviderOptions* legacy_ov_options) {
+  ProviderOptions ov_options_converted_map;
+  if (legacy_ov_options->device_type != nullptr)
+    ov_options_converted_map["device_type"] = legacy_ov_options->device_type;
+  if (legacy_ov_options->device_id != nullptr)
+    ov_options_converted_map["device_id"] = legacy_ov_options->device_id;
+  if (legacy_ov_options->cache_dir != nullptr)
+    ov_options_converted_map["cache_dir"] = legacy_ov_options->cache_dir;
+  ov_options_converted_map["num_of_threads"] = std::to_string(legacy_ov_options->num_of_threads);
+
+  // The legacy flags are not strings (presumably integral; confirm against the C API header), so spell them out.
+  ov_options_converted_map["enable_vpu_fast_compile"] = legacy_ov_options->enable_vpu_fast_compile ? "true" : "false";
+  ov_options_converted_map["enable_opencl_throttling"] = legacy_ov_options->enable_opencl_throttling ? "true" : "false";
+  ov_options_converted_map["enable_dynamic_shapes"] = legacy_ov_options->enable_dynamic_shapes ? "true" : "false";
+
+  // The context pointer travels as its address in text form. The key is left out for a null context so
+  // the consumer keeps its own nullptr default.
+  if (legacy_ov_options->context != nullptr) {
+    std::stringstream context_string;
+    context_string << legacy_ov_options->context;
+    ov_options_converted_map["context"] = context_string.str();
+  }
+
+  // Add new provider option below
+  ov_options_converted_map["num_streams"] = "1";
+  return ov_options_converted_map;
+}
+
 std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Create(const OrtOpenVINOProviderOptions* provider_options) {
-  return s_library_openvino.Get().CreateExecutionProviderFactory(provider_options);
+  // Translate the legacy struct into the key/value map before handing it to the provider library.
+  ProviderOptions ov_options_converted_map = onnxruntime::OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(provider_options);
+  return s_library_openvino.Get().CreateExecutionProviderFactory(&ov_options_converted_map);
+}
+
+std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Create(const ProviderOptions* provider_options_map) {
+  return s_library_openvino.Get().CreateExecutionProviderFactory(provider_options_map);
 }
 
 std::shared_ptr<IExecutionProviderFactory> DnnlProviderFactoryCreator::Create(const OrtDnnlProviderOptions* dnnl_options) {
diff --git a/onnxruntime/core/session/provider_registration.cc b/onnxruntime/core/session/provider_registration.cc
index 4cea84a590..9326c6eaff 100644
--- a/onnxruntime/core/session/provider_registration.cc
+++ b/onnxruntime/core/session/provider_registration.cc
@@ -10,6 +10,7 @@
 #include "core/session/abi_session_options_impl.h"
 #include "core/session/onnxruntime_c_api.h"
 #include "core/session/ort_apis.h"
+#include "core/providers/openvino/openvino_provider_factory_creator.h"
 
 using namespace onnxruntime;
 
@@ -71,6 +72,12 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider,
     options->provider_factories.push_back(QNNProviderFactoryCreator::Create(provider_options, &(options->value)));
 #else
     status = create_not_supported_status();
+#endif
+  } else if (strcmp(provider_name, "OpenVINO") == 0) {
+#if defined(USE_OPENVINO)
+    options->provider_factories.push_back(OpenVINOProviderFactoryCreator::Create(&provider_options));
+#else
+    status = create_not_supported_status();
 #endif
   } else if (strcmp(provider_name, "SNPE") == 0) {
 #if defined(USE_SNPE)
@@ -115,7 +122,7 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider,
   } else {
     ORT_UNUSED_PARAMETER(options);
     status = OrtApis::CreateStatus(ORT_INVALID_ARGUMENT,
-                                   "Unknown provider name. Currently supported values are 'SNPE', 'XNNPACK', and 'AZURE'");
+                                   "Unknown provider name. Currently supported values are 'OPENVINO', 'SNPE', 'XNNPACK', 'QNN', 'WEBNN' and 'AZURE'");
   }
 
   return status;
diff --git a/onnxruntime/python/onnxruntime_pybind_schema.cc b/onnxruntime/python/onnxruntime_pybind_schema.cc
index 61d4feb182..a8c217b0ff 100644
--- a/onnxruntime/python/onnxruntime_pybind_schema.cc
+++ b/onnxruntime/python/onnxruntime_pybind_schema.cc
@@ -39,8 +39,8 @@ void addGlobalSchemaFunctions(pybind11::module& m) {
 #endif
 #ifdef USE_OPENVINO
             []() {
-              OrtOpenVINOProviderOptions provider_options;
-              return onnxruntime::OpenVINOProviderFactoryCreator::Create(&provider_options);
+              ProviderOptions provider_options_map;
+              return onnxruntime::OpenVINOProviderFactoryCreator::Create(&provider_options_map);
             }(),
 #endif
 #ifdef USE_TENSORRT
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index 826c996c22..5ac20739c4 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -780,56 +780,53 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
 #endif
   } else if (type == kOpenVINOExecutionProvider) {
 #ifdef USE_OPENVINO
-    OrtOpenVINOProviderOptions params;
-    params.device_type = openvino_device_type.c_str();
-    std::string cache_dir;
-
+    ProviderOptions OV_provider_options_map;
     auto it = provider_options_map.find(type);
     if (it != provider_options_map.end()) {
       for (auto option : it->second) {
         if (option.first == "device_type") {
-          openvino_device_type = option.second;
-          params.device_type = openvino_device_type.c_str();
+          OV_provider_options_map[option.first] = option.second;
+          continue;
         } else if (option.first == "enable_vpu_fast_compile") {
-          if (option.second == "True") {
-            params.enable_vpu_fast_compile = true;
-          } else if (option.second == "False") {
-            params.enable_vpu_fast_compile = false;
-          } else {
+          if (!(option.second == "True" || option.second == "true" ||
+                option.second == "False" || option.second == "false")) {
             ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
           }
-
+          OV_provider_options_map[option.first] = option.second;
         } else if (option.first == "enable_opencl_throttling") {
-          if (option.second == "True") {
-            params.enable_opencl_throttling = true;
-          } else if (option.second == "False") {
-            params.enable_opencl_throttling = false;
-          } else {
+          if (!(option.second == "True" || option.second == "true" ||
+                option.second == "False" || option.second == "false")) {
             ORT_THROW("Invalid value passed for enable_opencl_throttling: ", option.second);
           }
+          OV_provider_options_map[option.first] = option.second;
         } else if (option.first == "enable_dynamic_shapes") {
-          if (option.second == "True") {
-            params.enable_dynamic_shapes = true;
-          } else if (option.second == "False") {
-            params.enable_dynamic_shapes = false;
-          } else {
+          if (!(option.second == "True" || option.second == "true" ||
+                option.second == "False" || option.second == "false")) {
             ORT_THROW("Invalid value passed for enable_dynamic_shapes: ", option.second);
           }
+          OV_provider_options_map[option.first] = option.second;
         } else if (option.first == "device_id") {
-          params.device_id = option.second.c_str();
+          OV_provider_options_map[option.first] = option.second;
+          continue;
         } else if (option.first == "num_of_threads") {
-          params.num_of_threads = std::stoi(option.second);
+          OV_provider_options_map[option.first] = option.second;
+          continue;
+        } else if (option.first == "num_streams") {
+          OV_provider_options_map[option.first] = option.second;
+          continue;
         } else if (option.first == "cache_dir") {
-          cache_dir = option.second;
-          params.cache_dir = cache_dir.c_str();
+          OV_provider_options_map[option.first] = option.second;
+          continue;
         } else if (option.first == "context") {
-          params.context = (void*)(option.second.c_str());
+          OV_provider_options_map[option.first] = option.second;
+          continue;
         } else {
           ORT_THROW("Invalid OpenVINO EP option: ", option.first);
         }
       }
     }
-    if (std::shared_ptr<IExecutionProviderFactory> openvino_provider_factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(&params)) {
+    if (std::shared_ptr<IExecutionProviderFactory> openvino_provider_factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(
+            &OV_provider_options_map)) {
       auto p = openvino_provider_factory->CreateProvider();
       // Reset global variables config to avoid it being accidentally passed on to the next session
       openvino_device_type.clear();
diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h
index 1260e9ea71..18a9079b5c 100644
--- a/onnxruntime/python/onnxruntime_pybind_state_common.h
+++ b/onnxruntime/python/onnxruntime_pybind_state_common.h
@@ -440,7 +440,6 @@ std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGrap
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* params);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(const OrtDnnlProviderOptions* params);
-std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
 #ifdef USE_TVM
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tvm(const tvm::TvmEPOptions& info);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tvm(const char* params);
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index d283d9df62..454ef5dfb2 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -423,24 +423,12 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
 #endif
   } else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) {
 #ifdef USE_OPENVINO
-    std::string device_type = "";           // [device_type]: Overrides the accelerator hardware type and precision
-                                            //   with these values at runtime.
-    bool enable_vpu_fast_compile = false;   // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to
-                                            // speeds up the model's compilation to VPU device specific format.
-    std::string device_id = "";             // [device_id]: Selects a particular hardware device for inference.
-    size_t num_of_threads = 8;              // [num_of_threads]: Overrides the accelerator default value of number of
-                                            //  threads with this value at runtime.
-    std::string cache_dir = "";             // [cache_dir]: specify the path to
-                                            // dump and load the blobs for the model caching/kernel caching (GPU)
-                                            // feature. If blob files are already present, it will be directly loaded.
-    bool enable_opencl_throttling = false;  // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
-                                            // device (Reduces CPU Utilization when using GPU)
-    bool enable_dynamic_shapes = false;     // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device)
 #ifdef _MSC_VER
     std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
 #else
     std::string ov_string = performance_test_config.run_config.ep_runtime_config_string;
 #endif
+    std::unordered_map<std::string, std::string> ov_options;
     std::istringstream ss(ov_string);
     std::string token;
     while (ss >> token) {
@@ -461,69 +449,64 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
                                                            "GPU.0_FP16", "GPU.1_FP16",
                                                            "VPUX_FP16", "VPUX_U8"};
         if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) {
-          device_type = value;
+          ov_options[key] = value;
         } else if (value.find("HETERO:") == 0) {
-          device_type = value;
+          ov_options[key] = value;
         } else if (value.find("MULTI:") == 0) {
-          device_type = value;
+          ov_options[key] = value;
         } else if (value.find("AUTO:") == 0) {
-          device_type = value;
+          ov_options[key] = value;
         } else {
           ORT_THROW(
               "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
               "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
-              "'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8', or from"
+              "'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from"
               " HETERO/MULTI/AUTO options available. \n");
         }
       } else if (key == "device_id") {
-        device_id = value;
+        ov_options[key] = value;
       } else if (key == "enable_vpu_fast_compile") {
-        if (value == "true" || value == "True") {
-          enable_vpu_fast_compile = true;
-        } else if (value == "false" || value == "False") {
-          enable_vpu_fast_compile = false;
+        if (value == "true" || value == "True" ||
+            value == "false" || value == "False") {
+          ov_options[key] = value;
         } else {
           ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
         }
       } else if (key == "enable_opencl_throttling") {
-        if (value == "true" || value == "True") {
-          enable_opencl_throttling = true;
-        } else if (value == "false" || value == "False") {
-          enable_opencl_throttling = false;
+        if (value == "true" || value == "True" ||
+            value == "false" || value == "False") {
+          ov_options[key] = value;
         } else {
           ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_opencl_throttling' should be a boolean i.e. true or false. Default value is false.\n");
         }
       } else if (key == "enable_dynamic_shapes") {
-        if (value == "true" || value == "True") {
-          enable_dynamic_shapes = true;
-        } else if (value == "false" || value == "False") {
-          enable_dynamic_shapes = false;
+        if (value == "true" || value == "True" ||
+            value == "false" || value == "False") {
+          ov_options[key] = value;
         } else {
           ORT_THROW(
               "[ERROR] [OpenVINO] The value for the key 'enable_dynamic_shapes' "
               "should be a boolean i.e. true or false. Default value is false.\n");
         }
       } else if (key == "num_of_threads") {
-        std::stringstream sstream(value);
-        sstream >> num_of_threads;
-        if ((int)num_of_threads <= 0) {
+        if (std::stoi(value) <= 0) {
           ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_of_threads' should be greater than 0\n");
+        } else {
+          ov_options[key] = value;
         }
       } else if (key == "cache_dir") {
-        cache_dir = value;
+        ov_options[key] = value;
+      } else if (key == "num_streams") {
+        if (std::stoi(value) <= 0 || std::stoi(value) > 8) {  // reject anything outside the documented 1-8 range
+          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_streams' should be in the range of 1-8 \n");
+        } else {
+          ov_options[key] = value;
+        }
       } else {
-        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'cache_dir', 'enable_opencl_throttling|true'] \n");
+        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling', 'enable_dynamic_shapes'] \n");
       }
     }
-    OrtOpenVINOProviderOptions options;
-    options.device_type = device_type.c_str();                    // To set the device_type
-    options.device_id = device_id.c_str();                        // To set the device_id
-    options.enable_vpu_fast_compile = enable_vpu_fast_compile;    // To enable_vpu_fast_compile, default is false
-    options.num_of_threads = num_of_threads;                      // To set number of free InferRequests, default is 8
-    options.cache_dir = cache_dir.c_str();                        // sets the cache_dir, default is ""
-    options.enable_opencl_throttling = enable_opencl_throttling;  // Enables GPU Throttling (Reduces CPU Utilization)
-    options.enable_dynamic_shapes = enable_dynamic_shapes;        // Enables Dynamic Shapes feature
-    session_options.AppendExecutionProvider_OpenVINO(options);
+    session_options.AppendExecutionProvider("OpenVINO", ov_options);
 #else
     ORT_THROW("OpenVINO is not supported in this build\n");
 #endif
@@ -579,7 +562,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
           ORT_THROW("Supported htp_performance_mode: " + str);
         }
       } else {
-        ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable', 
+        ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable',
 'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode'])");
       }
 
diff --git a/onnxruntime/test/providers/cpu/math/einsum_test.cc b/onnxruntime/test/providers/cpu/math/einsum_test.cc
index b7758fd2fd..05b936a41e 100644
--- a/onnxruntime/test/providers/cpu/math/einsum_test.cc
+++ b/onnxruntime/test/providers/cpu/math/einsum_test.cc
@@ -22,7 +22,7 @@ TEST(Einsum, ExplicitEinsumAsIdentity_1D_input) {
   test.AddAttribute<std::string>("equation", "i->i");
   test.AddInput<float>("x", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
   test.AddOutput<float>("y", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
-  test.Run();
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
 }
 
 // Implicit
@@ -31,7 +31,7 @@ TEST(Einsum, ImplicitEinsumAsIdentity_1D_input) {
   test.AddAttribute<std::string>("equation", "i");
   test.AddInput<float>("x", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
   test.AddOutput<float>("y", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
-  test.Run();
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
 }
 
 // Theme: Transpose/Permutation
diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc
index aea59e0667..bc85ae03d0 100644
--- a/onnxruntime/test/util/default_providers.cc
+++ b/onnxruntime/test/util/default_providers.cc
@@ -86,10 +86,19 @@ std::unique_ptr<IExecutionProvider> MIGraphXExecutionProviderWithOptions(const O
   return nullptr;
 }
 
+std::unique_ptr<IExecutionProvider> OpenVINOExecutionProviderWithOptions(const OrtOpenVINOProviderOptions* params) {
+#ifdef USE_OPENVINO
+  return OpenVINOProviderFactoryCreator::Create(params)->CreateProvider();  // EP built from caller's options
+#else
+  ORT_UNUSED_PARAMETER(params);  // params is only consumed when USE_OPENVINO is defined
+#endif
+  return nullptr;  // only reached when USE_OPENVINO is not defined
+}
+
 std::unique_ptr<IExecutionProvider> DefaultOpenVINOExecutionProvider() {
 #ifdef USE_OPENVINO
-  OrtOpenVINOProviderOptions params;
-  return OpenVINOProviderFactoryCreator::Create(&params)->CreateProvider();
+  ProviderOptions provider_options_map;
+  return OpenVINOProviderFactoryCreator::Create(&provider_options_map)->CreateProvider();
 #else
   return nullptr;
 #endif
diff --git a/onnxruntime/test/util/include/default_providers.h b/onnxruntime/test/util/include/default_providers.h
index d6c9339af0..1325f7aa43 100644
--- a/onnxruntime/test/util/include/default_providers.h
+++ b/onnxruntime/test/util/include/default_providers.h
@@ -18,9 +18,6 @@ std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGrap
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nnapi(
     uint32_t flags, const optional<std::string>& partitioning_stop_ops_list);
 // std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tvm(const char*);
-std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
-    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, const char* cache_dir);
-std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Rknpu();
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Rocm(const OrtROCMProviderOptions* provider_options);
 std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params);
@@ -45,6 +42,7 @@ std::unique_ptr<IExecutionProvider> TensorrtExecutionProviderWithOptions(const O
 std::unique_ptr<IExecutionProvider> TensorrtExecutionProviderWithOptions(const OrtTensorRTProviderOptionsV2* params);
 std::unique_ptr<IExecutionProvider> DefaultMIGraphXExecutionProvider();
 std::unique_ptr<IExecutionProvider> MIGraphXExecutionProviderWithOptions(const OrtMIGraphXProviderOptions* params);
+std::unique_ptr<IExecutionProvider> OpenVINOExecutionProviderWithOptions(const OrtOpenVINOProviderOptions* params);
 std::unique_ptr<IExecutionProvider> DefaultOpenVINOExecutionProvider();
 std::unique_ptr<IExecutionProvider> DefaultNnapiExecutionProvider();
 std::unique_ptr<IExecutionProvider> DefaultRknpuExecutionProvider();