Openvino ep ort 23.1 (#17911)

### Description Integration to OpenVINO 2023.1 ### Motivation and Context - Alignment with latest OpenVINO Version. - Device name change from VPUX to NPU and Remove from supported list until official public support is available. --------- Co-authored-by: Sahar Fatima <sfatima.3001@gmail.com> Co-authored-by: Saurabh Kale <saurabh1.kale@intel.com> Co-authored-by: Suryaprakash Shanmugam <suryaprakash.shanmugam@intel.com> Co-authored-by: sfatimar <sahar.fatima@intel.com>
2026-06-06 00:03:22 +00:00 · 2023-11-01 08:39:39 -07:00 · 2023-11-01 08:39:39 -07:00 · d87216bcb1
commit d87216bcb1
parent 69f029797d
35 changed files with 563 additions and 357 deletions
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@ -1282,14 +1282,6 @@ if (onnxruntime_USE_OPENVINO)
    add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
  endif()

-  if (onnxruntime_USE_OPENVINO_VPUX_FP16)
-    add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
-  endif()
-
-  if (onnxruntime_USE_OPENVINO_VPUX_U8)
-    add_definitions(-DOPENVINO_CONFIG_VPUX_U8=1)
-  endif()
-
  if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
    add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
    add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
@ -1310,16 +1302,6 @@ if (onnxruntime_USE_OPENVINO)
    add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
  endif()

-  if (onnxruntime_USE_OPENVINO_VPUX_FP32_NP)
-    add_definitions(-DOPENVINO_CONFIG_VPUX_FP32=1)
-    add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
-  endif()
-
-  if (onnxruntime_USE_OPENVINO_VPUX_FP16_NP)
-    add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
-    add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
-  endif()
-
  if (onnxruntime_USE_OPENVINO_HETERO)
    add_definitions(-DOPENVINO_CONFIG_HETERO=1)
    add_definitions(-DDEVICE_NAME="${onnxruntime_USE_OPENVINO_DEVICE}")
--- a/docs/python/ReadMeOV.rst
+++ b/docs/python/ReadMeOV.rst
@ -7,7 +7,6 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
 - Intel® CPUs
 - Intel® integrated GPUs
 - Intel® discrete GPUs
- - Intel® integrated VPUs

 Installation
 ------------
@ -22,7 +21,6 @@ This package supports:
 - Intel® CPUs
 - Intel® integrated GPUs
 - Intel® discrete GPUs
- - Intel® integrated VPUs

 ``pip3 install onnxruntime-openvino``

--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@ -611,7 +611,7 @@ typedef struct OrtMIGraphXProviderOptions {
 typedef struct OrtOpenVINOProviderOptions {
 #ifdef __cplusplus
  OrtOpenVINOProviderOptions() : device_type{},
-                                 enable_vpu_fast_compile{},
+                                 enable_npu_fast_compile{},
                                 device_id{},
                                 num_of_threads{},
                                 cache_dir{},
@ -624,7 +624,7 @@ typedef struct OrtOpenVINOProviderOptions {
   * Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
   */
  const char* device_type;
-  unsigned char enable_vpu_fast_compile;  ///< 0 = disabled, nonzero = enabled
+  unsigned char enable_npu_fast_compile;  ///< 0 = disabled, nonzero = enabled
  const char* device_id;
  size_t num_of_threads;  ///< 0 = Use default number of threads
  const char* cache_dir;  // path is set to empty by default
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@ -2,9 +2,7 @@
 // Licensed under the MIT License

 #include <fstream>
-#include <vector>
-#include <string>
-#include <memory>
+#include <utility>

 #include "core/providers/shared_library/provider_api.h"
 #include "contexts.h"
@ -18,7 +16,8 @@ namespace openvino_ep {
 static std::unique_ptr<GlobalContext> g_global_context;

 GlobalContext& BackendManager::GetGlobalContext() {
-  // This is not thread safe to call for the first time, but it is first called on the main thread by the constructor so it is safe.
+  // This is not thread safe to call for the first time,
+  // but it is first called on the main thread by the constructor so it is safe.
  if (!g_global_context)
    g_global_context = std::make_unique<GlobalContext>();
  return *g_global_context;
@ -88,7 +87,9 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
                         << "Backend created for graph " << subgraph_context_.subgraph_name;
    }
  } else {
-    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;
+    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. "
+                       << "Initializing backend for graph "
+                       << subgraph_context_.subgraph_name;

    subgraph_context_.has_dynamic_input_shape = false;
    try {
@ -104,7 +105,7 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
 bool BackendManager::ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const {
  bool has_batched_inputs = true;

-  for (int i = 0; i < (int)subgraph_context_.input_indexes.size(); i++) {
+  for (int i = 0; i < static_cast<int>(subgraph_context_.input_indexes.size()); i++) {
    auto& input = model_proto.graph().input(subgraph_context_.input_indexes[i]);

    // Batch-process only raw image inputs (NCHW or NHWC layouts)
@ -215,7 +216,10 @@ BackendManager::ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_pr
  auto graph_proto = model_copy->mutable_graph();

  for (size_t i = 0, limit = input_shapes.size(); i < limit; i++) {
-    auto g_in_shape = graph_proto->mutable_input((int)i)->mutable_type()->mutable_tensor_type()->mutable_shape();
+    auto g_in_shape = graph_proto->mutable_input(static_cast<int>(i))
+                          ->mutable_type()
+                          ->mutable_tensor_type()
+                          ->mutable_shape();
    g_in_shape->clear_dim();
    const auto& shape = input_shapes[i];
    for (size_t dim = 0, end = shape.size(); dim < end; dim++) {
@ -234,7 +238,11 @@ BackendManager::ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_p
  auto graph_proto = model_copy->mutable_graph();

  for (int i = 0; i < graph_proto->input_size(); i++) {
-    ONNX_NAMESPACE::TensorShapeProto* g_in_shape = graph_proto->mutable_input((int)i)->mutable_type()->mutable_tensor_type()->mutable_shape();
+    ONNX_NAMESPACE::TensorShapeProto* g_in_shape =
+        graph_proto->mutable_input(static_cast<int>(i))
+            ->mutable_type()
+            ->mutable_tensor_type()
+            ->mutable_shape();
    g_in_shape->mutable_dim(0)->clear_dim_value();
    g_in_shape->mutable_dim(0)->set_dim_value(1);
  }
--- a/onnxruntime/core/providers/openvino/backend_manager.h
+++ b/onnxruntime/core/providers/openvino/backend_manager.h
@ -3,6 +3,11 @@

 #pragma once

+#include <vector>
+#include <map>
+#include <memory>
+#include <string>
+
 #include "ov_interface.h"
 #include "contexts.h"
 #include "ibackend.h"
@ -13,7 +18,9 @@ namespace openvino_ep {
 // Singleton class that manages all the backends
 class BackendManager {
 public:
-  BackendManager(const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger);
+  BackendManager(const onnxruntime::Node& fused_node,
+                 const onnxruntime::GraphViewer& subgraph,
+                 const logging::Logger& logger);
  void Compute(OrtKernelContext* context);
  void ShutdownBackendManager();
  static GlobalContext& GetGlobalContext();
@ -21,7 +28,9 @@ class BackendManager {

 private:
  std::unique_ptr<ONNX_NAMESPACE::ModelProto> GetModelProtoFromFusedNode(
-      const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger) const;
+      const onnxruntime::Node& fused_node,
+      const onnxruntime::GraphViewer& subgraph,
+      const logging::Logger& logger) const;
  bool ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& subgraph) const;
  bool ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const;

--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@ -1,9 +1,7 @@
 // Copyright (C) 2019-2022 Intel Corporation
 // Licensed under the MIT License

-#include <map>
-#include <string>
-#include <memory>
+#include <algorithm>
 #include <sstream>
 #include <fstream>

@ -58,7 +56,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
  try {
    auto cnn_network = global_context.ie_core.ReadModel(model);
    if ((subgraph_context.precision == "FP16") &&
-        (global_context.device_type.find("VPUX") == std::string::npos)) {
+        (global_context.device_type.find("NPU") == std::string::npos)) {
      // FP16 transformations
      ov::pass::ConvertFP32ToFP16 pass_obj;
      pass_obj.run_on_model(cnn_network);
@ -88,7 +86,8 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
      size_t index = results.size() - 1;

      for (auto it = results.rbegin(); it != results.rend(); ++it) {
-        if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
+        if (auto const_node =
+                std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
          const_outputs_map[(*it)->get_friendly_name()] = const_node;
          results.erase(results.begin() + index);
        }
@ -254,7 +253,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,

 void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
                            std::ostream& stream, std::string deviceName) {
-  long long totalTime = 0;
+  int64_t totalTime = 0;
  // Print performance counts
  stream << std::endl
         << "performance counts:" << std::endl
--- a/onnxruntime/core/providers/openvino/backend_utils.h
+++ b/onnxruntime/core/providers/openvino/backend_utils.h
@ -4,9 +4,15 @@
 #pragma once

 #define ORT_API_MANUAL_INIT
+#include <iomanip>
+#include <unordered_map>
+#include <map>
+#include <memory>
+#include <vector>
+#include <string>
+
 #include "core/session/onnxruntime_cxx_api.h"
 #include "contexts.h"
-#include <iomanip>
 #include "ov_interface.h"
 #ifdef _WIN32
 #include <direct.h>
@ -57,7 +63,9 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
                    size_t batch_slice_idx);

 std::shared_ptr<OVNetwork>
-CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context,
+CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto,
+              const GlobalContext& global_context,
+              const SubGraphContext& subgraph_context,
              std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);

 void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
--- a/onnxruntime/core/providers/openvino/backends/backend_factory.cc
+++ b/onnxruntime/core/providers/openvino/backends/backend_factory.cc
@ -16,7 +16,7 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
                            const SubGraphContext& subgraph_context) {
  std::string type = global_context.device_type;
  if (type == "CPU" || type.find("GPU") != std::string::npos ||
-      type.find("VPUX") != std::string::npos ||
+      type.find("NPU") != std::string::npos ||
      type.find("HETERO") != std::string::npos ||
      type.find("MULTI") != std::string::npos ||
      type.find("AUTO") != std::string::npos) {
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@ -6,10 +6,10 @@
 #include <memory>
 #include <sstream>
 #include <fstream>
+#include <utility>

 #include "core/providers/shared_library/provider_api.h"
 #include "../backend_utils.h"
-// #include <ngraph/pass/constant_folding.hpp>
 #include "basic_backend.h"
 #include "../backend_manager.h"

@ -57,33 +57,39 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
        cl_context ctx = static_cast<cl_context>(global_context_.context);
        remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), ctx);
        ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-        exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name);
+        exe_network_ = global_context_.ie_core.LoadNetwork(
+            ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name);
        LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
      } else {
        ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-        exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+        exe_network_ = global_context_.ie_core.LoadNetwork(
+            ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
        LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
      }
 #else
 #if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
      if (!subgraph_context_.has_dynamic_input_shape && dev_prec != "CPU_FP16") {
        const std::string model = model_proto.SerializeAsString();
-        exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
+        exe_network_ = global_context_.ie_core.LoadNetwork(
+            model, hw_target, device_config, subgraph_context_.subgraph_name);
        LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
      } else {
        ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-        exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+        exe_network_ = global_context_.ie_core.LoadNetwork(
+            ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
        LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
      }
 #else
      ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-      exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+      exe_network_ = global_context_.ie_core.LoadNetwork(
+          ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
      LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
 #endif
 #endif
    } else {
      ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-      exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+      exe_network_ = global_context_.ie_core.LoadNetwork(
+          ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
      LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
    }
  } catch (const char* msg) {
@ -127,10 +133,10 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
  }
 #endif
 #if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
-  if (global_context_.device_type.find("VPUX") != std::string::npos) {
+  if (global_context_.device_type.find("NPU") != std::string::npos) {
    std::pair<std::string, ov::Any> device_property;
-    device_property = std::make_pair("VPU_COMPILER_TYPE", "MLIR");
-    device_config.emplace(ov::device::properties("VPUX", device_property));
+    device_property = std::make_pair("NPU_COMPILER_TYPE", "DRIVER");
+    device_config.emplace(ov::device::properties("NPU", device_property));
  }
 #endif
 }
@ -152,12 +158,12 @@ void BasicBackend::EnableCaching() {
 }

 void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) {
-  if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
+  if (global_context_.enable_opencl_throttling == true &&
+      global_context_.device_type.find("GPU") != std::string::npos) {
    LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
    std::pair<std::string, ov::Any> device_property;
    device_property = std::make_pair("PLUGIN_THROTTLE", "1");
    device_config.emplace(ov::device::properties("GPU_CONFIG_KEY", device_property));
-    // device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
  }
 }

@ -187,7 +193,9 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
      if (input_names.find(onnx_input_name) != input_names.end()) {
        input_name = onnx_input_name;
      } else {
-        throw(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
+        throw(log_tag +
+              "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name +
+              " doesn't exist in the list of OpenVINO input tensor names");
      }
      size_t batch_slice_idx = 0;
      if (subgraph_context_.has_dynamic_input_shape &&
@ -197,6 +205,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
        auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
        auto tensor_shape = tensor_info.GetShape();
        auto tensor_size = tensor_shape.size();
+        const char* tensor_data = tensor.GetTensorData<char>();
        auto tensor_iter = 0;
        ov::Shape input_tensor_shape = ov::Shape(tensor_size, 0);
        for (auto i = tensor_shape.begin(); i != tensor_shape.end(); ++i) {
@ -204,8 +213,16 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
          tensor_iter += 1;
        }
        auto input = ie_cnn_network_->get_parameters().at(input_idx);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
-        FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+        OVTensorPtr tensor_ptr;
+        // avoid input copies on the CPU device
+        if (global_context_.device_type.find("CPU") != std::string::npos) {
+          tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape,
+                                                    (void*)tensor_data);
+        } else {
+          tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
+          FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+        }
+
        try {
          infer_request->SetTensor(input_name, tensor_ptr);
        } catch (const char* msg) {
@ -251,7 +268,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
      if (input_names.find(onnx_input_name) != input_names.end()) {
        input_name = onnx_input_name;
      } else {
-        throw(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
+        throw(log_tag +
+              "Input names mismatch between OpenVINO and ONNX. " +
+              onnx_input_name +
+              " doesn't exist in the list of OpenVINO input tensor names");
      }
      input_idx++;
      // Kernel Context Input Buffer
@ -264,9 +284,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
        const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
        // Create an Input Remote Blob
        auto input = ie_cnn_network_->get_parameters().at(0);
-        auto remote_blob = remote_context_->create_tensor(input->get_element_type(), input->get_shape(), *shared_buffer_const);
-        ov::Tensor tensor = static_cast<ov::Tensor>(remote_blob);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor);
+        auto remote_blob = remote_context_->create_tensor(
+            input->get_element_type(), input->get_shape(), *shared_buffer_const);
+        ov::Tensor tensor_remote = static_cast<ov::Tensor>(remote_blob);
+        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_remote);
        infer_request->SetTensor(input_name, tensor_ptr);
      } else {
        OVTensorPtr graph_input_blob;
@ -295,7 +316,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
        }
      }
      if (!output_name_found) {
-        throw std::string(log_tag + "Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " + onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
+        throw std::string(
+            log_tag +
+            "Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " +
+            onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
      }

      size_t batch_size = 1;
@ -307,9 +331,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
        const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
        // Create a shared Blob, set the Infer Request Output Blob
        auto output = ie_cnn_network_->get_results().at(0);
-        auto remote_tensor = remote_context_->create_tensor(output->get_element_type(), output->get_shape(), *shared_buffer_const);
-        ov::Tensor tensor = static_cast<ov::Tensor>(remote_tensor);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor);
+        auto remote_tensor =
+            remote_context_->create_tensor(output->get_element_type(), output->get_shape(), *shared_buffer_const);
+        ov::Tensor tensor_t = static_cast<ov::Tensor>(remote_tensor);
+        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_t);
        try {
          infer_request->SetTensor(output_name, tensor_ptr);
        } catch (const char* msg) {
@ -364,7 +389,8 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
        throw(msg);
      }
      size_t batch_size = 1;
-      auto output_tensor = GetOutputTensor(context, batch_size, infer_request, output_name, subgraph_context_.output_names);
+      auto output_tensor =
+          GetOutputTensor(context, batch_size, infer_request, output_name, subgraph_context_.output_names);
      auto mem_info = output_tensor.GetTensorMemoryInfo();
      if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
        return;
@ -465,7 +491,8 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
 #ifndef IO_BUFFER_ENABLED  // Printing performance counts is disabled when IO_BUFFER_ENABLED
    if (openvino_ep::backend_utils::IsDebugEnabled()) {
      inferRequestsQueue_->printstatus();  // Printing the elements of infer_requests_ vector pool only in debug mode
-      std::string& hw_target = (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
+      std::string& hw_target =
+          (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
      printPerformanceCounts(infer_request, std::cout, hw_target);
    }
 #endif
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.h
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h
@ -6,16 +6,17 @@
 #include <memory>

 #define ORT_API_MANUAL_INIT
-#include "core/session/onnxruntime_cxx_api.h"
-#include "core/providers/openvino/contexts.h"
-#include "core/providers/openvino/ibackend.h"
-#include "core/providers/openvino/ov_interface.h"
 #include <vector>
 #include <iostream>
 #include <string>
 #include <condition_variable>
 #include <mutex>

+#include "core/session/onnxruntime_cxx_api.h"
+#include "core/providers/openvino/contexts.h"
+#include "core/providers/openvino/ibackend.h"
+#include "core/providers/openvino/ov_interface.h"
+
 namespace onnxruntime {
 namespace openvino_ep {

@ -29,7 +30,7 @@ class BasicBackend : public IBackend {
  void Infer(OrtKernelContext* context) override;

 private:
-  bool ImportBlob(std::string hw_target, bool vpu_status);
+  bool ImportBlob(std::string hw_target, bool npu_status);
  void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&);
  bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
  void PopulateConfigValue(ov::AnyMap& device_config);
--- a/onnxruntime/core/providers/openvino/contexts.h
+++ b/onnxruntime/core/providers/openvino/contexts.h
@ -3,6 +3,9 @@

 #pragma once

+#include <vector>
+#include <unordered_map>
+#include <string>
 #include "ov_interface.h"

 namespace onnxruntime {
@ -12,7 +15,7 @@ namespace openvino_ep {
 struct GlobalContext {
  OVCore ie_core;
  bool is_wholly_supported_graph = false;
-  bool enable_vpu_fast_compile = false;
+  bool enable_npu_fast_compile = false;
  bool enable_opencl_throttling = false;
  bool enable_dynamic_shapes = false;
  size_t num_of_threads;
@ -34,7 +37,7 @@ struct GlobalContext {
 struct SubGraphContext {
  bool has_dynamic_input_shape = false;
  bool enable_batching = false;
-  bool set_vpu_config = false;
+  bool set_npu_config = false;
  bool is_constant = false;
  void* context = 0;
  std::string subgraph_name;
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@ -17,17 +17,18 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv

  openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_;
  openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
-  openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
+  openvino_ep::BackendManager::GetGlobalContext().enable_npu_fast_compile = info.enable_npu_fast_compile_;
  openvino_ep::BackendManager::GetGlobalContext().cache_dir = info.cache_dir_;
  openvino_ep::BackendManager::GetGlobalContext().num_streams = info.num_streams_;
  openvino_ep::BackendManager::GetGlobalContext().context = info.context_;
  openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_;
  openvino_ep::BackendManager::GetGlobalContext().enable_dynamic_shapes = info.enable_dynamic_shapes_;

-  if ((int)info.num_of_threads_ <= 0) {
+  if (static_cast<int>(info.num_of_threads_) <= 0) {
    openvino_ep::BackendManager::GetGlobalContext().num_of_threads = 8;
-  } else if ((int)info.num_of_threads_ > 8) {
-    std::string err_msg = std::string("\n [ERROR] num_of_threads configured during runtime is: ") + std::to_string(info.num_of_threads_) + "\nnum_of_threads configured should be >0 and <=8.\n";
+  } else if (static_cast<int>(info.num_of_threads_) > 8) {
+    std::string err_msg = std::string("\n [ERROR] num_of_threads configured during runtime is: ") +
+                          std::to_string(info.num_of_threads_) + "\nnum_of_threads configured should be >0 and <=8.\n";
    ORT_THROW(err_msg);
  } else {
    openvino_ep::BackendManager::GetGlobalContext().num_of_threads = info.num_of_threads_;
@ -56,7 +57,8 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
              device_found = true;
              break;
            }
-            if (info.device_type_.find("VPUX") != std::string::npos && (info.precision_ == "FP16" || info.precision_ == "U8")) {
+            if ((info.device_type_.find("NPU") != std::string::npos) &&
+                (info.precision_ == "FP16" || info.precision_ == "U8")) {
              device_found = true;
              break;
            }
@ -109,11 +111,14 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
  openvino_ep::BackendManager::GetGlobalContext().onnx_model_name = graph_viewer.Name();
 #ifdef _WIN32
  std::wstring onnx_path = graph_viewer.ModelPath().ToPathString();
-  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = std::string(onnx_path.begin(), onnx_path.end());
+  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name =
+      std::string(onnx_path.begin(), onnx_path.end());
 #else
-  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = graph_viewer.ModelPath().ToPathString();
+  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name =
+      graph_viewer.ModelPath().ToPathString();
 #endif
-  openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain);
+  openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version =
+      graph_viewer.DomainToVersionMap().at(kOnnxDomain);

 #if defined(OPENVINO_2022_1)
  openvino_ep::GetCapability obj(graph_viewer,
@ -151,7 +156,8 @@ common::Status OpenVINOExecutionProvider::Compile(

    openvino_ep::BackendManager::GetGlobalContext().use_api_2 = true;

-    std::shared_ptr<openvino_ep::BackendManager> backend_manager = std::make_shared<openvino_ep::BackendManager>(fused_node, graph_body_viewer, *GetLogger());
+    std::shared_ptr<openvino_ep::BackendManager> backend_manager =
+        std::make_shared<openvino_ep::BackendManager>(fused_node, graph_body_viewer, *GetLogger());

    compute_info.create_state_func =
        [backend_manager](ComputeContext* context, FunctionState* state) {
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@ -3,19 +3,28 @@

 #pragma once

-#include "backend_manager.h"
 #include <map>
 #include <algorithm>
 #include <iostream>
+#include <string>
+#include <memory>
+#include <vector>
+
+#include "backend_manager.h"

 namespace onnxruntime {

 static void print_build_options() {
  std::cout << "[ERROR] INVALID DEVICE BUILD TYPE SPECIFIED" << std::endl;
-  std::cout << "Specify the keyword HETERO (or) MULTI (or) AUTO followed by the devices in the order of priority you want to build" << std::endl;
-  std::cout << "The different hardware devices that can be added with HETERO/MULTI/AUTO build ";
-  std::cout << "are ['CPU','GPU','VPUX']" << std::endl;
-  std::cout << "An example of how to specify the HETERO or MULTI or AUTO build type. Ex: HETERO:GPU,CPU  Ex: MULTI:GPU,CPU Ex: AUTO:GPU,CPU" << std::endl;
+  std::cout << "Specify the keyword HETERO (or) MULTI (or) AUTO followed by the devices in the order of priority "
+            << "you want to build"
+            << std::endl;
+  std::cout << "The different hardware devices that can be added with HETERO/MULTI/AUTO build "
+            << "are ['CPU','GPU']"
+            << std::endl;
+  std::cout << "An example of how to specify the HETERO or MULTI or AUTO build type. "
+            << "Ex: HETERO:GPU,CPU  Ex: MULTI:GPU,CPU Ex: AUTO:GPU,CPU"
+            << std::endl;
 }

 static std::vector<std::string> split(const std::string& s, char delim) {
@ -39,7 +48,7 @@ static std::vector<std::string> parseDevices(const std::string& device_string) {
    print_build_options();
    ORT_THROW("Invalid device string: " + device_string);
  }
-  std::vector<std::string> dev_options = {"CPU", "GPU", "VPUX"};
+  std::vector<std::string> dev_options = {"CPU", "GPU"};
  for (std::string dev : devices) {
    if (!std::count(dev_options.begin(), dev_options.end(), dev)) {
      print_build_options();
@ -53,7 +62,7 @@ static std::vector<std::string> parseDevices(const std::string& device_string) {
 struct OpenVINOExecutionProviderInfo {
  std::string device_type_;
  std::string precision_;
-  bool enable_vpu_fast_compile_;
+  bool enable_npu_fast_compile_;
  std::string device_id_;
  size_t num_of_threads_;
  std::string cache_dir_;
@ -62,11 +71,18 @@ struct OpenVINOExecutionProviderInfo {
  bool enable_opencl_throttling_;
  bool enable_dynamic_shapes_;

-  explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id,
+  explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_npu_fast_compile, std::string dev_id,
                                         size_t num_of_threads, std::string cache_dir, int num_streams,
                                         void* context, bool enable_opencl_throttling,
                                         bool enable_dynamic_shapes)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), cache_dir_(cache_dir), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
+      : enable_npu_fast_compile_(enable_npu_fast_compile),
+        device_id_(dev_id),
+        num_of_threads_(num_of_threads),
+        cache_dir_(cache_dir),
+        num_streams_(num_streams),
+        context_(context),
+        enable_opencl_throttling_(enable_opencl_throttling),
+        enable_dynamic_shapes_(enable_dynamic_shapes) {
    if (dev_type == "") {
      LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
                         << "No runtime device selection option provided.";
@ -82,11 +98,11 @@ struct OpenVINOExecutionProviderInfo {
 #elif defined OPENVINO_CONFIG_GPU_FP16
      device_type_ = "GPU";
      precision_ = "FP16";
-#elif defined OPENVINO_CONFIG_VPUX_FP16
-      device_type_ = "VPUX";
+#elif defined OPENVINO_CONFIG_NPU_FP16
+      device_type_ = "NPU";
      precision_ = "FP16";
-#elif defined OPENVINO_CONFIG_VPUX_U8
-      device_type_ = "VPUX";
+#elif defined OPENVINO_CONFIG_NPU_U8
+      device_type_ = "NPU";
      precision_ = "U8";
 #elif defined OPENVINO_CONFIG_HETERO || defined OPENVINO_CONFIG_MULTI || defined OPENVINO_CONFIG_AUTO
 #ifdef DEVICE_NAME
@ -126,11 +142,11 @@ struct OpenVINOExecutionProviderInfo {
    } else if (dev_type == "GPU.1_FP16") {
      device_type_ = "GPU.1";
      precision_ = "FP16";
-    } else if (dev_type == "VPUX_FP16") {
-      device_type_ = "VPUX";
+    } else if (dev_type == "NPU_FP16") {
+      device_type_ = "NPU";
      precision_ = "FP16";
-    } else if (dev_type == "VPUX_U8") {
-      device_type_ = "VPUX";
+    } else if (dev_type == "NPU_U8") {
+      device_type_ = "NPU";
      precision_ = "U8";
    } else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0) {
      std::vector<std::string> devices = parseDevices(dev_type);
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@ -8,11 +8,16 @@

 namespace onnxruntime {
 struct OpenVINOProviderFactory : IExecutionProviderFactory {
-  OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
+  OpenVINOProviderFactory(const char* device_type, bool enable_npu_fast_compile,
                          const char* device_id, size_t num_of_threads,
                          const char* cache_dir, int num_streams, void* context,
                          bool enable_opencl_throttling, bool enable_dynamic_shapes)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
+      : enable_npu_fast_compile_(enable_npu_fast_compile),
+        num_of_threads_(num_of_threads),
+        num_streams_(num_streams),
+        context_(context),
+        enable_opencl_throttling_(enable_opencl_throttling),
+        enable_dynamic_shapes_(enable_dynamic_shapes) {
    device_type_ = (device_type == nullptr) ? "" : device_type;
    device_id_ = (device_id == nullptr) ? "" : device_id;
    cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
@ -24,7 +29,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {

 private:
  std::string device_type_;
-  bool enable_vpu_fast_compile_;
+  bool enable_npu_fast_compile_;
  std::string device_id_;
  size_t num_of_threads_;
  std::string cache_dir_;
@ -35,7 +40,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
 };

 std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
-  OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_,
+  OpenVINOExecutionProviderInfo info(device_type_, enable_npu_fast_compile_, device_id_, num_of_threads_,
                                     cache_dir_, num_streams_, context_, enable_opencl_throttling_,
                                     enable_dynamic_shapes_);
  return std::make_unique<OpenVINOExecutionProvider>(info);
@ -59,17 +64,18 @@ struct OpenVINO_Provider : Provider {

    std::string device_type = "";           // [device_type]: Overrides the accelerator hardware type and precision
                                            //   with these values at runtime.
-    bool enable_vpu_fast_compile = false;   // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to
-                                            // speeds up the model's compilation to VPU device specific format.
+    bool enable_npu_fast_compile = false;   // [enable_npu_fast_compile]: Fast-compile may be optionally enabled to
+                                            // speeds up the model's compilation to NPU device specific format.
    const char* device_id = "";             // [device_id]: Selects a particular hardware device for inference.
-    size_t num_of_threads = 8;              // [num_of_threads]: Overrides the accelerator default value of number of
+    int num_of_threads = 8;                 // [num_of_threads]: Overrides the accelerator default value of number of
                                            //  threads with this value at runtime.
    const char* cache_dir = "";             // [cache_dir]: specify the path to
                                            // dump and load the blobs for the model caching/kernel caching (GPU)
                                            // feature. If blob files are already present, it will be directly loaded.
    int num_streams = 1;                    // [num_streams]: Option that specifies the number of parallel inference
                                            // requests to be processed on a given `device_type`. Overrides the
-                                            // accelerator default value of number of streams with this value at runtime.
+                                            // accelerator default value of number of streams
+                                            // with this value at runtime.
    bool enable_opencl_throttling = false;  // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
                                            // device (Reduces CPU Utilization when using GPU)
    bool enable_dynamic_shapes = false;     // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device)
@ -80,14 +86,15 @@ struct OpenVINO_Provider : Provider {

      std::set<std::string> ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32",
                                                         "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
-                                                         "GPU.0_FP16", "GPU.1_FP16",
-                                                         "VPUX_FP16", "VPUX_U8"};
+                                                         "GPU.0_FP16", "GPU.1_FP16"};
      if (!((ov_supported_device_types.find(device_type) != ov_supported_device_types.end()) ||
-            (device_type.find("HETERO:") == 0) || (device_type.find("MULTI:") == 0) || (device_type.find("AUTO:") == 0))) {
+            (device_type.find("HETERO:") == 0) ||
+            (device_type.find("MULTI:") == 0) ||
+            (device_type.find("AUTO:") == 0))) {
        ORT_THROW(
            "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
            "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
-            "'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from"
+            "'GPU.0_FP16', 'GPU.1_FP16' or from"
            " HETERO/MULTI/AUTO options available. \n");
      }
    }
@ -97,30 +104,37 @@ struct OpenVINO_Provider : Provider {
    if (provider_options_map.find("cache_dir") != provider_options_map.end()) {
      cache_dir = provider_options_map.at("cache_dir").c_str();
    }
+
    if (provider_options_map.find("context") != provider_options_map.end()) {
-      context = (void*)provider_options_map.at("context").c_str();
+      std::string str = provider_options_map.at("context");
+      uint64_t number = std::strtoull(str.c_str(), nullptr, 16);
+      context = reinterpret_cast<void*>(number);
    }

    if (provider_options_map.find("num_of_threads") != provider_options_map.end()) {
      num_of_threads = std::stoi(provider_options_map.at("num_of_threads"));
      if (num_of_threads <= 0) {
        num_of_threads = 1;
+        LOGS_DEFAULT(WARNING) << "[OpenVINO-EP] The value for the key 'num_threads' should be in the positive range.\n "
+                              << "Executing with num_threads=1";
      }
    }

    if (provider_options_map.find("num_streams") != provider_options_map.end()) {
      num_streams = std::stoi(provider_options_map.at("num_streams"));
-      if (num_streams <= 0 && num_streams > 8) {
-        ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_streams' should be in the range of 1-8 \n");
+      if (num_streams <= 0) {
+        num_streams = 1;
+        LOGS_DEFAULT(WARNING) << "[OpenVINO-EP] The value for the key 'num_streams' should be in the range of 1-8.\n "
+                              << "Executing with num_streams=1";
      }
    }
    std::string bool_flag = "";
-    if (provider_options_map.find("enable_vpu_fast_compile") != provider_options_map.end()) {
-      bool_flag = provider_options_map.at("enable_vpu_fast_compile");
+    if (provider_options_map.find("enable_npu_fast_compile") != provider_options_map.end()) {
+      bool_flag = provider_options_map.at("enable_npu_fast_compile");
      if (bool_flag == "true" || bool_flag == "True")
-        enable_vpu_fast_compile = true;
+        enable_npu_fast_compile = true;
      else if (bool_flag == "false" || bool_flag == "False")
-        enable_vpu_fast_compile = false;
+        enable_npu_fast_compile = false;
      bool_flag = "";
    }

@ -141,7 +155,7 @@ struct OpenVINO_Provider : Provider {
        enable_dynamic_shapes = false;
    }
    return std::make_shared<OpenVINOProviderFactory>(const_cast<char*>(device_type.c_str()),
-                                                     enable_vpu_fast_compile,
+                                                     enable_npu_fast_compile,
                                                     device_id,
                                                     num_of_threads,
                                                     cache_dir,
@ -157,7 +171,6 @@ struct OpenVINO_Provider : Provider {
  void Shutdown() override {
    openvino_ep::BackendManager::ReleaseGlobalContext();
  }
-
 } g_provider;

 }  // namespace onnxruntime
--- a/onnxruntime/core/providers/openvino/ov_interface.cc
+++ b/onnxruntime/core/providers/openvino/ov_interface.cc
@ -29,7 +29,10 @@ std::shared_ptr<OVNetwork> OVCore::ReadModel(const std::string& model) const {
  }
 }

-OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name) {
+OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network,
+                                 std::string& hw_target,
+                                 ov::AnyMap& device_config,
+                                 std::string name) {
  ov::CompiledModel obj;
  try {
    obj = oe.compile_model(ie_cnn_network, hw_target, device_config);
@ -43,7 +46,10 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std
 }

 #if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
-OVExeNetwork OVCore::LoadNetwork(const std::string& model, std::string& hw_target, ov::AnyMap& device_config, std::string name) {
+OVExeNetwork OVCore::LoadNetwork(const std::string& model,
+                                 std::string& hw_target,
+                                 ov::AnyMap& device_config,
+                                 std::string name) {
  ov::CompiledModel obj;
  try {
    obj = oe.compile_model(model, ov::Tensor(), hw_target, device_config);
--- a/onnxruntime/core/providers/openvino/ov_interface.h
+++ b/onnxruntime/core/providers/openvino/ov_interface.h
@ -4,6 +4,7 @@
 #pragma once

 #include <vector>
+#include <memory>

 #if defined(OPENVINO_2022_1) || (OPENVINO_2022_2) || (OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1)
 #define OV_API_20
@ -43,9 +44,15 @@ class OVCore {

 public:
  std::shared_ptr<OVNetwork> ReadModel(const std::string& model_stream) const;
-  OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name);
+  OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network,
+                           std::string& hw_target,
+                           ov::AnyMap& device_config,
+                           std::string name);
 #if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
-  OVExeNetwork LoadNetwork(const std::string& model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name);
+  OVExeNetwork LoadNetwork(const std::string& model_stream,
+                           std::string& hw_target,
+                           ov::AnyMap& device_config,
+                           std::string name);
 #endif
  void SetCache(std::string cache_dir_path);
 #ifdef IO_BUFFER_ENABLED
@ -62,7 +69,7 @@ class OVExeNetwork {
  ov::CompiledModel obj;

 public:
-  OVExeNetwork(ov::CompiledModel md) { obj = md; }
+  explicit OVExeNetwork(ov::CompiledModel md) { obj = md; }
  OVExeNetwork() { obj = ov::CompiledModel(); }
  ov::CompiledModel& Get() { return obj; }
  OVInferRequest CreateInferRequest();
--- a/onnxruntime/core/providers/openvino/ov_versions/capabilities.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/capabilities.h
@ -3,6 +3,8 @@

 #pragma once
 #include <vector>
+#include <string>
+#include <memory>
 #include "data_ops.h"

 namespace onnxruntime {
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
@ -24,7 +24,8 @@ namespace openvino_ep {

 // Constructor
 GetCapability::GetCapability(const GraphViewer& graph_viewer_param, std::string device_type_param,
-                             const std::string version_param) : graph_viewer_(graph_viewer_param), device_type_(device_type_param) {
+                             const std::string version_param)
+    : graph_viewer_(graph_viewer_param), device_type_(device_type_param) {
  if (version_param == "V_2022_1") {
    data_ops_ = new DataOps(graph_viewer_, V_2022_1, device_type_);
  } else if (version_param == "V_2022_2") {
@ -114,11 +115,11 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
    }
    openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph = true;

-  } else {  // unsupported_nodes_idx.empty()
-
+  } else {                                     // unsupported_nodes_idx.empty()
 #if defined(OPENVINO_DISABLE_GRAPH_PARTITION)  // disables graph partition at build time
    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] DISABLE_GRAPH_PARTITION option is set";
-    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model is not fully supported by OpenVINO, so making the full model fall back to default CPU Execution Provider";
+    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model is not fully supported by OpenVINO, "
+                       << "so making the full model fall back to default CPU Execution Provider";
    return result;
 #endif

@ -159,7 +160,13 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {

      std::vector<std::string> cluster_graph_inputs, cluster_inputs, const_inputs, cluster_outputs;

-      GetInputsOutputsOfCluster(graph_viewer_, this_cluster, ng_required_initializers, cluster_graph_inputs, cluster_inputs, const_inputs, cluster_outputs);
+      GetInputsOutputsOfCluster(graph_viewer_,
+                                this_cluster,
+                                ng_required_initializers,
+                                cluster_graph_inputs,
+                                cluster_inputs,
+                                const_inputs,
+                                cluster_outputs);

      bool omit_subgraph = false;
      // Omitting zero dim subgraphs
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@ -2,11 +2,15 @@
 // Licensed under the MIT License

 #include <unordered_set>
+#include <string>
+#include <vector>
+#include <utility>
+#include <map>
+#include <set>
+
 #include "core/providers/shared_library/provider_api.h"
 #include "../backend_utils.h"
 #include "../backend_manager.h"
-#include <string>
-#include <vector>
 #include "data_ops.h"
 #include "capabilities.h"
 #include "utils.h"
@ -72,269 +76,355 @@ std::set<std::string> ops_supported_as_function = {

 std::vector<SupportedOp> supported_op_mode = {
    {"Abs", V_2020_4, {"CPU", "GPU"}},
-    {"Abs", V_2023_0, {"VPUX"}},
+    {"Abs", V_2023_0, {"NPU"}},
    {"Acos", V_2020_4, {"CPU"}},
    {"Acos", V_2022_1, {"GPU"}},
+    {"Acos", V_2023_1, {"NPU"}},
    {"Acosh", V_2020_4, {"CPU"}},
    {"Acosh", V_2022_1, {"GPU"}},
+    {"Acosh", V_2023_1, {"NPU"}},
    {"Add", V_2020_4, {"CPU", "GPU"}},
-    {"Add", V_2023_0, {"VPUX"}},
+    {"Add", V_2023_0, {"NPU"}},
    {"And", V_2020_4, {"CPU", "GPU"}},
+    {"And", V_2023_1, {"NPU"}},
    {"ArgMax", V_2020_4, {"CPU"}},
    {"ArgMax", V_2021_1, {"GPU"}},
    {"ArgMin", V_2020_4, {"CPU"}},
    {"ArgMin", V_2022_1, {"GPU"}},
    {"Asin", V_2020_4, {"CPU", "GPU"}},
+    {"Asin", V_2023_1, {"NPU"}},
    {"Asinh", V_2020_4, {"CPU", "GPU"}},
+    {"Asinh", V_2023_1, {"NPU"}},
    {"Atan", V_2020_4, {"CPU", "GPU"}},
+    {"Atan", V_2023_1, {"NPU"}},
    {"Atanh", V_2020_4, {"CPU"}},
    {"Atanh", V_2022_1, {"GPU"}},
+    {"Atanh", V_2023_1, {"NPU"}},
    {"AveragePool", V_2020_4, {"CPU", "GPU"}},
-    {"AveragePool", V_2023_0, {"VPUX"}},
+    {"AveragePool", V_2023_0, {"NPU"}},
    {"BatchNormalization", V_2020_4, {"CPU", "GPU"}},
-    {"BatchNormalization", V_2023_0, {"VPUX"}},
+    {"BatchNormalization", V_2023_0, {"NPU"}},
    {"BitShift", V_2022_1, {"CPU"}},
+    {"BitShift", V_2023_1, {"NPU"}},
    {"Cast", V_2020_4, {"CPU", "GPU"}},
-    {"Cast", V_2023_0, {"VPUX"}},
+    {"Cast", V_2023_0, {"NPU"}},
+    {"CastLike", V_2023_1, {"CPU", "GPU", "NPU"}},
    {"Ceil", V_2020_4, {"GPU"}},
    {"Ceil", V_2021_4, {"CPU"}},
+    {"Ceil", V_2023_1, {"NPU"}},
    {"Celu", V_2022_1, {"CPU", "GPU"}},
    {"Clip", V_2020_4, {"CPU", "GPU"}},
-    {"Clip", V_2023_0, {"VPUX"}},
+    {"Clip", V_2023_0, {"NPU"}},
+    {"Compress", V_2023_1, {"CPU", "GPU"}},
    {"Concat", V_2020_4, {"CPU", "GPU"}},
-    {"Concat", V_2023_0, {"VPUX"}},
+    {"Concat", V_2023_0, {"NPU"}},
    {"Constant", V_2020_4, {"CPU", "GPU"}},
-    {"Constant", V_2023_0, {"VPUX"}},
+    {"Constant", V_2023_0, {"NPU"}},
    {"ConstantOfShape", V_2020_4, {"CPU", "GPU"}},
-    {"ConstantOfShape", V_2023_0, {"VPUX"}},  // Gets mapped to broadcast op in the plugin.
+    {"ConstantOfShape", V_2023_0, {"NPU"}},  // Gets mapped to broadcast op in the plugin.
    {"Conv", V_2020_4, {"CPU", "GPU"}},
-    {"Conv", V_2023_0, {"VPUX"}},
+    {"Conv", V_2023_0, {"NPU"}},
    {"ConvInteger", V_2022_1, {"CPU", "GPU"}},
+    {"ConvInteger", V_2023_1, {"NPU"}},
    {"ConvTranspose", V_2020_4, {"CPU", "GPU"}},
+    {"ConvTranspose", V_2023_1, {"NPU"}},
    {"Cos", V_2020_4, {"CPU"}},
    {"Cos", V_2022_1, {"GPU"}},
-    {"Cos", V_2023_0, {"VPUX"}},
+    {"Cos", V_2023_0, {"NPU"}},
    {"Cosh", V_2020_4, {"CPU"}},
    {"Cosh", V_2022_1, {"GPU"}},
+    {"Cosh", V_2023_1, {"NPU"}},
    {"CumSum", V_2022_1, {"CPU", "GPU"}},
-    {"CumSum", V_2023_0, {"VPUX"}},
+    {"CumSum", V_2023_0, {"NPU"}},
    {"DepthToSpace", V_2020_4, {"CPU", "GPU"}},
-    {"DepthToSpace", V_2023_0, {"VPUX"}},
+    {"DepthToSpace", V_2023_0, {"NPU"}},
    {"DequantizeLinear", V_2021_4, {"CPU", "GPU"}},
-    {"DequantizeLinear", V_2023_0, {"VPUX"}},
+    {"DequantizeLinear", V_2023_0, {"NPU"}},
    {"Div", V_2020_4, {"CPU", "GPU"}},
-    {"Div", V_2023_0, {"VPUX"}},
+    {"Div", V_2023_0, {"NPU"}},
    {"Dropout", V_2020_4, {"CPU", "GPU"}},
-    {"Dropout", V_2023_0, {"VPUX"}},
+    {"Dropout", V_2023_0, {"NPU"}},
    {"Elu", V_2020_4, {"CPU", "GPU"}},
-    {"Elu", V_2023_0, {"VPUX"}},
+    {"Elu", V_2023_0, {"NPU"}},
    // {"Einsum", V_2023_0, {"CPU", "GPU"}},
    {"Equal", V_2020_4, {"CPU", "GPU"}},
-    {"Equal", V_2023_0, {"VPUX"}},  // Added for whisper decoder model.
+    {"Equal", V_2023_0, {"NPU"}},  // Added for whisper decoder model.
    {"Erf", V_2020_4, {"CPU", "GPU"}},
-    {"Erf", V_2023_0, {"VPUX"}},
+    {"Erf", V_2023_0, {"NPU"}},
    {"Exp", V_2020_4, {"CPU", "GPU"}},
-    {"Exp", V_2023_0, {"VPUX"}},
+    {"Exp", V_2023_0, {"NPU"}},
    {"Expand", V_2022_1, {"CPU", "GPU"}},
-    {"Expand", V_2023_0, {"VPUX"}},  // Gets mapped to broadcast op and multiply op in the plugin.
+    {"Expand", V_2023_0, {"NPU"}},  // Gets mapped to broadcast op and multiply op in the plugin.
    {"EyeLike", V_2022_1, {"CPU"}},
-    {"EyeLike", V_2023_0, {"VPUX"}},  // NoOP
+    {"EyeLike", V_2023_0, {"NPU"}},  // NoOP
    {"Flatten", V_2020_4, {"CPU", "GPU"}},
-    {"Flatten", V_2023_0, {"VPUX"}},
+    {"Flatten", V_2023_0, {"NPU"}},
    {"Floor", V_2020_4, {"CPU", "GPU"}},
+    {"Floor", V_2023_1, {"NPU"}},
    {"Gather", V_2020_4, {"CPU", "GPU"}},
-    {"Gather", V_2023_0, {"VPUX"}},
+    {"Gather", V_2023_0, {"NPU"}},
    {"GatherElements", V_2022_2, {"CPU", "GPU"}},
+    {"GatherElements", V_2023_1, {"NPU"}},
    {"GatherND", V_2021_4, {"CPU", "GPU"}},
+    {"GatherND", V_2023_1, {"NPU"}},
    {"Gemm", V_2020_4, {"CPU", "GPU"}},
-    {"Gemm", V_2023_0, {"VPUX"}},
+    {"Gemm", V_2023_0, {"NPU"}},
    {"GlobalAveragePool", V_2020_4, {"CPU", "GPU"}},
-    {"GlobalAveragePool", V_2023_0, {"VPUX"}},
+    {"GlobalAveragePool", V_2023_0, {"NPU"}},
    {"GlobalLpPool", V_2020_4, {"CPU", "GPU"}},
+    {"GlobalLpPool", V_2023_1, {"NPU"}},
    {"GlobalMaxPool", V_2022_1, {"CPU", "GPU"}},
+    {"GlobalMaxPool", V_2023_1, {"NPU"}},
    {"Greater", V_2020_4, {"CPU", "GPU"}},
-    {"Greater", V_2023_0, {"VPUX"}},
+    {"Greater", V_2023_0, {"NPU"}},
    {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
-    {"GreaterOrEqual", V_2023_0, {"VPUX"}},
+    {"GreaterOrEqual", V_2023_0, {"NPU"}},
    {"GridSample", V_2022_3, {"CPU"}},
    {"GridSample", V_2023_0, {"GPU"}},
+    {"GridSample", V_2023_1, {"NPU"}},
+    {"HardMax", V_2023_1, {"CPU", "GPU", "NPU"}},
    {"Identity", V_2020_4, {"CPU", "GPU"}},
-    {"Identity", V_2023_0, {"VPUX"}},  // NoOP
+    {"Identity", V_2023_0, {"NPU"}},  // NoOP
    {"If", V_2022_3, {"CPU", "GPU"}},
+    {"If", V_2023_1, {"NPU"}},
    {"ImageScaler", V_2022_1, {"CPU", "GPU"}},
-    {"ImageScaler", V_2023_0, {"VPUX"}},
+    {"ImageScaler", V_2023_0, {"NPU"}},
    {"InstanceNormalization", V_2020_4, {"CPU", "GPU"}},
-    {"InstanceNormalization", V_2023_0, {"VPUX"}},
+    {"InstanceNormalization", V_2023_0, {"NPU"}},
    {"HardSigmoid", V_2020_4, {"CPU", "GPU"}},
+    {"HardSigmoid", V_2023_1, {"NPU"}},
    {"HardMax", V_2022_1, {"CPU", "GPU"}},
    {"LeakyRelu", V_2020_4, {"CPU", "GPU"}},
-    {"LeakyRelu", V_2023_0, {"VPUX"}},
+    {"LeakyRelu", V_2023_0, {"NPU"}},
    {"Less", V_2020_4, {"CPU", "GPU"}},
-    {"Less", V_2023_0, {"VPUX"}},  // Added for whisper decoder model.
+    {"Less", V_2023_0, {"NPU"}},  // Added for whisper decoder model.
    {"LessOrEqual", V_2022_1, {"CPU", "GPU"}},
-    {"LessOrEqual", V_2023_0, {"VPUX"}},
+    {"LessOrEqual", V_2023_0, {"NPU"}},
    {"Log", V_2020_4, {"CPU", "GPU"}},
-    {"Log", V_2023_0, {"VPUX"}},
+    {"Log", V_2023_0, {"NPU"}},
    {"LogSoftMax", V_2022_1, {"CPU", "GPU"}},
    {"Loop", V_2021_4, {"CPU", "GPU"}},
+    {"LpNormalization", V_2023_1, {"CPU", "GPU", "NPU"}},
+    {"LpPool", V_2023_1, {"CPU", "GPU", "NPU"}},
    {"LRN", V_2020_4, {"CPU", "GPU"}},
-    {"LRN", V_2023_0, {"VPUX"}},
+    {"LRN", V_2023_0, {"NPU"}},
    {"LSTM", V_2020_4, {"CPU", "GPU"}},
+    {"LSTM", V_2023_1, {"NPU"}},
    {"MatMul", V_2020_4, {"CPU", "GPU"}},
-    {"MatMul", V_2023_0, {"VPUX"}},
+    {"MatMul", V_2023_0, {"NPU"}},
    {"MatMulInteger", V_2022_1, {"CPU"}},
+    {"MatMulInteger", V_2023_1, {"NPU"}},
    {"Max", V_2020_4, {"CPU", "GPU"}},
-    {"Max", V_2023_0, {"VPUX"}},
+    {"Max", V_2023_0, {"NPU"}},
    {"MaxPool", V_2020_4, {"CPU", "GPU"}},
-    {"MaxPool", V_2023_0, {"VPUX"}},
+    {"MaxPool", V_2023_0, {"NPU"}},
    {"Mean", V_2020_4, {"CPU", "GPU"}},
-    {"Mean", V_2023_0, {"VPUX"}},
+    {"Mean", V_2023_0, {"NPU"}},
    {"MeanVarianceNormalization", V_2022_1, {"CPU", "GPU"}},
+    {"MeanVarianceNormalization", V_2023_1, {"NPU"}},
    {"Min", V_2020_4, {"CPU", "GPU"}},
-    {"Min", V_2023_0, {"VPUX"}},
+    {"Min", V_2023_0, {"NPU"}},
    {"Mod", V_2022_1, {"CPU", "GPU"}},
    {"Mul", V_2020_4, {"CPU", "GPU"}},
-    {"Mul", V_2023_0, {"VPUX"}},
+    {"Mul", V_2023_0, {"NPU"}},
    {"Neg", V_2020_4, {"CPU", "GPU"}},
-    {"Neg", V_2023_0, {"VPUX"}},
+    {"Neg", V_2023_0, {"NPU"}},
    {"NonMaxSuppression", V_2021_1, {"CPU", "GPU"}},
+    {"NonMaxSuppression", V_2023_1, {"NPU"}},
    {"NonZero", V_2021_1, {"CPU"}},
    {"NonZero", V_2023_0, {"GPU"}},
    {"Not", V_2021_1, {"CPU", "GPU"}},
    {"Not", V_2020_4, {"CPU", "GPU"}},
+    {"Not", V_2023_1, {"NPU"}},
    {"OneHot", V_2020_4, {"CPU", "GPU"}},
+    {"OneHot", V_2023_1, {"NPU"}},
    {"Or", V_2022_1, {"CPU", "GPU"}},
+    {"Or", V_2023_1, {"NPU"}},
    {"Pad", V_2020_4, {"CPU", "GPU"}},
-    {"Pad", V_2023_0, {"VPUX"}},
+    {"Pad", V_2023_0, {"NPU"}},
    {"Pow", V_2020_4, {"CPU", "GPU"}},
-    {"Pow", V_2023_0, {"VPUX"}},
+    {"Pow", V_2023_0, {"NPU"}},
    {"PRelu", V_2020_4, {"CPU", "GPU"}},
-    {"PRelu", V_2023_0, {"VPUX"}},
+    {"PRelu", V_2023_0, {"NPU"}},
    {"QLinearMatMul", V_2022_3, {"CPU"}},
+    // {"QLinearMatMul", V_2023_1, {"NPU"}},
    {"QuantizeLinear", V_2021_4, {"CPU", "GPU"}},
-    {"QuantizeLinear", V_2023_0, {"VPUX"}},
+    {"QuantizeLinear", V_2023_0, {"NPU"}},
+    {"RNN", V_2023_1, {"CPU", "GPU"}},
    {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
+    {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
+    {"RandomNormalLike", V_2023_1, {"NPU"}},
    {"RandomNormal", V_2023_0, {"CPU", "GPU"}},
+    {"RandomNormal", V_2023_1, {"NPU"}},
    {"Range", V_2022_1, {"CPU", "GPU"}},
-    {"Range", V_2023_0, {"VPUX"}},
+    {"Range", V_2023_0, {"NPU"}},
    {"Reciprocal", V_2020_4, {"CPU", "GPU"}},
-    {"Reciprocal", V_2023_0, {"VPUX"}},
+    {"Reciprocal", V_2023_0, {"NPU"}},
    {"ReduceL1", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceL1", V_2023_1, {"NPU"}},
    {"ReduceL2", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceL2", V_2023_1, {"NPU"}},
    {"ReduceLogSum", V_2020_4, {"CPU"}},
    {"ReduceLogSum", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceLogSum", V_2023_1, {"NPU"}},
    {"ReduceLogSumExp", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceLogSumExp", V_2023_1, {"NPU"}},
    {"ReduceMax", V_2020_4, {"CPU", "GPU"}},
+    {"ReduceMax", V_2023_1, {"NPU"}},
    {"ReduceMean", V_2020_4, {"CPU", "GPU"}},
-    {"ReduceMean", V_2023_0, {"VPUX"}},
+    {"ReduceMean", V_2023_0, {"NPU"}},
    {"ReduceMin", V_2020_4, {"CPU", "GPU"}},
+    {"ReduceMin", V_2023_1, {"NPU"}},
    {"ReduceProd", V_2020_4, {"CPU"}},
    {"ReduceProd", V_2022_1, {"GPU"}},
+    {"ReduceProd", V_2023_1, {"NPU"}},
    {"ReduceSum", V_2020_4, {"CPU", "GPU"}},
+    // {"ReduceSum", V_2023_1, {"NPU"}},
    {"ReduceSumSquare", V_2020_4, {"CPU"}},
    {"ReduceSumSquare", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceSumSquare", V_2023_1, {"NPU"}},
    {"Relu", V_2020_4, {"CPU", "GPU"}},
-    {"Relu", V_2023_0, {"VPUX"}},
+    {"Relu", V_2023_0, {"NPU"}},
    {"Resize", V_2020_4, {"CPU"}},
    {"Resize", V_2022_1, {"GPU"}},
+    {"Resize", V_2023_1, {"NPU"}},
    {"Reshape", V_2020_4, {"CPU", "GPU"}},
-    {"Reshape", V_2023_0, {"VPUX"}},
+    {"Reshape", V_2023_0, {"NPU"}},
    {"ReverseSequence", V_2022_1, {"CPU", "GPU"}},
    {"RoiAlign", V_2021_1, {"CPU", "GPU"}},
+    {"RoiAlign", V_2023_1, {"NPU"}},
    {"Round", V_2021_4, {"CPU", "GPU"}},
+    {"Round", V_2023_1, {"NPU"}},
    {"Scatter", V_2022_1, {"CPU", "GPU"}},
+    {"Scatter", V_2023_1, {"NPU"}},
    {"ScatterElements", V_2022_1, {"CPU", "GPU"}},
+    {"ScatterElements", V_2023_1, {"NPU"}},
    {"ScatterND", V_2022_1, {"CPU", "GPU"}},
+    {"ScatterND", V_2023_1, {"NPU"}},
    {"Selu", V_2020_4, {"CPU", "GPU"}},
+    {"Selu", V_2023_1, {"NPU"}},
    {"Shape", V_2020_4, {"CPU", "GPU"}},
-    {"Shape", V_2023_0, {"VPUX"}},
+    {"Shape", V_2023_0, {"NPU"}},
    {"Shrink", V_2022_1, {"CPU", "GPU"}},
-    {"Shrink", V_2023_0, {"VPUX"}},
+    {"Shrink", V_2023_0, {"NPU"}},
    {"Sigmoid", V_2020_4, {"CPU", "GPU"}},
-    {"Sigmoid", V_2023_0, {"VPUX"}},
+    {"Sigmoid", V_2023_0, {"NPU"}},
    {"Sign", V_2020_4, {"CPU"}},
    {"Sign", V_2022_1, {"GPU"}},
-    {"Sign", V_2023_0, {"VPUX"}},
+    {"Sign", V_2023_0, {"NPU"}},
    {"Sin", V_2022_1, {"CPU", "GPU"}},
-    {"Sin", V_2023_0, {"VPUX"}},
+    {"Sin", V_2023_0, {"NPU"}},
    {"Sinh", V_2020_4, {"CPU"}},
+    {"Sinh", V_2023_1, {"NPU"}},
    {"Size", V_2022_1, {"CPU", "GPU"}},
+    {"Size", V_2023_1, {"NPU"}},
    {"Slice", V_2020_4, {"CPU", "GPU"}},
-    {"Slice", V_2023_0, {"VPUX"}},
+    {"Slice", V_2023_0, {"NPU"}},
    {"Softmax", V_2020_4, {"CPU", "GPU"}},
-    {"Softmax", V_2023_0, {"VPUX"}},
+    {"Softmax", V_2023_0, {"NPU"}},
    {"Softplus", V_2022_1, {"CPU", "GPU"}},
-    {"Softplus", V_2023_0, {"VPUX"}},
+    {"Softplus", V_2023_0, {"NPU"}},
    {"Softsign", V_2022_1, {"CPU", "GPU"}},
    {"SpaceToDepth", V_2020_4, {"CPU", "GPU"}},
-    {"SpaceToDepth", V_2023_0, {"VPUX"}},
+    {"SpaceToDepth", V_2023_0, {"NPU"}},
    {"Split", V_2020_4, {"CPU", "GPU"}},
-    {"Split", V_2023_0, {"VPUX"}},
+    {"Split", V_2023_0, {"NPU"}},
    {"Sqrt", V_2020_4, {"CPU", "GPU"}},
-    {"Sqrt", V_2023_0, {"VPUX"}},
+    {"Sqrt", V_2023_0, {"NPU"}},
    {"Squeeze", V_2020_4, {"CPU", "GPU"}},
-    {"Squeeze", V_2023_0, {"VPUX"}},
+    {"Squeeze", V_2023_0, {"NPU"}},
    {"Softsign", V_2020_4, {"CPU"}},
    {"Sub", V_2020_4, {"CPU", "GPU"}},
-    {"Sub", V_2023_0, {"VPUX"}},
+    {"Sub", V_2023_0, {"NPU"}},
    {"Sum", V_2020_4, {"CPU", "GPU"}},
-    {"Sum", V_2023_0, {"VPUX"}},
+    {"Sum", V_2023_0, {"NPU"}},
    {"Tan", V_2020_4, {"CPU", "GPU"}},
+    {"Tan", V_2023_1, {"NPU"}},
    {"Tanh", V_2020_4, {"CPU", "GPU"}},
-    {"Tanh", V_2023_0, {"VPUX"}},
+    {"Tanh", V_2023_0, {"NPU"}},
    {"ThresholdedRelu", V_2022_1, {"CPU", "GPU"}},
-    {"ThresholdedRelu", V_2023_0, {"VPUX"}},
+    {"ThresholdedRelu", V_2023_0, {"NPU"}},
    {"Tile", V_2021_3, {"CPU", "GPU"}},
-    {"Tile", V_2023_0, {"VPUX"}},
+    {"Tile", V_2023_0, {"NPU"}},
    {"Transpose", V_2020_4, {"CPU", "GPU"}},
-    {"Transpose", V_2023_0, {"VPUX"}},
+    {"Transpose", V_2023_0, {"NPU"}},
    {"Trilu", V_2023_0, {"CPU", "GPU"}},
+    {"Trilu", V_2023_1, {"NPU"}},
    {"TopK", V_2020_4, {"CPU", "GPU"}},
-    {"TopK", V_2023_0, {"VPUX"}},
+    {"TopK", V_2023_0, {"NPU"}},
+    {"Upsample", V_2020_4, {"CPU", "GPU"}},
    {"Unsqueeze", V_2020_4, {"CPU", "GPU"}},
-    {"Unsqueeze", V_2023_0, {"VPUX"}},
-    {"Upsample", V_2021_1, {"CPU"}},
-    {"Upsample", V_2021_4, {"GPU"}},
-    {"Upsample", V_2023_0, {"VPUX"}},
+    {"Unsqueeze", V_2023_0, {"NPU"}},
    {"Where", V_2022_1, {"CPU", "GPU"}},
-    {"Where", V_2023_0, {"VPUX"}},  // Added for whisper decoder model.
+    {"Where", V_2023_0, {"NPU"}},  // Added for whisper decoder model.
    {"Xor", V_2022_1, {"CPU", "GPU"}},
+    {"Xor", V_2023_1, {"NPU"}},
 };

 void DataOps::populate_types_supported() {
-  supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
-  supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
-  supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
-  supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
-  supported_types_initializer_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
-  supported_types_initializer_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
-  supported_types_initializer_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));

-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
-  supported_types_vpu_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+  supported_types_npu_.insert(
+      std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));

-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
-  supported_types_cpu_.insert(std::make_pair(V_2022_2, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2022_2, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));

-  supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
-  supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
-  supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
-  supported_types_gpu_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
-  supported_types_gpu_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
-  supported_types_gpu_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
-  supported_types_gpu_.insert(std::make_pair(V_2022_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2022_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
 }

 void DataOps::populate_op_mode_supported() {
@ -349,10 +439,10 @@ void DataOps::populate_op_mode_supported() {
  no_dimension_supported_.push_back({"Equal", V_2023_0, {"GPU"}});
  no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}});
  no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}});
-  no_dimension_supported_.push_back({"Greater", V_2023_0, {"VPUX"}});
+  no_dimension_supported_.push_back({"Greater", V_2023_0, {"NPU"}});
  no_dimension_supported_.push_back({"Less", V_2022_1, {"CPU"}});
  no_dimension_supported_.push_back({"Loop", V_2021_4, {"All"}});
-  no_dimension_supported_.push_back({"Max", V_2023_0, {"VPUX"}});
+  no_dimension_supported_.push_back({"Max", V_2023_0, {"NPU"}});
  no_dimension_supported_.push_back({"Min", V_2020_4, {"All"}});
  no_dimension_supported_.push_back({"Mul", V_2020_4, {"All"}});
  no_dimension_supported_.push_back({"QuantizeLinear", V_2021_4, {"All"}});
@ -382,11 +472,14 @@ void DataOps::populate_op_mode_supported() {
  {
    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3},
                             [this](const Node* node, const InitializedTensorSet&) {
-                               // Abs is not supproted with INT8 or INT32 as input data type on GPU
-                               if (device_id_.find("GPU") != std::string::npos) {
+                               // Abs is not supproted with INT8 or INT32 as input data type on GPU and NPU
+                               if ((device_id_.find("GPU") != std::string::npos) ||
+                                   (device_id_.find("NPU") != std::string::npos)) {
                                 for (size_t i = 0; i < node->InputDefs().size(); i++) {
-                                   if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8 ||
-                                       node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
+                                   if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
+                                           ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8 ||
+                                       node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
+                                           ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
                                     return true;
                                 }
                               }
@ -399,11 +492,14 @@ void DataOps::populate_op_mode_supported() {
                             [this](const Node* node, const InitializedTensorSet&) {
                               // tensor type does not support select last index
                               auto& attributes = node->GetAttributes();
-                               auto last_index_arg = attributes.count("select_last_index") > 0 ? attributes.at("select_last_index").i() : 0;
+                               auto last_index_arg =
+                                   attributes.count("select_last_index") > 0 ? attributes.at("select_last_index").i()
+                                                                             : 0;
                               if (last_index_arg != 0)
                                 return true;
                               // tensor type supports float as input for argmax and argmin
-                               if (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)
+                               if (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() !=
+                                   ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)
                                 return true;
                               return false;
                             }};
@ -415,7 +511,8 @@ void DataOps::populate_op_mode_supported() {
                             [this](const Node* node, const InitializedTensorSet&) {
                               if (device_id_.find("GPU") != std::string::npos) {
                                 // int64 data type is not supported on GPU
-                                 const bool data_is_int64 = node->InputDefs()[0]->Type()->find("int64") != std::string::npos;
+                                 const bool data_is_int64 =
+                                     node->InputDefs()[0]->Type()->find("int64") != std::string::npos;
                                 return data_is_int64;
                               }
                               return false;
@ -506,9 +603,12 @@ void DataOps::populate_op_mode_supported() {
                               if (device_id_.find("GPU") != std::string::npos) {
                                 auto x_data_type = node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
                                 auto y_data_type = node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
-                                 // currently both inputs with int32 are not supported and also both input datatypes should be same
-                                 const bool A_is_int32 = node->InputDefs()[0]->Type()->find("int32") != std::string::npos;
-                                 const bool B_is_int32 = node->InputDefs()[1]->Type()->find("int32") != std::string::npos;
+                                 // currently both inputs with int32 are not supported
+                                 // and also both input datatypes should be same
+                                 const bool A_is_int32 =
+                                     node->InputDefs()[0]->Type()->find("int32") != std::string::npos;
+                                 const bool B_is_int32 =
+                                     node->InputDefs()[1]->Type()->find("int32") != std::string::npos;
                                 if ((A_is_int32 && B_is_int32) || (x_data_type != y_data_type))
                                   return true;
                               }
@ -589,11 +689,13 @@ void DataOps::populate_op_mode_supported() {
                               if (device_id_.find("GPU") != std::string::npos) {
                                 auto slope = node->InputDefs()[1];
                                 // PRelu slope has to be an initializer or needs to come from a constant node
-                                 if (initializers.count(slope->Name()))
+                                 if (initializers.count(slope->Name())) {
                                   return false;
-                                 else {
-                                   for (auto input_node = node->InputNodesBegin(); input_node != node->InputNodesEnd(); ++input_node) {
-                                     if (GetInputCount(this->graph_viewer_.GetNode((*input_node).Index()), initializers) == 0)
+                                 } else {
+                                   for (auto input_node = node->InputNodesBegin();
+                                        input_node != node->InputNodesEnd(); ++input_node) {
+                                     if (GetInputCount(
+                                             this->graph_viewer_.GetNode((*input_node).Index()), initializers) == 0)
                                       return false;
                                   }
                                 }
@ -603,12 +705,12 @@ void DataOps::populate_op_mode_supported() {
    op_list_.insert({"PRelu", obj});
  }
  {
-    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
+    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
                             [this](const Node* node, const InitializedTensorSet&) {
                               const auto& input_arg = node->InputDefs()[1];
                               auto shape = input_arg->Shape();
                               // Reshape op with empty dim is Rejected for Myriad
-                               //[TODO] Is this condition required anymore with Myriad removed?
+                               // [TODO] Is this condition required anymore with Myriad removed?
                               if (shape != nullptr) {
                                 for (const auto& dim : input_arg->Shape()->dim()) {
                                   if (utils::HasDimValue(dim) && dim.dim_value() == 0)
@ -638,7 +740,8 @@ void DataOps::populate_op_mode_supported() {
                               if (device_id_.find("GPU") != std::string::npos) {
                                 // INT32 dataype is not supported as input
                                 for (size_t i = 0; i < node->InputDefs().size(); i++) {
-                                   if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
+                                   if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
+                                       ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
                                     return true;
                                 }
                               }
@ -650,9 +753,11 @@ void DataOps::populate_op_mode_supported() {
    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3},
                             [this](const Node* node, const InitializedTensorSet&) {
                               if (device_id_.find("GPU") != std::string::npos) {
-                                 auto output_data_type = node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
+                                 auto output_data_type =
+                                     node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
                                 // If the output of ScatterND op is BOOL, it is rejected for GPU.
-                                 if (output_data_type == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)
+                                 if (output_data_type ==
+                                     ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)
                                   return true;
                               }
                               return false;
@ -666,7 +771,8 @@ void DataOps::populate_op_mode_supported() {
                             [this](const Node* node, const InitializedTensorSet&) {
                               // If the Input of Shrink op is UINT8, it is rejected (Due to output mismatch)
                               for (size_t i = 0; i < node->InputDefs().size(); i++) {
-                                 if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8)
+                                 if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
+                                     ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8)
                                   return true;
                               }
                               return false;
@ -714,10 +820,11 @@ void DataOps::populate_op_mode_supported() {
    op_list_.insert({"Squeeze", obj});
  }
  {
-    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
+    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
                             [this](const Node* node, const InitializedTensorSet&) {
                               // If the operator is unsqueeze
-                               // If axes is an input, then we cannot produce a static graph. Conversion fails in convert_function_to_cnn_network.
+                               // If axes is an input, then we cannot produce a static graph.
+                               // Conversion fails in convert_function_to_cnn_network.
                               for (size_t i = 0; i < node->InputDefs().size(); i++) {
                                 if (node->InputDefs()[i]->Name() == "axes") {
                                   return true;
@ -728,14 +835,15 @@ void DataOps::populate_op_mode_supported() {
    op_list_.insert({"Unsqueeze", obj});
  }
  {
-    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
+    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
                             [this](const Node* node, const InitializedTensorSet&) {
                               // check for attributes
                               auto& upsample_attr = node->GetAttributes();
                               if (upsample_attr.count("scales") > 0) {
                                 auto& upsample_arg = upsample_attr.at("scales");
                                 auto float_size = upsample_arg.floats_size();
-                                 if (float_size > 2 && (upsample_arg.floats(0) != 1.f || upsample_arg.floats(1) != 1.f)) {
+                                 if (float_size > 2 &&
+                                     (upsample_arg.floats(0) != 1.f || upsample_arg.floats(1) != 1.f)) {
                                   return true;
                                 }
                               }
@ -750,9 +858,12 @@ void DataOps::populate_op_mode_supported() {
                                 }
                               }
                               // x_arg supports only float, int8 and float16 type
-                               if ((x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) ||
-                                   (x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8) ||
-                                   (x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)) {
+                               if ((x_arg->TypeAsProto()->tensor_type().elem_type() ==
+                                    ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) ||
+                                   (x_arg->TypeAsProto()->tensor_type().elem_type() ==
+                                    ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8) ||
+                                   (x_arg->TypeAsProto()->tensor_type().elem_type() ==
+                                    ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)) {
                                 return false;
                               } else {
                                 return true;
@ -849,9 +960,9 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) {
  } else {
    auto dtype = type_proto->tensor_type().elem_type();

-    if (device_id_.find("VPUX") != std::string::npos || device_id_.find("HETERO") != std::string::npos ||
+    if (device_id_.find("NPU") != std::string::npos || device_id_.find("HETERO") != std::string::npos ||
        device_id_.find("MULTI") != std::string::npos || device_id_.find("AUTO") != std::string::npos) {
-      for (auto const& var : supported_types_vpu_) {
+      for (auto const& var : supported_types_npu_) {
        if ((var.first <= version_id_) &&
            (var.second == dtype)) {
          return true;
@ -1079,7 +1190,9 @@ bool DataOps::node_is_supported(const std::map<std::string, std::set<std::string
  if (opset->second.find(optype) == opset->second.end() && op_fun == ops_supported_as_function.end()) {
 #ifndef NDEBUG
    if (openvino_ep::backend_utils::IsDebugEnabled()) {
-      std::cout << "The operator is not available in OpenVINO ngraph operators list nor the operator is a special ONNX function" << std::endl;
+      std::cout << "The operator is not available in OpenVINO ngraph operators list"
+                << "nor the operator is a special ONNX function"
+                << std::endl;
    }
 #endif
    return false;
@ -1095,10 +1208,12 @@ std::vector<NodeIndex> DataOps::GetUnsupportedNodeIndices(std::unordered_set<std
  for (const auto& node_idx : graph_viewer_.GetNodesInTopologicalOrder()) {
    if (node_is_supported(ng_supported_ops, node_idx)) {
      // Collect inputs that are initializers
-      graph_viewer_.GetNode(node_idx)->ForEachDef([&ng_required_initializers, this](const NodeArg& node_arg, bool is_input) {
-            if(is_input && this->graph_viewer_.GetAllInitializedTensors().count(node_arg.Name())) {
+      graph_viewer_.GetNode(node_idx)->ForEachDef([&ng_required_initializers, this](const NodeArg& node_arg,
+                                                                                    bool is_input) {
+            if (is_input && this->graph_viewer_.GetAllInitializedTensors().count(node_arg.Name())) {
                ng_required_initializers.insert(node_arg.Name());
-              } }, true);
+              } },
+                                                  true);
    } else {
      unsupported_nodes_idx.push_back(node_idx);
    }
@ -1110,7 +1225,8 @@ bool DataOps::IsOpSupportedOnlyInModel(std::string name) {
  return ops_supported_only_in_model.find(name) != ops_supported_only_in_model.end();
 }

-bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers, const Node* node) {
+bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers,
+                                                const Node* node) {
  if (node->OpType() == "Reshape") {
    const auto& shape_arg = node->InputDefs()[1];
    if (ng_required_initializers.find(shape_arg->Name()) == ng_required_initializers.end()) {
@ -1119,15 +1235,20 @@ bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>&
  } else if (node->OpType() == "Expand") {
    // nGraph only supports constant shape input values
    const auto& output = node->OutputDefs()[0];
-    if (output->TypeAsProto()->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)
+    if (output->TypeAsProto()->tensor_type().elem_type() !=
+        ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)
      return true;
  } else if (node->OpType() == "RoiAlign") {
    using onnx_dtype = ONNX_NAMESPACE::TensorProto_DataType;

-    onnx_dtype input_0_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
-    onnx_dtype input_1_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
-    onnx_dtype input_2_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[2]->TypeAsProto()->tensor_type().elem_type();
-    onnx_dtype output_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
+    onnx_dtype input_0_data_type =
+        (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
+    onnx_dtype input_1_data_type =
+        (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
+    onnx_dtype input_2_data_type =
+        (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[2]->TypeAsProto()->tensor_type().elem_type();
+    onnx_dtype output_data_type =
+        (ONNX_NAMESPACE::TensorProto_DataType)node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();

    if ((input_0_data_type != onnx_dtype::TensorProto_DataType_FLOAT16) ||
        (input_1_data_type != onnx_dtype::TensorProto_DataType_FLOAT16) ||
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
@ -3,6 +3,11 @@

 #pragma once
 #include <unordered_set>
+#include <utility>
+#include <map>
+#include <set>
+#include <vector>
+#include <string>

 namespace onnxruntime {
 namespace openvino_ep {
@ -47,7 +52,7 @@ class DataOps {
  std::multimap<std::string, UnsupportedOpMode> op_list_;
  std::vector<SupportedOp> subgraph_supported_;
  std::vector<SupportedOp> no_dimension_supported_;
-  std::set<Pairs> supported_types_vpu_;
+  std::set<Pairs> supported_types_npu_;
  std::set<Pairs> supported_types_cpu_;
  std::set<Pairs> supported_types_gpu_;
  std::set<Pairs> supported_types_initializer_;
@ -64,14 +69,16 @@ class DataOps {
                         const NodeIndex node_idx);

 public:
-  DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, std::string dev_id) : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id) {
+  DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, std::string dev_id)
+      : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id) {
    populate_op_mode_supported();
    populate_types_supported();
  }

  virtual std::vector<NodeIndex> GetUnsupportedNodeIndices(std::unordered_set<std::string>& ng_required_initializers);
  virtual bool IsOpSupportedOnlyInModel(std::string name);
-  virtual bool SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers, const Node* node);
+  virtual bool SpecialConditionForClusterSizeOne(
+      std::unordered_set<std::string>& ng_required_initializers, const Node* node);
  virtual bool DoNotOmitSubGraph(const std::string& name);
  virtual bool InsertNode(const std::string& name);
  VersionNum GetVersion() const { return version_id_; }
--- a/onnxruntime/core/providers/openvino/ov_versions/utils.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/utils.cc
@ -2,6 +2,7 @@
 // Licensed under the MIT License

 #include "core/providers/shared_library/provider_api.h"
+#include "utils.h"

 #if defined(_MSC_VER)
 #pragma warning(disable : 4244 4245 5208)
@ -113,7 +114,8 @@ std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_op
 * supported_cluster + (UNsupported_node + rest_of_the_graph). This functions returns vector of all supported_clusters by nGraph
 */
 std::vector<std::vector<NodeIndex>>
-GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes) {
+GetPartitionedClusters(const std::vector<NodeIndex>& topological_order,
+                       const std::vector<NodeIndex>& unsupported_nodes) {
  std::vector<std::vector<NodeIndex>> ng_clusters;

  auto prev = topological_order.begin();
@ -140,7 +142,10 @@ GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const st
  return ng_clusters;
 }

-void IdentifyConnectedNodes(const GraphViewer& graph_viewer, NodeIndex curr_node_index, std::vector<NodeIndex>& cluster, std::vector<NodeIndex>& sub_cluster) {
+void IdentifyConnectedNodes(const GraphViewer& graph_viewer,
+                            NodeIndex curr_node_index,
+                            std::vector<NodeIndex>& cluster,
+                            std::vector<NodeIndex>& sub_cluster) {
  if (std::find(cluster.begin(), cluster.end(), curr_node_index) == cluster.end())
    return;

@ -205,7 +210,8 @@ void GetInputsOutputsOfCluster(const GraphViewer& graph_viewer,
      const auto& ext_node = graph_viewer.GetNode((*it).Index());

      if (std::find(cluster.begin(), cluster.end(), ext_node->Index()) == cluster.end()) {
-        // Node is external to this_cluster. Search through its inputs to find the output that is generated by this_cluster.
+        // Node is external to this_cluster. Search through its inputs to
+        // find the output that is generated by this_cluster.
        std::set<std::string> ext_node_inputs;
        ext_node->ForEachDef(
            [&ext_node_inputs](const NodeArg& arg, bool is_input) {
--- a/onnxruntime/core/providers/openvino/ov_versions/utils.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/utils.h
@ -1,5 +1,15 @@
 // Copyright (C) 2019-2022 Intel Corporation
 // Licensed under the MIT License
+#pragma once
+
+#include <memory>
+#include <map>
+#include <utility>
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <string>
+#include <unordered_set>

 namespace onnxruntime {
 namespace openvino_ep {
@ -18,9 +28,14 @@ int GetOnnxOpSet(const GraphViewer& graph_viewer);
 std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_opset);

 std::vector<std::vector<NodeIndex>>
-GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes);
+GetPartitionedClusters(
+    const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes);

-void IdentifyConnectedNodes(const GraphViewer& graph_viewer, NodeIndex curr_node_index, std::vector<NodeIndex>& cluster, std::vector<NodeIndex>& sub_cluster);
+void IdentifyConnectedNodes(
+    const GraphViewer& graph_viewer,
+    NodeIndex curr_node_index,
+    std::vector<NodeIndex>& cluster,
+    std::vector<NodeIndex>& sub_cluster);

 std::vector<std::vector<NodeIndex>>
 GetConnectedClusters(const GraphViewer& graph_viewer, const std::vector<std::vector<NodeIndex>>& clusters);
--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@ -1432,7 +1432,7 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O
  if (legacy_ov_options->device_type != nullptr)
    ov_options_converted_map["device_type"] = legacy_ov_options->device_type;

-  ov_options_converted_map["enable_vpu_fast_compile"] = legacy_ov_options->enable_vpu_fast_compile;
+  ov_options_converted_map["enable_npu_fast_compile"] = legacy_ov_options->enable_npu_fast_compile;

  if (legacy_ov_options->device_id != nullptr)
    ov_options_converted_map["device_id"] = legacy_ov_options->device_id;
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@ -813,10 +813,10 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
        if (option.first == "device_type") {
          OV_provider_options_map[option.first] = option.second;
          continue;
-        } else if (option.first == "enable_vpu_fast_compile") {
+        } else if (option.first == "enable_npu_fast_compile") {
          if (!(option.second == "True" || option.second == "true" ||
                option.second == "False" || option.second == "false")) {
-            ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
+            ORT_THROW("Invalid value passed for enable_npu_fast_compile: ", option.second);
          }
          OV_provider_options_map[option.first] = option.second;
        } else if (option.first == "enable_opencl_throttling") {
--- a/onnxruntime/python/onnxruntime_pybind_state_common.h
+++ b/onnxruntime/python/onnxruntime_pybind_state_common.h
@ -60,11 +60,11 @@ struct OrtStatus {
 #elif OPENVINO_CONFIG_GPU_FP16
 #define BACKEND_OPENVINO "-OPENVINO_GPU_FP16"

-#elif OPENVINO_CONFIG_VPUX_FP16
-#define BACKEND_OPENVINO "-OPENVINO_VPUX_FP16"
+#elif OPENVINO_CONFIG_NPU_FP16
+#define BACKEND_OPENVINO "-OPENVINO_NPU_FP16"

-#elif OPENVINO_CONFIG_VPUX_U8
-#define BACKEND_OPENVINO "-OPENVINO_VPUX_U8"
+#elif OPENVINO_CONFIG_NPU_U8
+#define BACKEND_OPENVINO "-OPENVINO_NPU_U8"

 #elif OPENVINO_CONFIG_MULTI
 #define BACKEND_OPENVINO "-OPENVINO_MULTI"
--- a/onnxruntime/test/perftest/command_args_parser.cc
+++ b/onnxruntime/test/perftest/command_args_parser.cc
@ -60,7 +60,7 @@ namespace perftest {
      "\t-i: Specify EP specific runtime options as key value pairs. Different runtime options available are: \n"
      "\t    [OpenVINO only] [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
      "\t    [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n"
-      "\t    [OpenVINO only] [enable_vpu_fast_compile]: Optionally enabled to speeds up the model's compilation on VPU device targets.\n"
+      "\t    [OpenVINO only] [enable_npu_fast_compile]: Optionally enabled to speeds up the model's compilation on NPU device targets.\n"
      "\t    [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
      "\t    [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n"
      "\t    [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n"
@ -72,7 +72,7 @@ namespace perftest {
      "\t    [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
      "\t    'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
      "\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
-      "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"<path>\"\"\n"
+      "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"<path>\"\"\n"
      "\t [Example] [For QNN EP] -e qnn -i \"backend_path|/folderpath/libQnnCpu.so\" \n\n"
      "\t    [TensorRT only] [trt_max_partition_iterations]: Maximum iterations for TensorRT parser to get capability.\n"
      "\t    [TensorRT only] [trt_min_subgraph_size]: Minimum size of TensorRT subgraphs.\n"
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@ -240,8 +240,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
      if (key == "device_type") {
        std::set<std::string> ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32",
                                                           "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
-                                                           "GPU.0_FP16", "GPU.1_FP16",
-                                                           "VPUX_FP16", "VPUX_U8"};
+                                                           "GPU.0_FP16", "GPU.1_FP16"};
        if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) {
          ov_options[key] = value;
        } else if (value.find("HETERO:") == 0) {
@ -254,17 +253,17 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
          ORT_THROW(
              "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
              "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
-              "'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from"
+              "'GPU.0_FP16', 'GPU.1_FP16' or from"
              " HETERO/MULTI/AUTO options available. \n");
        }
      } else if (key == "device_id") {
        ov_options[key] = value;
-      } else if (key == "enable_vpu_fast_compile") {
+      } else if (key == "enable_npu_fast_compile") {
        if (value == "true" || value == "True" ||
            value == "false" || value == "False") {
          ov_options[key] = value;
        } else {
-          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
+          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_npu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
        }
      } else if (key == "enable_opencl_throttling") {
        if (value == "true" || value == "True" ||
@ -299,7 +298,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
          ov_options[key] = value;
        }
      } else {
-        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling|true'] \n");
+        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling|true'] \n");
      }
    }
    session_options.AppendExecutionProvider("OpenVINO", ov_options);
--- a/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc
@ -143,7 +143,7 @@ void L1NormalizationWithZeroNorm() {

  vector<T> expected_output = {0.5f, 0.5f, 0.f, 0.f};
  test.AddOutput<T>("Y", input_dims, expected_output);
-  test.Run();
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
 }

 TEST(LpNormalizationTest, L1NormalizationWithZeroNorm) {
@ -163,7 +163,7 @@ void L2NormalizationWithZeroNorm() {

  vector<T> expected_output = {1.f, 0.f, 0.f, 0.f};
  test.AddOutput<T>("Y", input_dims, expected_output);
-  test.Run();
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
 }

 TEST(LpNormalizationTest, L2NormalizationWithZeroNorm) {
--- a/onnxruntime/test/providers/cpu/rnn/rnn_op_test.cc
+++ b/onnxruntime/test/providers/cpu/rnn/rnn_op_test.cc
@ -762,7 +762,7 @@ TEST(RNNTest, RNN_invalid_sequence_lens) {
    test.AddOutput<float>("Y_h", Y_h_dims, Y_h_data);

    // the CUDA RNN version allows the invalid sequence lengths, so disable testing on CUDA and TensorRT
-    test.Run(OpTester::ExpectResult::kExpectFailure, error_msg, {kCudaExecutionProvider, kTensorrtExecutionProvider});
+    test.Run(OpTester::ExpectResult::kExpectFailure, error_msg, {kCudaExecutionProvider, kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
  };

  // should batch batch_size to be valid
@ -860,7 +860,7 @@ TEST(RNNTest, RNN_bidirectional_with_sequence_lens) {

  test.AddOutput<float>("Y_h", Y_h_dims, Y_h_data);

-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
 }

 TEST(RNNTest, RNN_with_invalid_activation_load_failure) {
--- a/onnxruntime/test/providers/cpu/tensor/compress_op.test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/compress_op.test.cc
@ -66,7 +66,7 @@ TEST(CompressTest, Compress_3dims_has_extra_condition) {
  // has condition length = 3 > input_dim[axis] = 2
  test.AddInput<bool>("condition", {3}, {0, 1, 1});
  test.AddOutput<float>("output", {2, 1, 3}, {4.0f, 5.0f, 6.0f, 10.0f, 11.0f, 12.0f});
-  test.Run();
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
 }

 TEST(CompressTest, Compress_3dims_has_extra_input) {
--- a/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc
@ -99,7 +99,7 @@ TEST(TensorOpTest, Unsqueeze_scalar_2) {
    test.AddInput<float>("input", {}, std::vector<float>{1.0f});
    test.AddInput<int64_t>("axes", {2}, std::vector<int64_t>{0, -1}, axes_is_initializer);
    test.AddOutput<float>("output", {1, 1}, std::vector<float>{1.0f});
-    test.Run();
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
  };
  run_test(false);
  run_test(true);
--- a/onnxruntime/test/python/onnx_backend_test_series.py
+++ b/onnxruntime/test/python/onnx_backend_test_series.py
@ -140,6 +140,9 @@ def create_backend_test(test_name=None):
        if backend.supports_device("OPENVINO_CPU_FP16"):
            current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP16")

+        if backend.supports_device("OPENVINO_NPU_FP16"):
+            current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_NPU_FP16")
+
        if backend.supports_device("OPENVINO"):
            current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_opset18")

--- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
+++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
@ -521,6 +521,10 @@
        "test_scan_sum_cpu", // Disabled due to output mismatch with tolerance.
        "test_scan9_sum_cpu" // Disabled due to output mismatch with tolerance.
    ],
+    "current_failing_tests_OPENVINO_NPU_FP16": [
+        "^test_prelu_broadcast",
+        "test_loop11_cpu"
+    ],
    "current_failing_tests_OPENVINO_opset18": [
        // pending opset 18 support, RUNTIME_EXCEPTION : Encountered unknown exception in Initialize()
        "^test_center_crop_pad_crop_axes_chw",
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@ -66,15 +66,13 @@ _check_python_version()


 def _openvino_verify_device_type(device_read):
-    choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16", "VPUX_FP16", "VPUX_U8"]
+    choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"]

    choices1 = [
        "CPU_FP32_NO_PARTITION",
        "CPU_FP16_NO_PARTITION",
        "GPU_FP32_NO_PARTITION",
        "GPU_FP16_NO_PARTITION",
-        "VPUX_FP16_NO_PARTITION",
-        "VPUX_U8_NO_PARTITION",
    ]
    status_hetero = True
    res = False
@ -89,7 +87,7 @@ def _openvino_verify_device_type(device_read):
        if len(comma_separated_devices) < 2:
            print("At least two devices required in Hetero/Multi/Auto Mode")
            status_hetero = False
-        dev_options = ["CPU", "GPU", "VPUX"]
+        dev_options = ["CPU", "GPU"]
        for dev in comma_separated_devices:
            if dev not in dev_options:
                status_hetero = False
@ -100,7 +98,7 @@ def _openvino_verify_device_type(device_read):
        print("specify the keyword HETERO or MULTI or AUTO followed by the devices ")
        print("in the order of priority you want to build\n")
        print("The different hardware devices that can be added in HETERO or MULTI or AUTO")
-        print("are ['CPU','GPU', 'VPUX'] \n")
+        print("are ['CPU','GPU'] \n")
        print("An example of how to specify the hetero build type. Ex: HETERO:GPU,CPU \n")
        print("An example of how to specify the MULTI build type. Ex: MULTI:GPU,CPU \n")
        print("An example of how to specify the AUTO build type. Ex: AUTO:GPU,CPU \n")
@ -1158,8 +1156,6 @@ def generate_build_tree(
            "-Donnxruntime_USE_OPENVINO_GPU_FP16=" + ("ON" if args.use_openvino == "GPU_FP16" else "OFF"),
            "-Donnxruntime_USE_OPENVINO_CPU_FP32=" + ("ON" if args.use_openvino == "CPU_FP32" else "OFF"),
            "-Donnxruntime_USE_OPENVINO_CPU_FP16=" + ("ON" if args.use_openvino == "CPU_FP16" else "OFF"),
-            "-Donnxruntime_USE_OPENVINO_VPUX_FP16=" + ("ON" if args.use_openvino == "VPUX_FP16" else "OFF"),
-            "-Donnxruntime_USE_OPENVINO_VPUX_U8=" + ("ON" if args.use_openvino == "VPUX_U8" else "OFF"),
            "-Donnxruntime_USE_OPENVINO_GPU_FP32_NP="
            + ("ON" if args.use_openvino == "GPU_FP32_NO_PARTITION" else "OFF"),
            "-Donnxruntime_USE_OPENVINO_GPU_FP16_NP="
@ -1168,9 +1164,6 @@ def generate_build_tree(
            + ("ON" if args.use_openvino == "CPU_FP32_NO_PARTITION" else "OFF"),
            "-Donnxruntime_USE_OPENVINO_CPU_FP16_NP="
            + ("ON" if args.use_openvino == "CPU_FP16_NO_PARTITION" else "OFF"),
-            "-Donnxruntime_USE_OPENVINO_VPUX_FP16_NP="
-            + ("ON" if args.use_openvino == "VPUX_FP16_NP_PARTITION" else "OFF"),
-            "-Donnxruntime_USE_OPENVINO_VPUX_U8_NP=" + ("ON" if args.use_openvino == "VPUX_U8_NP_PARTITION" else "OFF"),
            "-Donnxruntime_USE_OPENVINO_HETERO=" + ("ON" if args.use_openvino.startswith("HETERO") else "OFF"),
            "-Donnxruntime_USE_OPENVINO_DEVICE=" + (args.use_openvino),
            "-Donnxruntime_USE_OPENVINO_MULTI=" + ("ON" if args.use_openvino.startswith("MULTI") else "OFF"),
--- a/tools/nuget/generate_nuspec_for_native_nuget.py
+++ b/tools/nuget/generate_nuspec_for_native_nuget.py
@ -552,6 +552,7 @@ def generate_files(line_list, args):
                files_list.append(
                    "<file src=" + '"' + os.path.join(args.native_build_path, "onnxruntime.pdb") + runtimes + " />"
                )
+
    else:
        files_list.append(
            "<file src="
@ -706,25 +707,9 @@ def generate_files(line_list, args):
        )

        if is_windows():
-            if "2022" in openvino_path:
-                dll_list_path = os.path.join(openvino_path, "runtime\\bin\\intel64\\Release\\")
-                tbb_list_path = os.path.join(openvino_path, "runtime\\3rdparty\\tbb\\bin\\")
-            else:
-                dll_list_path = os.path.join(
-                    openvino_path, "deployment_tools\\inference_engine\\bin\\intel64\\Release\\"
-                )
-                tbb_list_path = os.path.join(openvino_path, "deployment_tools\\inference_engine\\external\\tbb\\bin\\")
-                ngraph_list_path = os.path.join(openvino_path, "deployment_tools\\ngraph\\lib\\")
-                for ngraph_element in os.listdir(ngraph_list_path):
-                    if ngraph_element.endswith("dll"):
-                        files_list.append(
-                            "<file src="
-                            + '"'
-                            + os.path.join(ngraph_list_path, ngraph_element)
-                            + runtimes_target
-                            + args.target_architecture
-                            + '\\native" />'
-                        )
+            dll_list_path = os.path.join(openvino_path, "runtime\\bin\\intel64\\Release\\")
+            tbb_list_path = os.path.join(openvino_path, "runtime\\3rdparty\\tbb\\bin\\")
+
            for dll_element in os.listdir(dll_list_path):
                if dll_element.endswith("dll"):
                    files_list.append(
@ -735,26 +720,7 @@ def generate_files(line_list, args):
                        + args.target_architecture
                        + '\\native" />'
                    )
-            # plugins.xml
-            files_list.append(
-                "<file src="
-                + '"'
-                + os.path.join(dll_list_path, "plugins.xml")
-                + runtimes_target
-                + args.target_architecture
-                + '\\native" />'
-            )
-            # usb-ma2x8x.mvcmd
-            # OpenVINO 2022.3 doesn't have usb-ma2x8x.mvcmd
-            if "2022.3" not in openvino_path:
-                files_list.append(
-                    "<file src="
-                    + '"'
-                    + os.path.join(dll_list_path, "usb-ma2x8x.mvcmd")
-                    + runtimes_target
-                    + args.target_architecture
-                    + '\\native" />'
-                )
+
            for tbb_element in os.listdir(tbb_list_path):
                if tbb_element.endswith("dll"):
                    files_list.append(