diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index f81a268d38..94181448fd 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -1282,14 +1282,6 @@ if (onnxruntime_USE_OPENVINO)
     add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
   endif()
 
-  if (onnxruntime_USE_OPENVINO_VPUX_FP16)
-    add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
-  endif()
-
-  if (onnxruntime_USE_OPENVINO_VPUX_U8)
-    add_definitions(-DOPENVINO_CONFIG_VPUX_U8=1)
-  endif()
-
   if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
     add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
     add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
@@ -1310,16 +1302,6 @@ if (onnxruntime_USE_OPENVINO)
     add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
   endif()
 
-  if (onnxruntime_USE_OPENVINO_VPUX_FP32_NP)
-    add_definitions(-DOPENVINO_CONFIG_VPUX_FP32=1)
-    add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
-  endif()
-
-  if (onnxruntime_USE_OPENVINO_VPUX_FP16_NP)
-    add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
-    add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
-  endif()
-
   if (onnxruntime_USE_OPENVINO_HETERO)
     add_definitions(-DOPENVINO_CONFIG_HETERO=1)
     add_definitions(-DDEVICE_NAME="${onnxruntime_USE_OPENVINO_DEVICE}")
diff --git a/docs/python/ReadMeOV.rst b/docs/python/ReadMeOV.rst
index f12c01d278..6ef16e1378 100644
--- a/docs/python/ReadMeOV.rst
+++ b/docs/python/ReadMeOV.rst
@@ -7,7 +7,6 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
  - Intel® CPUs
  - Intel® integrated GPUs
  - Intel® discrete GPUs
- - Intel® integrated VPUs
 
 Installation
 ------------
@@ -22,7 +21,6 @@ This package supports:
  - Intel® CPUs
  - Intel® integrated GPUs
  - Intel® discrete GPUs
- - Intel® integrated VPUs
 
 ``pip3 install onnxruntime-openvino``
 
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index 613c1ac93c..729a302f3d 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -611,7 +611,7 @@ typedef struct OrtMIGraphXProviderOptions {
 typedef struct OrtOpenVINOProviderOptions {
 #ifdef __cplusplus
   OrtOpenVINOProviderOptions() : device_type{},
-                                 enable_vpu_fast_compile{},
+                                 enable_npu_fast_compile{},
                                  device_id{},
                                  num_of_threads{},
                                  cache_dir{},
@@ -624,7 +624,7 @@ typedef struct OrtOpenVINOProviderOptions {
    * Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
    */
   const char* device_type;
-  unsigned char enable_vpu_fast_compile;  ///< 0 = disabled, nonzero = enabled
+  unsigned char enable_npu_fast_compile;  ///< 0 = disabled, nonzero = enabled
   const char* device_id;
   size_t num_of_threads;  ///< 0 = Use default number of threads
   const char* cache_dir;  // path is set to empty by default
diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
index 78467b646b..7e4c0dc8d7 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.cc
+++ b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -2,9 +2,7 @@
 // Licensed under the MIT License
 
 #include <fstream>
-#include <vector>
-#include <string>
-#include <memory>
+#include <utility>
 
 #include "core/providers/shared_library/provider_api.h"
 #include "contexts.h"
@@ -18,7 +16,8 @@ namespace openvino_ep {
 static std::unique_ptr<GlobalContext> g_global_context;
 
 GlobalContext& BackendManager::GetGlobalContext() {
-  // This is not thread safe to call for the first time, but it is first called on the main thread by the constructor so it is safe.
+  // This is not thread safe to call for the first time,
+  // but it is first called on the main thread by the constructor so it is safe.
   if (!g_global_context)
     g_global_context = std::make_unique<GlobalContext>();
   return *g_global_context;
@@ -88,7 +87,9 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
                          << "Backend created for graph " << subgraph_context_.subgraph_name;
     }
   } else {
-    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;
+    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. "
+                       << "Initializing backend for graph "
+                       << subgraph_context_.subgraph_name;
 
     subgraph_context_.has_dynamic_input_shape = false;
     try {
@@ -104,7 +105,7 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
 bool BackendManager::ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const {
   bool has_batched_inputs = true;
 
-  for (int i = 0; i < (int)subgraph_context_.input_indexes.size(); i++) {
+  for (int i = 0; i < static_cast<int>(subgraph_context_.input_indexes.size()); i++) {
     auto& input = model_proto.graph().input(subgraph_context_.input_indexes[i]);
 
     // Batch-process only raw image inputs (NCHW or NHWC layouts)
@@ -215,7 +216,10 @@ BackendManager::ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_pr
   auto graph_proto = model_copy->mutable_graph();
 
   for (size_t i = 0, limit = input_shapes.size(); i < limit; i++) {
-    auto g_in_shape = graph_proto->mutable_input((int)i)->mutable_type()->mutable_tensor_type()->mutable_shape();
+    auto g_in_shape = graph_proto->mutable_input(static_cast<int>(i))
+                          ->mutable_type()
+                          ->mutable_tensor_type()
+                          ->mutable_shape();
     g_in_shape->clear_dim();
     const auto& shape = input_shapes[i];
     for (size_t dim = 0, end = shape.size(); dim < end; dim++) {
@@ -234,7 +238,11 @@ BackendManager::ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_p
   auto graph_proto = model_copy->mutable_graph();
 
   for (int i = 0; i < graph_proto->input_size(); i++) {
-    ONNX_NAMESPACE::TensorShapeProto* g_in_shape = graph_proto->mutable_input((int)i)->mutable_type()->mutable_tensor_type()->mutable_shape();
+    ONNX_NAMESPACE::TensorShapeProto* g_in_shape =
+        graph_proto->mutable_input(static_cast<int>(i))
+            ->mutable_type()
+            ->mutable_tensor_type()
+            ->mutable_shape();
     g_in_shape->mutable_dim(0)->clear_dim_value();
     g_in_shape->mutable_dim(0)->set_dim_value(1);
   }
diff --git a/onnxruntime/core/providers/openvino/backend_manager.h b/onnxruntime/core/providers/openvino/backend_manager.h
index c247ab60d3..a177324b23 100644
--- a/onnxruntime/core/providers/openvino/backend_manager.h
+++ b/onnxruntime/core/providers/openvino/backend_manager.h
@@ -3,6 +3,11 @@
 
 #pragma once
 
+#include <vector>
+#include <map>
+#include <memory>
+#include <string>
+
 #include "ov_interface.h"
 #include "contexts.h"
 #include "ibackend.h"
@@ -13,7 +18,9 @@ namespace openvino_ep {
 // Singleton class that manages all the backends
 class BackendManager {
  public:
-  BackendManager(const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger);
+  BackendManager(const onnxruntime::Node& fused_node,
+                 const onnxruntime::GraphViewer& subgraph,
+                 const logging::Logger& logger);
   void Compute(OrtKernelContext* context);
   void ShutdownBackendManager();
   static GlobalContext& GetGlobalContext();
@@ -21,7 +28,9 @@ class BackendManager {
 
  private:
   std::unique_ptr<ONNX_NAMESPACE::ModelProto> GetModelProtoFromFusedNode(
-      const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger) const;
+      const onnxruntime::Node& fused_node,
+      const onnxruntime::GraphViewer& subgraph,
+      const logging::Logger& logger) const;
   bool ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& subgraph) const;
   bool ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const;
 
diff --git a/onnxruntime/core/providers/openvino/backend_utils.cc b/onnxruntime/core/providers/openvino/backend_utils.cc
index d49968cdb7..d47c91dd46 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.cc
+++ b/onnxruntime/core/providers/openvino/backend_utils.cc
@@ -1,9 +1,7 @@
 // Copyright (C) 2019-2022 Intel Corporation
 // Licensed under the MIT License
 
-#include <map>
-#include <string>
-#include <memory>
+#include <algorithm>
 #include <sstream>
 #include <fstream>
 
@@ -58,7 +56,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
   try {
     auto cnn_network = global_context.ie_core.ReadModel(model);
     if ((subgraph_context.precision == "FP16") &&
-        (global_context.device_type.find("VPUX") == std::string::npos)) {
+        (global_context.device_type.find("NPU") == std::string::npos)) {
       // FP16 transformations
       ov::pass::ConvertFP32ToFP16 pass_obj;
       pass_obj.run_on_model(cnn_network);
@@ -88,7 +86,8 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
       size_t index = results.size() - 1;
 
       for (auto it = results.rbegin(); it != results.rend(); ++it) {
-        if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
+        if (auto const_node =
+                std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
           const_outputs_map[(*it)->get_friendly_name()] = const_node;
           results.erase(results.begin() + index);
         }
@@ -254,7 +253,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
 
 void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
                             std::ostream& stream, std::string deviceName) {
-  long long totalTime = 0;
+  int64_t totalTime = 0;
   // Print performance counts
   stream << std::endl
          << "performance counts:" << std::endl
diff --git a/onnxruntime/core/providers/openvino/backend_utils.h b/onnxruntime/core/providers/openvino/backend_utils.h
index de78a150fe..82b0351e87 100644
--- a/onnxruntime/core/providers/openvino/backend_utils.h
+++ b/onnxruntime/core/providers/openvino/backend_utils.h
@@ -4,9 +4,15 @@
 #pragma once
 
 #define ORT_API_MANUAL_INIT
+#include <iomanip>
+#include <unordered_map>
+#include <map>
+#include <memory>
+#include <vector>
+#include <string>
+
 #include "core/session/onnxruntime_cxx_api.h"
 #include "contexts.h"
-#include <iomanip>
 #include "ov_interface.h"
 #ifdef _WIN32
 #include <direct.h>
@@ -57,7 +63,9 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
                     size_t batch_slice_idx);
 
 std::shared_ptr<OVNetwork>
-CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context,
+CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto,
+              const GlobalContext& global_context,
+              const SubGraphContext& subgraph_context,
               std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
 
 void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
diff --git a/onnxruntime/core/providers/openvino/backends/backend_factory.cc b/onnxruntime/core/providers/openvino/backends/backend_factory.cc
index c339f24e70..c586dd8b38 100644
--- a/onnxruntime/core/providers/openvino/backends/backend_factory.cc
+++ b/onnxruntime/core/providers/openvino/backends/backend_factory.cc
@@ -16,7 +16,7 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
                             const SubGraphContext& subgraph_context) {
   std::string type = global_context.device_type;
   if (type == "CPU" || type.find("GPU") != std::string::npos ||
-      type.find("VPUX") != std::string::npos ||
+      type.find("NPU") != std::string::npos ||
       type.find("HETERO") != std::string::npos ||
       type.find("MULTI") != std::string::npos ||
       type.find("AUTO") != std::string::npos) {
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
index f9517d7942..09e1322ff5 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc
@@ -6,10 +6,10 @@
 #include <memory>
 #include <sstream>
 #include <fstream>
+#include <utility>
 
 #include "core/providers/shared_library/provider_api.h"
 #include "../backend_utils.h"
-// #include <ngraph/pass/constant_folding.hpp>
 #include "basic_backend.h"
 #include "../backend_manager.h"
 
@@ -57,33 +57,39 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
         cl_context ctx = static_cast<cl_context>(global_context_.context);
         remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), ctx);
         ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-        exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name);
+        exe_network_ = global_context_.ie_core.LoadNetwork(
+            ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name);
         LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
       } else {
         ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-        exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+        exe_network_ = global_context_.ie_core.LoadNetwork(
+            ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
         LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
       }
 #else
 #if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
       if (!subgraph_context_.has_dynamic_input_shape && dev_prec != "CPU_FP16") {
         const std::string model = model_proto.SerializeAsString();
-        exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
+        exe_network_ = global_context_.ie_core.LoadNetwork(
+            model, hw_target, device_config, subgraph_context_.subgraph_name);
         LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
       } else {
         ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-        exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+        exe_network_ = global_context_.ie_core.LoadNetwork(
+            ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
         LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
       }
 #else
       ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-      exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+      exe_network_ = global_context_.ie_core.LoadNetwork(
+          ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
       LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
 #endif
 #endif
     } else {
       ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
-      exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
+      exe_network_ = global_context_.ie_core.LoadNetwork(
+          ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
       LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
     }
   } catch (const char* msg) {
@@ -127,10 +133,10 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
   }
 #endif
 #if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
-  if (global_context_.device_type.find("VPUX") != std::string::npos) {
+  if (global_context_.device_type.find("NPU") != std::string::npos) {
     std::pair<std::string, ov::Any> device_property;
-    device_property = std::make_pair("VPU_COMPILER_TYPE", "MLIR");
-    device_config.emplace(ov::device::properties("VPUX", device_property));
+    device_property = std::make_pair("NPU_COMPILER_TYPE", "DRIVER");
+    device_config.emplace(ov::device::properties("NPU", device_property));
   }
 #endif
 }
@@ -152,12 +158,12 @@ void BasicBackend::EnableCaching() {
 }
 
 void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) {
-  if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
+  if (global_context_.enable_opencl_throttling == true &&  // throttling flag set in the global context
+      global_context_.device_type.find("GPU") != std::string::npos) {  // any device_type containing GPU
     LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
     std::pair<std::string, ov::Any> device_property;
     device_property = std::make_pair("PLUGIN_THROTTLE", "1");
     device_config.emplace(ov::device::properties("GPU_CONFIG_KEY", device_property));
-    // device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
   }
 }
 
@@ -187,7 +193,9 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
       if (input_names.find(onnx_input_name) != input_names.end()) {
         input_name = onnx_input_name;
       } else {
-        throw(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
+        throw(log_tag +
+              "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name +
+              " doesn't exist in the list of OpenVINO input tensor names");
       }
       size_t batch_slice_idx = 0;
       if (subgraph_context_.has_dynamic_input_shape &&
@@ -197,6 +205,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
         auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
         auto tensor_shape = tensor_info.GetShape();
         auto tensor_size = tensor_shape.size();
+        const char* tensor_data = tensor.GetTensorData<char>();
         auto tensor_iter = 0;
         ov::Shape input_tensor_shape = ov::Shape(tensor_size, 0);
         for (auto i = tensor_shape.begin(); i != tensor_shape.end(); ++i) {
@@ -204,8 +213,16 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
           tensor_iter += 1;
         }
         auto input = ie_cnn_network_->get_parameters().at(input_idx);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
-        FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+        OVTensorPtr tensor_ptr;
+        // CPU: wrap the ORT input buffer in place (no copy); the const is dropped explicitly for the void* argument.
+        if (global_context_.device_type.find("CPU") != std::string::npos) {
+          tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape,
+                                                    const_cast<void*>(static_cast<const void*>(tensor_data)));
+        } else {
+          tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
+          FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
+        }
+
         try {
           infer_request->SetTensor(input_name, tensor_ptr);
         } catch (const char* msg) {
@@ -251,7 +268,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
       if (input_names.find(onnx_input_name) != input_names.end()) {
         input_name = onnx_input_name;
       } else {
-        throw(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
+        throw(log_tag +
+              "Input names mismatch between OpenVINO and ONNX. " +
+              onnx_input_name +
+              " doesn't exist in the list of OpenVINO input tensor names");
       }
       input_idx++;
       // Kernel Context Input Buffer
@@ -264,9 +284,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
         const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
         // Create an Input Remote Blob
         auto input = ie_cnn_network_->get_parameters().at(0);
-        auto remote_blob = remote_context_->create_tensor(input->get_element_type(), input->get_shape(), *shared_buffer_const);
-        ov::Tensor tensor = static_cast<ov::Tensor>(remote_blob);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor);
+        auto remote_blob = remote_context_->create_tensor(
+            input->get_element_type(), input->get_shape(), *shared_buffer_const);
+        ov::Tensor tensor_remote = static_cast<ov::Tensor>(remote_blob);
+        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_remote);
         infer_request->SetTensor(input_name, tensor_ptr);
       } else {
         OVTensorPtr graph_input_blob;
@@ -295,7 +316,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
         }
       }
       if (!output_name_found) {
-        throw std::string(log_tag + "Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " + onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
+        throw std::string(
+            log_tag +
+            "Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " +
+            onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
       }
 
       size_t batch_size = 1;
@@ -307,9 +331,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
         const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
         // Create a shared Blob, set the Infer Request Output Blob
         auto output = ie_cnn_network_->get_results().at(0);
-        auto remote_tensor = remote_context_->create_tensor(output->get_element_type(), output->get_shape(), *shared_buffer_const);
-        ov::Tensor tensor = static_cast<ov::Tensor>(remote_tensor);
-        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor);
+        auto remote_tensor =
+            remote_context_->create_tensor(output->get_element_type(), output->get_shape(), *shared_buffer_const);
+        ov::Tensor tensor_t = static_cast<ov::Tensor>(remote_tensor);
+        OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_t);
         try {
           infer_request->SetTensor(output_name, tensor_ptr);
         } catch (const char* msg) {
@@ -364,7 +389,8 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
         throw(msg);
       }
       size_t batch_size = 1;
-      auto output_tensor = GetOutputTensor(context, batch_size, infer_request, output_name, subgraph_context_.output_names);
+      auto output_tensor =
+          GetOutputTensor(context, batch_size, infer_request, output_name, subgraph_context_.output_names);
       auto mem_info = output_tensor.GetTensorMemoryInfo();
       if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
         return;
@@ -465,7 +491,8 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
 #ifndef IO_BUFFER_ENABLED  // Printing performance counts is disabled when IO_BUFFER_ENABLED
     if (openvino_ep::backend_utils::IsDebugEnabled()) {
       inferRequestsQueue_->printstatus();  // Printing the elements of infer_requests_ vector pool only in debug mode
-      std::string& hw_target = (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
+      std::string& hw_target =
+          (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
       printPerformanceCounts(infer_request, std::cout, hw_target);
     }
 #endif
diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.h b/onnxruntime/core/providers/openvino/backends/basic_backend.h
index 2f1d603640..6eda641451 100644
--- a/onnxruntime/core/providers/openvino/backends/basic_backend.h
+++ b/onnxruntime/core/providers/openvino/backends/basic_backend.h
@@ -6,16 +6,17 @@
 #include <memory>
 
 #define ORT_API_MANUAL_INIT
-#include "core/session/onnxruntime_cxx_api.h"
-#include "core/providers/openvino/contexts.h"
-#include "core/providers/openvino/ibackend.h"
-#include "core/providers/openvino/ov_interface.h"
 #include <vector>
 #include <iostream>
 #include <string>
 #include <condition_variable>
 #include <mutex>
 
+#include "core/session/onnxruntime_cxx_api.h"
+#include "core/providers/openvino/contexts.h"
+#include "core/providers/openvino/ibackend.h"
+#include "core/providers/openvino/ov_interface.h"
+
 namespace onnxruntime {
 namespace openvino_ep {
 
@@ -29,7 +30,7 @@ class BasicBackend : public IBackend {
   void Infer(OrtKernelContext* context) override;
 
  private:
-  bool ImportBlob(std::string hw_target, bool vpu_status);
+  bool ImportBlob(std::string hw_target, bool npu_status);
   void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&);
   bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
   void PopulateConfigValue(ov::AnyMap& device_config);
diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h
index b61dcf8ca4..29233e72c3 100644
--- a/onnxruntime/core/providers/openvino/contexts.h
+++ b/onnxruntime/core/providers/openvino/contexts.h
@@ -3,6 +3,9 @@
 
 #pragma once
 
+#include <vector>
+#include <unordered_map>
+#include <string>
 #include "ov_interface.h"
 
 namespace onnxruntime {
@@ -12,7 +15,7 @@ namespace openvino_ep {
 struct GlobalContext {
   OVCore ie_core;
   bool is_wholly_supported_graph = false;
-  bool enable_vpu_fast_compile = false;
+  bool enable_npu_fast_compile = false;
   bool enable_opencl_throttling = false;
   bool enable_dynamic_shapes = false;
   size_t num_of_threads;
@@ -34,7 +37,7 @@ struct GlobalContext {
 struct SubGraphContext {
   bool has_dynamic_input_shape = false;
   bool enable_batching = false;
-  bool set_vpu_config = false;
+  bool set_npu_config = false;
   bool is_constant = false;
   void* context = 0;
   std::string subgraph_name;
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
index 9908099262..a4c6b0f851 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc
@@ -17,17 +17,18 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
 
   openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_;
   openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
-  openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
+  openvino_ep::BackendManager::GetGlobalContext().enable_npu_fast_compile = info.enable_npu_fast_compile_;
   openvino_ep::BackendManager::GetGlobalContext().cache_dir = info.cache_dir_;
   openvino_ep::BackendManager::GetGlobalContext().num_streams = info.num_streams_;
   openvino_ep::BackendManager::GetGlobalContext().context = info.context_;
   openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_;
   openvino_ep::BackendManager::GetGlobalContext().enable_dynamic_shapes = info.enable_dynamic_shapes_;
 
-  if ((int)info.num_of_threads_ <= 0) {
+  if (static_cast<int>(info.num_of_threads_) <= 0) {
     openvino_ep::BackendManager::GetGlobalContext().num_of_threads = 8;
-  } else if ((int)info.num_of_threads_ > 8) {
-    std::string err_msg = std::string("\n [ERROR] num_of_threads configured during runtime is: ") + std::to_string(info.num_of_threads_) + "\nnum_of_threads configured should be >0 and <=8.\n";
+  } else if (static_cast<int>(info.num_of_threads_) > 8) {
+    std::string err_msg = std::string("\n [ERROR] num_of_threads configured during runtime is: ") +
+                          std::to_string(info.num_of_threads_) + "\nnum_of_threads configured should be >0 and <=8.\n";
     ORT_THROW(err_msg);
   } else {
     openvino_ep::BackendManager::GetGlobalContext().num_of_threads = info.num_of_threads_;
@@ -56,7 +57,8 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
               device_found = true;
               break;
             }
-            if (info.device_type_.find("VPUX") != std::string::npos && (info.precision_ == "FP16" || info.precision_ == "U8")) {
+            if ((info.device_type_.find("NPU") != std::string::npos) &&
+                (info.precision_ == "FP16" || info.precision_ == "U8")) {
               device_found = true;
               break;
             }
@@ -109,11 +111,14 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
   openvino_ep::BackendManager::GetGlobalContext().onnx_model_name = graph_viewer.Name();
 #ifdef _WIN32
   std::wstring onnx_path = graph_viewer.ModelPath().ToPathString();
-  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = std::string(onnx_path.begin(), onnx_path.end());
+  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name =
+      std::string(onnx_path.begin(), onnx_path.end());
 #else
-  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = graph_viewer.ModelPath().ToPathString();
+  openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name =
+      graph_viewer.ModelPath().ToPathString();
 #endif
-  openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain);
+  openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version =
+      graph_viewer.DomainToVersionMap().at(kOnnxDomain);
 
 #if defined(OPENVINO_2022_1)
   openvino_ep::GetCapability obj(graph_viewer,
@@ -151,7 +156,8 @@ common::Status OpenVINOExecutionProvider::Compile(
 
     openvino_ep::BackendManager::GetGlobalContext().use_api_2 = true;
 
-    std::shared_ptr<openvino_ep::BackendManager> backend_manager = std::make_shared<openvino_ep::BackendManager>(fused_node, graph_body_viewer, *GetLogger());
+    std::shared_ptr<openvino_ep::BackendManager> backend_manager =
+        std::make_shared<openvino_ep::BackendManager>(fused_node, graph_body_viewer, *GetLogger());
 
     compute_info.create_state_func =
         [backend_manager](ComputeContext* context, FunctionState* state) {
diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
index a4fc09362f..3b56b54410 100644
--- a/onnxruntime/core/providers/openvino/openvino_execution_provider.h
+++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h
@@ -3,19 +3,28 @@
 
 #pragma once
 
-#include "backend_manager.h"
 #include <map>
 #include <algorithm>
 #include <iostream>
+#include <string>
+#include <memory>
+#include <vector>
+
+#include "backend_manager.h"
 
 namespace onnxruntime {
 
 static void print_build_options() {
   std::cout << "[ERROR] INVALID DEVICE BUILD TYPE SPECIFIED" << std::endl;
-  std::cout << "Specify the keyword HETERO (or) MULTI (or) AUTO followed by the devices in the order of priority you want to build" << std::endl;
-  std::cout << "The different hardware devices that can be added with HETERO/MULTI/AUTO build ";
-  std::cout << "are ['CPU','GPU','VPUX']" << std::endl;
-  std::cout << "An example of how to specify the HETERO or MULTI or AUTO build type. Ex: HETERO:GPU,CPU  Ex: MULTI:GPU,CPU Ex: AUTO:GPU,CPU" << std::endl;
+  // Usage help printed after the error banner: the keyword syntax, the accepted devices,
+  // then worked examples. Adjacent string literals are joined by the compiler, so each
+  // statement still emits one complete line and std::endl flushes it.
+  std::cout << "Specify the keyword HETERO (or) MULTI (or) AUTO followed by the devices in the order of priority "
+               "you want to build" << std::endl;
+  std::cout << "The different hardware devices that can be added with HETERO/MULTI/AUTO build "
+               "are ['CPU','GPU']" << std::endl;
+  std::cout << "An example of how to specify the HETERO or MULTI or AUTO build type. "
+               "Ex: HETERO:GPU,CPU  Ex: MULTI:GPU,CPU Ex: AUTO:GPU,CPU" << std::endl;
 }
 
 static std::vector<std::string> split(const std::string& s, char delim) {
@@ -39,7 +48,7 @@ static std::vector<std::string> parseDevices(const std::string& device_string) {
     print_build_options();
     ORT_THROW("Invalid device string: " + device_string);
   }
-  std::vector<std::string> dev_options = {"CPU", "GPU", "VPUX"};
+  std::vector<std::string> dev_options = {"CPU", "GPU"};
   for (std::string dev : devices) {
     if (!std::count(dev_options.begin(), dev_options.end(), dev)) {
       print_build_options();
@@ -53,7 +62,7 @@ static std::vector<std::string> parseDevices(const std::string& device_string) {
 struct OpenVINOExecutionProviderInfo {
   std::string device_type_;
   std::string precision_;
-  bool enable_vpu_fast_compile_;
+  bool enable_npu_fast_compile_;
   std::string device_id_;
   size_t num_of_threads_;
   std::string cache_dir_;
@@ -62,11 +71,18 @@ struct OpenVINOExecutionProviderInfo {
   bool enable_opencl_throttling_;
   bool enable_dynamic_shapes_;
 
-  explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id,
+  explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_npu_fast_compile, std::string dev_id,
                                          size_t num_of_threads, std::string cache_dir, int num_streams,
                                          void* context, bool enable_opencl_throttling,
                                          bool enable_dynamic_shapes)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), cache_dir_(cache_dir), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
+      : enable_npu_fast_compile_(enable_npu_fast_compile),
+        device_id_(dev_id),
+        num_of_threads_(num_of_threads),
+        cache_dir_(cache_dir),
+        num_streams_(num_streams),
+        context_(context),
+        enable_opencl_throttling_(enable_opencl_throttling),
+        enable_dynamic_shapes_(enable_dynamic_shapes) {
     if (dev_type == "") {
       LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
                          << "No runtime device selection option provided.";
@@ -82,11 +98,11 @@ struct OpenVINOExecutionProviderInfo {
 #elif defined OPENVINO_CONFIG_GPU_FP16
       device_type_ = "GPU";
       precision_ = "FP16";
-#elif defined OPENVINO_CONFIG_VPUX_FP16
-      device_type_ = "VPUX";
+#elif defined OPENVINO_CONFIG_NPU_FP16
+      device_type_ = "NPU";
       precision_ = "FP16";
-#elif defined OPENVINO_CONFIG_VPUX_U8
-      device_type_ = "VPUX";
+#elif defined OPENVINO_CONFIG_NPU_U8
+      device_type_ = "NPU";
       precision_ = "U8";
 #elif defined OPENVINO_CONFIG_HETERO || defined OPENVINO_CONFIG_MULTI || defined OPENVINO_CONFIG_AUTO
 #ifdef DEVICE_NAME
@@ -126,11 +142,11 @@ struct OpenVINOExecutionProviderInfo {
     } else if (dev_type == "GPU.1_FP16") {
       device_type_ = "GPU.1";
       precision_ = "FP16";
-    } else if (dev_type == "VPUX_FP16") {
-      device_type_ = "VPUX";
+    } else if (dev_type == "NPU_FP16") {
+      device_type_ = "NPU";
       precision_ = "FP16";
-    } else if (dev_type == "VPUX_U8") {
-      device_type_ = "VPUX";
+    } else if (dev_type == "NPU_U8") {
+      device_type_ = "NPU";
       precision_ = "U8";
     } else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0) {
       std::vector<std::string> devices = parseDevices(dev_type);
diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
index 95b39bcc05..fbb89710c8 100644
--- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
+++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc
@@ -8,11 +8,16 @@
 
 namespace onnxruntime {
 struct OpenVINOProviderFactory : IExecutionProviderFactory {
-  OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
+  OpenVINOProviderFactory(const char* device_type, bool enable_npu_fast_compile,
                           const char* device_id, size_t num_of_threads,
                           const char* cache_dir, int num_streams, void* context,
                           bool enable_opencl_throttling, bool enable_dynamic_shapes)
-      : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
+      : enable_npu_fast_compile_(enable_npu_fast_compile),
+        num_of_threads_(num_of_threads),
+        num_streams_(num_streams),
+        context_(context),
+        enable_opencl_throttling_(enable_opencl_throttling),
+        enable_dynamic_shapes_(enable_dynamic_shapes) {
     device_type_ = (device_type == nullptr) ? "" : device_type;
     device_id_ = (device_id == nullptr) ? "" : device_id;
     cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
@@ -24,7 +29,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
 
  private:
   std::string device_type_;
-  bool enable_vpu_fast_compile_;
+  bool enable_npu_fast_compile_;
   std::string device_id_;
   size_t num_of_threads_;
   std::string cache_dir_;
@@ -35,7 +40,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
 };
 
 std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
-  OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_,
+  OpenVINOExecutionProviderInfo info(device_type_, enable_npu_fast_compile_, device_id_, num_of_threads_,
                                      cache_dir_, num_streams_, context_, enable_opencl_throttling_,
                                      enable_dynamic_shapes_);
   return std::make_unique<OpenVINOExecutionProvider>(info);
@@ -59,17 +64,18 @@ struct OpenVINO_Provider : Provider {
 
     std::string device_type = "";           // [device_type]: Overrides the accelerator hardware type and precision
                                             //   with these values at runtime.
-    bool enable_vpu_fast_compile = false;   // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to
-                                            // speeds up the model's compilation to VPU device specific format.
+    bool enable_npu_fast_compile = false;   // [enable_npu_fast_compile]: Fast-compile may be optionally enabled to
+                                            // speed up the model's compilation to NPU device specific format.
     const char* device_id = "";             // [device_id]: Selects a particular hardware device for inference.
-    size_t num_of_threads = 8;              // [num_of_threads]: Overrides the accelerator default value of number of
+    int num_of_threads = 8;                 // [num_of_threads]: Overrides the accelerator default value of number of
                                             //  threads with this value at runtime.
     const char* cache_dir = "";             // [cache_dir]: specify the path to
                                             // dump and load the blobs for the model caching/kernel caching (GPU)
                                             // feature. If blob files are already present, it will be directly loaded.
     int num_streams = 1;                    // [num_streams]: Option that specifies the number of parallel inference
                                             // requests to be processed on a given `device_type`. Overrides the
-                                            // accelerator default value of number of streams with this value at runtime.
+                                            // accelerator default value of number of streams
+                                            // with this value at runtime.
     bool enable_opencl_throttling = false;  // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
                                             // device (Reduces CPU Utilization when using GPU)
     bool enable_dynamic_shapes = false;     // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device)
@@ -80,14 +86,15 @@ struct OpenVINO_Provider : Provider {
 
       std::set<std::string> ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32",
                                                          "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
-                                                         "GPU.0_FP16", "GPU.1_FP16",
-                                                         "VPUX_FP16", "VPUX_U8"};
+                                                         "GPU.0_FP16", "GPU.1_FP16"};
       if (!((ov_supported_device_types.find(device_type) != ov_supported_device_types.end()) ||
-            (device_type.find("HETERO:") == 0) || (device_type.find("MULTI:") == 0) || (device_type.find("AUTO:") == 0))) {
+            (device_type.find("HETERO:") == 0) ||
+            (device_type.find("MULTI:") == 0) ||
+            (device_type.find("AUTO:") == 0))) {
         ORT_THROW(
             "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
             "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
-            "'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from"
+            "'GPU.0_FP16', 'GPU.1_FP16' or from"
             " HETERO/MULTI/AUTO options available. \n");
       }
     }
@@ -97,30 +104,37 @@ struct OpenVINO_Provider : Provider {
     if (provider_options_map.find("cache_dir") != provider_options_map.end()) {
       cache_dir = provider_options_map.at("cache_dir").c_str();
     }
+
     if (provider_options_map.find("context") != provider_options_map.end()) {
-      context = (void*)provider_options_map.at("context").c_str();
+      std::string str = provider_options_map.at("context");
+      uint64_t number = std::strtoull(str.c_str(), nullptr, 16);
+      context = reinterpret_cast<void*>(number);
     }
 
     if (provider_options_map.find("num_of_threads") != provider_options_map.end()) {
       num_of_threads = std::stoi(provider_options_map.at("num_of_threads"));
       if (num_of_threads <= 0) {
         num_of_threads = 1;
+        LOGS_DEFAULT(WARNING) << "[OpenVINO-EP] The value for the key 'num_of_threads' should be positive.\n "
+                              << "Executing with num_of_threads=1";
       }
     }
 
     if (provider_options_map.find("num_streams") != provider_options_map.end()) {
       num_streams = std::stoi(provider_options_map.at("num_streams"));
-      if (num_streams <= 0 && num_streams > 8) {
-        ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_streams' should be in the range of 1-8 \n");
+      if (num_streams <= 0) {
+        num_streams = 1;
+        LOGS_DEFAULT(WARNING) << "[OpenVINO-EP] The value for the key 'num_streams' should be positive.\n "
+                              << "Executing with num_streams=1";
       }
     }
     std::string bool_flag = "";
-    if (provider_options_map.find("enable_vpu_fast_compile") != provider_options_map.end()) {
-      bool_flag = provider_options_map.at("enable_vpu_fast_compile");
+    if (provider_options_map.find("enable_npu_fast_compile") != provider_options_map.end()) {
+      bool_flag = provider_options_map.at("enable_npu_fast_compile");
       if (bool_flag == "true" || bool_flag == "True")
-        enable_vpu_fast_compile = true;
+        enable_npu_fast_compile = true;
       else if (bool_flag == "false" || bool_flag == "False")
-        enable_vpu_fast_compile = false;
+        enable_npu_fast_compile = false;
       bool_flag = "";
     }
 
@@ -141,7 +155,7 @@ struct OpenVINO_Provider : Provider {
         enable_dynamic_shapes = false;
     }
     return std::make_shared<OpenVINOProviderFactory>(const_cast<char*>(device_type.c_str()),
-                                                     enable_vpu_fast_compile,
+                                                     enable_npu_fast_compile,
                                                      device_id,
                                                      num_of_threads,
                                                      cache_dir,
@@ -157,7 +171,6 @@ struct OpenVINO_Provider : Provider {
   void Shutdown() override {
     openvino_ep::BackendManager::ReleaseGlobalContext();
   }
-
 } g_provider;
 
 }  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc
index 3914488fc5..d2ce378c97 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.cc
+++ b/onnxruntime/core/providers/openvino/ov_interface.cc
@@ -29,7 +29,10 @@ std::shared_ptr<OVNetwork> OVCore::ReadModel(const std::string& model) const {
   }
 }
 
-OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name) {
+OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network,
+                                 std::string& hw_target,
+                                 ov::AnyMap& device_config,
+                                 std::string name) {
   ov::CompiledModel obj;
   try {
     obj = oe.compile_model(ie_cnn_network, hw_target, device_config);
@@ -43,7 +46,10 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std
 }
 
 #if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
-OVExeNetwork OVCore::LoadNetwork(const std::string& model, std::string& hw_target, ov::AnyMap& device_config, std::string name) {
+OVExeNetwork OVCore::LoadNetwork(const std::string& model,
+                                 std::string& hw_target,
+                                 ov::AnyMap& device_config,
+                                 std::string name) {
   ov::CompiledModel obj;
   try {
     obj = oe.compile_model(model, ov::Tensor(), hw_target, device_config);
diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h
index ed9583033a..935ac8f684 100644
--- a/onnxruntime/core/providers/openvino/ov_interface.h
+++ b/onnxruntime/core/providers/openvino/ov_interface.h
@@ -4,6 +4,7 @@
 #pragma once
 
 #include <vector>
+#include <memory>
 
 #if defined(OPENVINO_2022_1) || (OPENVINO_2022_2) || (OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1)
 #define OV_API_20
@@ -43,9 +44,15 @@ class OVCore {
 
  public:
   std::shared_ptr<OVNetwork> ReadModel(const std::string& model_stream) const;
-  OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name);
+  OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network,
+                           std::string& hw_target,
+                           ov::AnyMap& device_config,
+                           std::string name);
 #if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
-  OVExeNetwork LoadNetwork(const std::string& model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name);
+  OVExeNetwork LoadNetwork(const std::string& model_stream,
+                           std::string& hw_target,
+                           ov::AnyMap& device_config,
+                           std::string name);
 #endif
   void SetCache(std::string cache_dir_path);
 #ifdef IO_BUFFER_ENABLED
@@ -62,7 +69,7 @@ class OVExeNetwork {
   ov::CompiledModel obj;
 
  public:
-  OVExeNetwork(ov::CompiledModel md) { obj = md; }
+  explicit OVExeNetwork(ov::CompiledModel md) { obj = md; }
   OVExeNetwork() { obj = ov::CompiledModel(); }
   ov::CompiledModel& Get() { return obj; }
   OVInferRequest CreateInferRequest();
diff --git a/onnxruntime/core/providers/openvino/ov_versions/capabilities.h b/onnxruntime/core/providers/openvino/ov_versions/capabilities.h
index b76d1cf534..5bcf9d68cd 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capabilities.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/capabilities.h
@@ -3,6 +3,8 @@
 
 #pragma once
 #include <vector>
+#include <string>
+#include <memory>
 #include "data_ops.h"
 
 namespace onnxruntime {
diff --git a/onnxruntime/core/providers/openvino/ov_versions/capability.cc b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
index 171dd45c50..b030efa238 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/capability.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/capability.cc
@@ -24,7 +24,8 @@ namespace openvino_ep {
 
 // Constructor
 GetCapability::GetCapability(const GraphViewer& graph_viewer_param, std::string device_type_param,
-                             const std::string version_param) : graph_viewer_(graph_viewer_param), device_type_(device_type_param) {
+                             const std::string version_param)
+    : graph_viewer_(graph_viewer_param), device_type_(device_type_param) {
   if (version_param == "V_2022_1") {
     data_ops_ = new DataOps(graph_viewer_, V_2022_1, device_type_);
   } else if (version_param == "V_2022_2") {
@@ -114,11 +115,11 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
     }
     openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph = true;
 
-  } else {  // unsupported_nodes_idx.empty()
-
+  } else {                                     // unsupported_nodes_idx.empty()
 #if defined(OPENVINO_DISABLE_GRAPH_PARTITION)  // disables graph partition at build time
     LOGS_DEFAULT(INFO) << "[OpenVINO-EP] DISABLE_GRAPH_PARTITION option is set";
-    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model is not fully supported by OpenVINO, so making the full model fall back to default CPU Execution Provider";
+    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model is not fully supported by OpenVINO, "
+                       << "so making the full model fall back to default CPU Execution Provider";
     return result;
 #endif
 
@@ -159,7 +160,13 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
 
       std::vector<std::string> cluster_graph_inputs, cluster_inputs, const_inputs, cluster_outputs;
 
-      GetInputsOutputsOfCluster(graph_viewer_, this_cluster, ng_required_initializers, cluster_graph_inputs, cluster_inputs, const_inputs, cluster_outputs);
+      GetInputsOutputsOfCluster(graph_viewer_,
+                                this_cluster,
+                                ng_required_initializers,
+                                cluster_graph_inputs,
+                                cluster_inputs,
+                                const_inputs,
+                                cluster_outputs);
 
       bool omit_subgraph = false;
       // Omitting zero dim subgraphs
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
index 70118c94f9..a5a0faa3a8 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.cc
@@ -2,11 +2,15 @@
 // Licensed under the MIT License
 
 #include <unordered_set>
+#include <string>
+#include <vector>
+#include <utility>
+#include <map>
+#include <set>
+
 #include "core/providers/shared_library/provider_api.h"
 #include "../backend_utils.h"
 #include "../backend_manager.h"
-#include <string>
-#include <vector>
 #include "data_ops.h"
 #include "capabilities.h"
 #include "utils.h"
@@ -72,269 +76,355 @@ std::set<std::string> ops_supported_as_function = {
 
 std::vector<SupportedOp> supported_op_mode = {
     {"Abs", V_2020_4, {"CPU", "GPU"}},
-    {"Abs", V_2023_0, {"VPUX"}},
+    {"Abs", V_2023_0, {"NPU"}},
     {"Acos", V_2020_4, {"CPU"}},
     {"Acos", V_2022_1, {"GPU"}},
+    {"Acos", V_2023_1, {"NPU"}},
     {"Acosh", V_2020_4, {"CPU"}},
     {"Acosh", V_2022_1, {"GPU"}},
+    {"Acosh", V_2023_1, {"NPU"}},
     {"Add", V_2020_4, {"CPU", "GPU"}},
-    {"Add", V_2023_0, {"VPUX"}},
+    {"Add", V_2023_0, {"NPU"}},
     {"And", V_2020_4, {"CPU", "GPU"}},
+    {"And", V_2023_1, {"NPU"}},
     {"ArgMax", V_2020_4, {"CPU"}},
     {"ArgMax", V_2021_1, {"GPU"}},
     {"ArgMin", V_2020_4, {"CPU"}},
     {"ArgMin", V_2022_1, {"GPU"}},
     {"Asin", V_2020_4, {"CPU", "GPU"}},
+    {"Asin", V_2023_1, {"NPU"}},
     {"Asinh", V_2020_4, {"CPU", "GPU"}},
+    {"Asinh", V_2023_1, {"NPU"}},
     {"Atan", V_2020_4, {"CPU", "GPU"}},
+    {"Atan", V_2023_1, {"NPU"}},
     {"Atanh", V_2020_4, {"CPU"}},
     {"Atanh", V_2022_1, {"GPU"}},
+    {"Atanh", V_2023_1, {"NPU"}},
     {"AveragePool", V_2020_4, {"CPU", "GPU"}},
-    {"AveragePool", V_2023_0, {"VPUX"}},
+    {"AveragePool", V_2023_0, {"NPU"}},
     {"BatchNormalization", V_2020_4, {"CPU", "GPU"}},
-    {"BatchNormalization", V_2023_0, {"VPUX"}},
+    {"BatchNormalization", V_2023_0, {"NPU"}},
     {"BitShift", V_2022_1, {"CPU"}},
+    {"BitShift", V_2023_1, {"NPU"}},
     {"Cast", V_2020_4, {"CPU", "GPU"}},
-    {"Cast", V_2023_0, {"VPUX"}},
+    {"Cast", V_2023_0, {"NPU"}},
+    {"CastLike", V_2023_1, {"CPU", "GPU", "NPU"}},
     {"Ceil", V_2020_4, {"GPU"}},
     {"Ceil", V_2021_4, {"CPU"}},
+    {"Ceil", V_2023_1, {"NPU"}},
     {"Celu", V_2022_1, {"CPU", "GPU"}},
     {"Clip", V_2020_4, {"CPU", "GPU"}},
-    {"Clip", V_2023_0, {"VPUX"}},
+    {"Clip", V_2023_0, {"NPU"}},
+    {"Compress", V_2023_1, {"CPU", "GPU"}},
     {"Concat", V_2020_4, {"CPU", "GPU"}},
-    {"Concat", V_2023_0, {"VPUX"}},
+    {"Concat", V_2023_0, {"NPU"}},
     {"Constant", V_2020_4, {"CPU", "GPU"}},
-    {"Constant", V_2023_0, {"VPUX"}},
+    {"Constant", V_2023_0, {"NPU"}},
     {"ConstantOfShape", V_2020_4, {"CPU", "GPU"}},
-    {"ConstantOfShape", V_2023_0, {"VPUX"}},  // Gets mapped to broadcast op in the plugin.
+    {"ConstantOfShape", V_2023_0, {"NPU"}},  // Gets mapped to broadcast op in the plugin.
     {"Conv", V_2020_4, {"CPU", "GPU"}},
-    {"Conv", V_2023_0, {"VPUX"}},
+    {"Conv", V_2023_0, {"NPU"}},
     {"ConvInteger", V_2022_1, {"CPU", "GPU"}},
+    {"ConvInteger", V_2023_1, {"NPU"}},
     {"ConvTranspose", V_2020_4, {"CPU", "GPU"}},
+    {"ConvTranspose", V_2023_1, {"NPU"}},
     {"Cos", V_2020_4, {"CPU"}},
     {"Cos", V_2022_1, {"GPU"}},
-    {"Cos", V_2023_0, {"VPUX"}},
+    {"Cos", V_2023_0, {"NPU"}},
     {"Cosh", V_2020_4, {"CPU"}},
     {"Cosh", V_2022_1, {"GPU"}},
+    {"Cosh", V_2023_1, {"NPU"}},
     {"CumSum", V_2022_1, {"CPU", "GPU"}},
-    {"CumSum", V_2023_0, {"VPUX"}},
+    {"CumSum", V_2023_0, {"NPU"}},
     {"DepthToSpace", V_2020_4, {"CPU", "GPU"}},
-    {"DepthToSpace", V_2023_0, {"VPUX"}},
+    {"DepthToSpace", V_2023_0, {"NPU"}},
     {"DequantizeLinear", V_2021_4, {"CPU", "GPU"}},
-    {"DequantizeLinear", V_2023_0, {"VPUX"}},
+    {"DequantizeLinear", V_2023_0, {"NPU"}},
     {"Div", V_2020_4, {"CPU", "GPU"}},
-    {"Div", V_2023_0, {"VPUX"}},
+    {"Div", V_2023_0, {"NPU"}},
     {"Dropout", V_2020_4, {"CPU", "GPU"}},
-    {"Dropout", V_2023_0, {"VPUX"}},
+    {"Dropout", V_2023_0, {"NPU"}},
     {"Elu", V_2020_4, {"CPU", "GPU"}},
-    {"Elu", V_2023_0, {"VPUX"}},
+    {"Elu", V_2023_0, {"NPU"}},
     // {"Einsum", V_2023_0, {"CPU", "GPU"}},
     {"Equal", V_2020_4, {"CPU", "GPU"}},
-    {"Equal", V_2023_0, {"VPUX"}},  // Added for whisper decoder model.
+    {"Equal", V_2023_0, {"NPU"}},  // Added for whisper decoder model.
     {"Erf", V_2020_4, {"CPU", "GPU"}},
-    {"Erf", V_2023_0, {"VPUX"}},
+    {"Erf", V_2023_0, {"NPU"}},
     {"Exp", V_2020_4, {"CPU", "GPU"}},
-    {"Exp", V_2023_0, {"VPUX"}},
+    {"Exp", V_2023_0, {"NPU"}},
     {"Expand", V_2022_1, {"CPU", "GPU"}},
-    {"Expand", V_2023_0, {"VPUX"}},  // Gets mapped to broadcast op and multiply op in the plugin.
+    {"Expand", V_2023_0, {"NPU"}},  // Gets mapped to broadcast op and multiply op in the plugin.
     {"EyeLike", V_2022_1, {"CPU"}},
-    {"EyeLike", V_2023_0, {"VPUX"}},  // NoOP
+    {"EyeLike", V_2023_0, {"NPU"}},  // NoOP
     {"Flatten", V_2020_4, {"CPU", "GPU"}},
-    {"Flatten", V_2023_0, {"VPUX"}},
+    {"Flatten", V_2023_0, {"NPU"}},
     {"Floor", V_2020_4, {"CPU", "GPU"}},
+    {"Floor", V_2023_1, {"NPU"}},
     {"Gather", V_2020_4, {"CPU", "GPU"}},
-    {"Gather", V_2023_0, {"VPUX"}},
+    {"Gather", V_2023_0, {"NPU"}},
     {"GatherElements", V_2022_2, {"CPU", "GPU"}},
+    {"GatherElements", V_2023_1, {"NPU"}},
     {"GatherND", V_2021_4, {"CPU", "GPU"}},
+    {"GatherND", V_2023_1, {"NPU"}},
     {"Gemm", V_2020_4, {"CPU", "GPU"}},
-    {"Gemm", V_2023_0, {"VPUX"}},
+    {"Gemm", V_2023_0, {"NPU"}},
     {"GlobalAveragePool", V_2020_4, {"CPU", "GPU"}},
-    {"GlobalAveragePool", V_2023_0, {"VPUX"}},
+    {"GlobalAveragePool", V_2023_0, {"NPU"}},
     {"GlobalLpPool", V_2020_4, {"CPU", "GPU"}},
+    {"GlobalLpPool", V_2023_1, {"NPU"}},
     {"GlobalMaxPool", V_2022_1, {"CPU", "GPU"}},
+    {"GlobalMaxPool", V_2023_1, {"NPU"}},
     {"Greater", V_2020_4, {"CPU", "GPU"}},
-    {"Greater", V_2023_0, {"VPUX"}},
+    {"Greater", V_2023_0, {"NPU"}},
     {"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
-    {"GreaterOrEqual", V_2023_0, {"VPUX"}},
+    {"GreaterOrEqual", V_2023_0, {"NPU"}},
     {"GridSample", V_2022_3, {"CPU"}},
     {"GridSample", V_2023_0, {"GPU"}},
+    {"GridSample", V_2023_1, {"NPU"}},
+    {"HardMax", V_2023_1, {"CPU", "GPU", "NPU"}},
     {"Identity", V_2020_4, {"CPU", "GPU"}},
-    {"Identity", V_2023_0, {"VPUX"}},  // NoOP
+    {"Identity", V_2023_0, {"NPU"}},  // NoOP
     {"If", V_2022_3, {"CPU", "GPU"}},
+    {"If", V_2023_1, {"NPU"}},
     {"ImageScaler", V_2022_1, {"CPU", "GPU"}},
-    {"ImageScaler", V_2023_0, {"VPUX"}},
+    {"ImageScaler", V_2023_0, {"NPU"}},
     {"InstanceNormalization", V_2020_4, {"CPU", "GPU"}},
-    {"InstanceNormalization", V_2023_0, {"VPUX"}},
+    {"InstanceNormalization", V_2023_0, {"NPU"}},
     {"HardSigmoid", V_2020_4, {"CPU", "GPU"}},
+    {"HardSigmoid", V_2023_1, {"NPU"}},
     {"HardMax", V_2022_1, {"CPU", "GPU"}},
     {"LeakyRelu", V_2020_4, {"CPU", "GPU"}},
-    {"LeakyRelu", V_2023_0, {"VPUX"}},
+    {"LeakyRelu", V_2023_0, {"NPU"}},
     {"Less", V_2020_4, {"CPU", "GPU"}},
-    {"Less", V_2023_0, {"VPUX"}},  // Added for whisper decoder model.
+    {"Less", V_2023_0, {"NPU"}},  // Added for whisper decoder model.
     {"LessOrEqual", V_2022_1, {"CPU", "GPU"}},
-    {"LessOrEqual", V_2023_0, {"VPUX"}},
+    {"LessOrEqual", V_2023_0, {"NPU"}},
     {"Log", V_2020_4, {"CPU", "GPU"}},
-    {"Log", V_2023_0, {"VPUX"}},
+    {"Log", V_2023_0, {"NPU"}},
     {"LogSoftMax", V_2022_1, {"CPU", "GPU"}},
     {"Loop", V_2021_4, {"CPU", "GPU"}},
+    {"LpNormalization", V_2023_1, {"CPU", "GPU", "NPU"}},
+    {"LpPool", V_2023_1, {"CPU", "GPU", "NPU"}},
     {"LRN", V_2020_4, {"CPU", "GPU"}},
-    {"LRN", V_2023_0, {"VPUX"}},
+    {"LRN", V_2023_0, {"NPU"}},
     {"LSTM", V_2020_4, {"CPU", "GPU"}},
+    {"LSTM", V_2023_1, {"NPU"}},
     {"MatMul", V_2020_4, {"CPU", "GPU"}},
-    {"MatMul", V_2023_0, {"VPUX"}},
+    {"MatMul", V_2023_0, {"NPU"}},
     {"MatMulInteger", V_2022_1, {"CPU"}},
+    {"MatMulInteger", V_2023_1, {"NPU"}},
     {"Max", V_2020_4, {"CPU", "GPU"}},
-    {"Max", V_2023_0, {"VPUX"}},
+    {"Max", V_2023_0, {"NPU"}},
     {"MaxPool", V_2020_4, {"CPU", "GPU"}},
-    {"MaxPool", V_2023_0, {"VPUX"}},
+    {"MaxPool", V_2023_0, {"NPU"}},
     {"Mean", V_2020_4, {"CPU", "GPU"}},
-    {"Mean", V_2023_0, {"VPUX"}},
+    {"Mean", V_2023_0, {"NPU"}},
     {"MeanVarianceNormalization", V_2022_1, {"CPU", "GPU"}},
+    {"MeanVarianceNormalization", V_2023_1, {"NPU"}},
     {"Min", V_2020_4, {"CPU", "GPU"}},
-    {"Min", V_2023_0, {"VPUX"}},
+    {"Min", V_2023_0, {"NPU"}},
     {"Mod", V_2022_1, {"CPU", "GPU"}},
     {"Mul", V_2020_4, {"CPU", "GPU"}},
-    {"Mul", V_2023_0, {"VPUX"}},
+    {"Mul", V_2023_0, {"NPU"}},
     {"Neg", V_2020_4, {"CPU", "GPU"}},
-    {"Neg", V_2023_0, {"VPUX"}},
+    {"Neg", V_2023_0, {"NPU"}},
     {"NonMaxSuppression", V_2021_1, {"CPU", "GPU"}},
+    {"NonMaxSuppression", V_2023_1, {"NPU"}},
     {"NonZero", V_2021_1, {"CPU"}},
     {"NonZero", V_2023_0, {"GPU"}},
     {"Not", V_2021_1, {"CPU", "GPU"}},
     {"Not", V_2020_4, {"CPU", "GPU"}},
+    {"Not", V_2023_1, {"NPU"}},
     {"OneHot", V_2020_4, {"CPU", "GPU"}},
+    {"OneHot", V_2023_1, {"NPU"}},
     {"Or", V_2022_1, {"CPU", "GPU"}},
+    {"Or", V_2023_1, {"NPU"}},
     {"Pad", V_2020_4, {"CPU", "GPU"}},
-    {"Pad", V_2023_0, {"VPUX"}},
+    {"Pad", V_2023_0, {"NPU"}},
     {"Pow", V_2020_4, {"CPU", "GPU"}},
-    {"Pow", V_2023_0, {"VPUX"}},
+    {"Pow", V_2023_0, {"NPU"}},
     {"PRelu", V_2020_4, {"CPU", "GPU"}},
-    {"PRelu", V_2023_0, {"VPUX"}},
+    {"PRelu", V_2023_0, {"NPU"}},
     {"QLinearMatMul", V_2022_3, {"CPU"}},
+    // {"QLinearMatMul", V_2023_1, {"NPU"}},
     {"QuantizeLinear", V_2021_4, {"CPU", "GPU"}},
-    {"QuantizeLinear", V_2023_0, {"VPUX"}},
+    {"QuantizeLinear", V_2023_0, {"NPU"}},
+    {"RNN", V_2023_1, {"CPU", "GPU"}},
     {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
+    {"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
+    {"RandomNormalLike", V_2023_1, {"NPU"}},
     {"RandomNormal", V_2023_0, {"CPU", "GPU"}},
+    {"RandomNormal", V_2023_1, {"NPU"}},
     {"Range", V_2022_1, {"CPU", "GPU"}},
-    {"Range", V_2023_0, {"VPUX"}},
+    {"Range", V_2023_0, {"NPU"}},
     {"Reciprocal", V_2020_4, {"CPU", "GPU"}},
-    {"Reciprocal", V_2023_0, {"VPUX"}},
+    {"Reciprocal", V_2023_0, {"NPU"}},
     {"ReduceL1", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceL1", V_2023_1, {"NPU"}},
     {"ReduceL2", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceL2", V_2023_1, {"NPU"}},
     {"ReduceLogSum", V_2020_4, {"CPU"}},
     {"ReduceLogSum", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceLogSum", V_2023_1, {"NPU"}},
     {"ReduceLogSumExp", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceLogSumExp", V_2023_1, {"NPU"}},
     {"ReduceMax", V_2020_4, {"CPU", "GPU"}},
+    {"ReduceMax", V_2023_1, {"NPU"}},
     {"ReduceMean", V_2020_4, {"CPU", "GPU"}},
-    {"ReduceMean", V_2023_0, {"VPUX"}},
+    {"ReduceMean", V_2023_0, {"NPU"}},
     {"ReduceMin", V_2020_4, {"CPU", "GPU"}},
+    {"ReduceMin", V_2023_1, {"NPU"}},
     {"ReduceProd", V_2020_4, {"CPU"}},
     {"ReduceProd", V_2022_1, {"GPU"}},
+    {"ReduceProd", V_2023_1, {"NPU"}},
     {"ReduceSum", V_2020_4, {"CPU", "GPU"}},
+    // {"ReduceSum", V_2023_1, {"NPU"}},
     {"ReduceSumSquare", V_2020_4, {"CPU"}},
     {"ReduceSumSquare", V_2022_1, {"CPU", "GPU"}},
+    {"ReduceSumSquare", V_2023_1, {"NPU"}},
     {"Relu", V_2020_4, {"CPU", "GPU"}},
-    {"Relu", V_2023_0, {"VPUX"}},
+    {"Relu", V_2023_0, {"NPU"}},
     {"Resize", V_2020_4, {"CPU"}},
     {"Resize", V_2022_1, {"GPU"}},
+    {"Resize", V_2023_1, {"NPU"}},
     {"Reshape", V_2020_4, {"CPU", "GPU"}},
-    {"Reshape", V_2023_0, {"VPUX"}},
+    {"Reshape", V_2023_0, {"NPU"}},
     {"ReverseSequence", V_2022_1, {"CPU", "GPU"}},
     {"RoiAlign", V_2021_1, {"CPU", "GPU"}},
+    {"RoiAlign", V_2023_1, {"NPU"}},
     {"Round", V_2021_4, {"CPU", "GPU"}},
+    {"Round", V_2023_1, {"NPU"}},
     {"Scatter", V_2022_1, {"CPU", "GPU"}},
+    {"Scatter", V_2023_1, {"NPU"}},
     {"ScatterElements", V_2022_1, {"CPU", "GPU"}},
+    {"ScatterElements", V_2023_1, {"NPU"}},
     {"ScatterND", V_2022_1, {"CPU", "GPU"}},
+    {"ScatterND", V_2023_1, {"NPU"}},
     {"Selu", V_2020_4, {"CPU", "GPU"}},
+    {"Selu", V_2023_1, {"NPU"}},
     {"Shape", V_2020_4, {"CPU", "GPU"}},
-    {"Shape", V_2023_0, {"VPUX"}},
+    {"Shape", V_2023_0, {"NPU"}},
     {"Shrink", V_2022_1, {"CPU", "GPU"}},
-    {"Shrink", V_2023_0, {"VPUX"}},
+    {"Shrink", V_2023_0, {"NPU"}},
     {"Sigmoid", V_2020_4, {"CPU", "GPU"}},
-    {"Sigmoid", V_2023_0, {"VPUX"}},
+    {"Sigmoid", V_2023_0, {"NPU"}},
     {"Sign", V_2020_4, {"CPU"}},
     {"Sign", V_2022_1, {"GPU"}},
-    {"Sign", V_2023_0, {"VPUX"}},
+    {"Sign", V_2023_0, {"NPU"}},
     {"Sin", V_2022_1, {"CPU", "GPU"}},
-    {"Sin", V_2023_0, {"VPUX"}},
+    {"Sin", V_2023_0, {"NPU"}},
     {"Sinh", V_2020_4, {"CPU"}},
+    {"Sinh", V_2023_1, {"NPU"}},
     {"Size", V_2022_1, {"CPU", "GPU"}},
+    {"Size", V_2023_1, {"NPU"}},
     {"Slice", V_2020_4, {"CPU", "GPU"}},
-    {"Slice", V_2023_0, {"VPUX"}},
+    {"Slice", V_2023_0, {"NPU"}},
     {"Softmax", V_2020_4, {"CPU", "GPU"}},
-    {"Softmax", V_2023_0, {"VPUX"}},
+    {"Softmax", V_2023_0, {"NPU"}},
     {"Softplus", V_2022_1, {"CPU", "GPU"}},
-    {"Softplus", V_2023_0, {"VPUX"}},
+    {"Softplus", V_2023_0, {"NPU"}},
     {"Softsign", V_2022_1, {"CPU", "GPU"}},
     {"SpaceToDepth", V_2020_4, {"CPU", "GPU"}},
-    {"SpaceToDepth", V_2023_0, {"VPUX"}},
+    {"SpaceToDepth", V_2023_0, {"NPU"}},
     {"Split", V_2020_4, {"CPU", "GPU"}},
-    {"Split", V_2023_0, {"VPUX"}},
+    {"Split", V_2023_0, {"NPU"}},
     {"Sqrt", V_2020_4, {"CPU", "GPU"}},
-    {"Sqrt", V_2023_0, {"VPUX"}},
+    {"Sqrt", V_2023_0, {"NPU"}},
     {"Squeeze", V_2020_4, {"CPU", "GPU"}},
-    {"Squeeze", V_2023_0, {"VPUX"}},
+    {"Squeeze", V_2023_0, {"NPU"}},
     {"Softsign", V_2020_4, {"CPU"}},
     {"Sub", V_2020_4, {"CPU", "GPU"}},
-    {"Sub", V_2023_0, {"VPUX"}},
+    {"Sub", V_2023_0, {"NPU"}},
     {"Sum", V_2020_4, {"CPU", "GPU"}},
-    {"Sum", V_2023_0, {"VPUX"}},
+    {"Sum", V_2023_0, {"NPU"}},
     {"Tan", V_2020_4, {"CPU", "GPU"}},
+    {"Tan", V_2023_1, {"NPU"}},
     {"Tanh", V_2020_4, {"CPU", "GPU"}},
-    {"Tanh", V_2023_0, {"VPUX"}},
+    {"Tanh", V_2023_0, {"NPU"}},
     {"ThresholdedRelu", V_2022_1, {"CPU", "GPU"}},
-    {"ThresholdedRelu", V_2023_0, {"VPUX"}},
+    {"ThresholdedRelu", V_2023_0, {"NPU"}},
     {"Tile", V_2021_3, {"CPU", "GPU"}},
-    {"Tile", V_2023_0, {"VPUX"}},
+    {"Tile", V_2023_0, {"NPU"}},
     {"Transpose", V_2020_4, {"CPU", "GPU"}},
-    {"Transpose", V_2023_0, {"VPUX"}},
+    {"Transpose", V_2023_0, {"NPU"}},
     {"Trilu", V_2023_0, {"CPU", "GPU"}},
+    {"Trilu", V_2023_1, {"NPU"}},
     {"TopK", V_2020_4, {"CPU", "GPU"}},
-    {"TopK", V_2023_0, {"VPUX"}},
+    {"TopK", V_2023_0, {"NPU"}},
+    {"Upsample", V_2020_4, {"CPU", "GPU"}},
     {"Unsqueeze", V_2020_4, {"CPU", "GPU"}},
-    {"Unsqueeze", V_2023_0, {"VPUX"}},
-    {"Upsample", V_2021_1, {"CPU"}},
-    {"Upsample", V_2021_4, {"GPU"}},
-    {"Upsample", V_2023_0, {"VPUX"}},
+    {"Unsqueeze", V_2023_0, {"NPU"}},
     {"Where", V_2022_1, {"CPU", "GPU"}},
-    {"Where", V_2023_0, {"VPUX"}},  // Added for whisper decoder model.
+    {"Where", V_2023_0, {"NPU"}},  // Added for whisper decoder model.
     {"Xor", V_2022_1, {"CPU", "GPU"}},
+    {"Xor", V_2023_1, {"NPU"}},
 };
 
 void DataOps::populate_types_supported() {
-  supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
-  supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
-  supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
-  supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
-  supported_types_initializer_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
-  supported_types_initializer_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
-  supported_types_initializer_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
+  supported_types_initializer_.insert(
+      std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
 
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
-  supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
-  supported_types_vpu_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
+  supported_types_npu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+  supported_types_npu_.insert(
+      std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
 
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
-  supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
-  supported_types_cpu_.insert(std::make_pair(V_2022_2, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+  supported_types_cpu_.insert(
+      std::make_pair(V_2022_2, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
 
-  supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
-  supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
-  supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
-  supported_types_gpu_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
-  supported_types_gpu_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
-  supported_types_gpu_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
-  supported_types_gpu_.insert(std::make_pair(V_2022_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
+  supported_types_gpu_.insert(
+      std::make_pair(V_2022_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
 }
 
 void DataOps::populate_op_mode_supported() {
@@ -349,10 +439,10 @@ void DataOps::populate_op_mode_supported() {
   no_dimension_supported_.push_back({"Equal", V_2023_0, {"GPU"}});
   no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}});
-  no_dimension_supported_.push_back({"Greater", V_2023_0, {"VPUX"}});
+  no_dimension_supported_.push_back({"Greater", V_2023_0, {"NPU"}});
   no_dimension_supported_.push_back({"Less", V_2022_1, {"CPU"}});
   no_dimension_supported_.push_back({"Loop", V_2021_4, {"All"}});
-  no_dimension_supported_.push_back({"Max", V_2023_0, {"VPUX"}});
+  no_dimension_supported_.push_back({"Max", V_2023_0, {"NPU"}});
   no_dimension_supported_.push_back({"Min", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"Mul", V_2020_4, {"All"}});
   no_dimension_supported_.push_back({"QuantizeLinear", V_2021_4, {"All"}});
@@ -382,11 +472,14 @@ void DataOps::populate_op_mode_supported() {
   {
     UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3},
                              [this](const Node* node, const InitializedTensorSet&) {
-                               // Abs is not supproted with INT8 or INT32 as input data type on GPU
-                               if (device_id_.find("GPU") != std::string::npos) {
+                               // Abs is not supported with INT8 or INT32 as input data type on GPU and NPU
+                               if ((device_id_.find("GPU") != std::string::npos) ||
+                                   (device_id_.find("NPU") != std::string::npos)) {
                                  for (size_t i = 0; i < node->InputDefs().size(); i++) {
-                                   if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8 ||
-                                       node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
+                                   if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
+                                           ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8 ||
+                                       node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
+                                           ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
                                      return true;
                                  }
                                }
@@ -399,11 +492,14 @@ void DataOps::populate_op_mode_supported() {
                              [this](const Node* node, const InitializedTensorSet&) {
                                // tensor type does not support select last index
                                auto& attributes = node->GetAttributes();
-                               auto last_index_arg = attributes.count("select_last_index") > 0 ? attributes.at("select_last_index").i() : 0;
+                               auto last_index_arg =
+                                   attributes.count("select_last_index") > 0 ? attributes.at("select_last_index").i()
+                                                                             : 0;
                                if (last_index_arg != 0)
                                  return true;
                                // tensor type supports float as input for argmax and argmin
-                               if (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)
+                               if (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() !=
+                                   ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)
                                  return true;
                                return false;
                              }};
@@ -415,7 +511,8 @@ void DataOps::populate_op_mode_supported() {
                              [this](const Node* node, const InitializedTensorSet&) {
                                if (device_id_.find("GPU") != std::string::npos) {
                                  // int64 data type is not supported on GPU
-                                 const bool data_is_int64 = node->InputDefs()[0]->Type()->find("int64") != std::string::npos;
+                                 const bool data_is_int64 =
+                                     node->InputDefs()[0]->Type()->find("int64") != std::string::npos;
                                  return data_is_int64;
                                }
                                return false;
@@ -506,9 +603,12 @@ void DataOps::populate_op_mode_supported() {
                                if (device_id_.find("GPU") != std::string::npos) {
                                  auto x_data_type = node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
                                  auto y_data_type = node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
-                                 // currently both inputs with int32 are not supported and also both input datatypes should be same
-                                 const bool A_is_int32 = node->InputDefs()[0]->Type()->find("int32") != std::string::npos;
-                                 const bool B_is_int32 = node->InputDefs()[1]->Type()->find("int32") != std::string::npos;
+                                 // currently both inputs with int32 are not supported
+                                 // and also both input datatypes should be same
+                                 const bool A_is_int32 =
+                                     node->InputDefs()[0]->Type()->find("int32") != std::string::npos;
+                                 const bool B_is_int32 =
+                                     node->InputDefs()[1]->Type()->find("int32") != std::string::npos;
                                  if ((A_is_int32 && B_is_int32) || (x_data_type != y_data_type))
                                    return true;
                                }
@@ -589,11 +689,13 @@ void DataOps::populate_op_mode_supported() {
                                if (device_id_.find("GPU") != std::string::npos) {
                                  auto slope = node->InputDefs()[1];
                                  // PRelu slope has to be an initializer or needs to come from a constant node
-                                 if (initializers.count(slope->Name()))
+                                 if (initializers.count(slope->Name())) {
                                    return false;
-                                 else {
-                                   for (auto input_node = node->InputNodesBegin(); input_node != node->InputNodesEnd(); ++input_node) {
-                                     if (GetInputCount(this->graph_viewer_.GetNode((*input_node).Index()), initializers) == 0)
+                                 } else {
+                                   for (auto input_node = node->InputNodesBegin();
+                                        input_node != node->InputNodesEnd(); ++input_node) {
+                                     if (GetInputCount(
+                                             this->graph_viewer_.GetNode((*input_node).Index()), initializers) == 0)
                                        return false;
                                    }
                                  }
@@ -603,12 +705,12 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"PRelu", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
+    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
                              [this](const Node* node, const InitializedTensorSet&) {
                                const auto& input_arg = node->InputDefs()[1];
                                auto shape = input_arg->Shape();
                                // Reshape op with empty dim is Rejected for Myriad
-                               //[TODO] Is this condition required anymore with Myriad removed?
+                               // [TODO] Is this condition required anymore with Myriad removed?
                                if (shape != nullptr) {
                                  for (const auto& dim : input_arg->Shape()->dim()) {
                                    if (utils::HasDimValue(dim) && dim.dim_value() == 0)
@@ -638,7 +740,8 @@ void DataOps::populate_op_mode_supported() {
                                if (device_id_.find("GPU") != std::string::npos) {
                                  // INT32 dataype is not supported as input
                                  for (size_t i = 0; i < node->InputDefs().size(); i++) {
-                                   if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
+                                   if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
+                                       ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
                                      return true;
                                  }
                                }
@@ -650,9 +753,11 @@ void DataOps::populate_op_mode_supported() {
     UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3},
                              [this](const Node* node, const InitializedTensorSet&) {
                                if (device_id_.find("GPU") != std::string::npos) {
-                                 auto output_data_type = node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
+                                 auto output_data_type =
+                                     node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
                                  // If the output of ScatterND op is BOOL, it is rejected for GPU.
-                                 if (output_data_type == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)
+                                 if (output_data_type ==
+                                     ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)
                                    return true;
                                }
                                return false;
@@ -666,7 +771,8 @@ void DataOps::populate_op_mode_supported() {
                              [this](const Node* node, const InitializedTensorSet&) {
                                // If the Input of Shrink op is UINT8, it is rejected (Due to output mismatch)
                                for (size_t i = 0; i < node->InputDefs().size(); i++) {
-                                 if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8)
+                                 if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
+                                     ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8)
                                    return true;
                                }
                                return false;
@@ -714,10 +820,11 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"Squeeze", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
+    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // If the operator is unsqueeze
-                               // If axes is an input, then we cannot produce a static graph. Conversion fails in convert_function_to_cnn_network.
+                               // If axes is an input, then we cannot produce a static graph.
+                               // Conversion fails in convert_function_to_cnn_network.
                                for (size_t i = 0; i < node->InputDefs().size(); i++) {
                                  if (node->InputDefs()[i]->Name() == "axes") {
                                    return true;
@@ -728,14 +835,15 @@ void DataOps::populate_op_mode_supported() {
     op_list_.insert({"Unsqueeze", obj});
   }
   {
-    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
+    UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
                              [this](const Node* node, const InitializedTensorSet&) {
                                // check for attributes
                                auto& upsample_attr = node->GetAttributes();
                                if (upsample_attr.count("scales") > 0) {
                                  auto& upsample_arg = upsample_attr.at("scales");
                                  auto float_size = upsample_arg.floats_size();
-                                 if (float_size > 2 && (upsample_arg.floats(0) != 1.f || upsample_arg.floats(1) != 1.f)) {
+                                 if (float_size > 2 &&
+                                     (upsample_arg.floats(0) != 1.f || upsample_arg.floats(1) != 1.f)) {
                                    return true;
                                  }
                                }
@@ -750,9 +858,12 @@ void DataOps::populate_op_mode_supported() {
                                  }
                                }
                                // x_arg supports only float, int8 and float16 type
-                               if ((x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) ||
-                                   (x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8) ||
-                                   (x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)) {
+                               if ((x_arg->TypeAsProto()->tensor_type().elem_type() ==
+                                    ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) ||
+                                   (x_arg->TypeAsProto()->tensor_type().elem_type() ==
+                                    ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8) ||
+                                   (x_arg->TypeAsProto()->tensor_type().elem_type() ==
+                                    ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)) {
                                  return false;
                                } else {
                                  return true;
@@ -849,9 +960,9 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) {
   } else {
     auto dtype = type_proto->tensor_type().elem_type();
 
-    if (device_id_.find("VPUX") != std::string::npos || device_id_.find("HETERO") != std::string::npos ||
+    if (device_id_.find("NPU") != std::string::npos || device_id_.find("HETERO") != std::string::npos ||
         device_id_.find("MULTI") != std::string::npos || device_id_.find("AUTO") != std::string::npos) {
-      for (auto const& var : supported_types_vpu_) {
+      for (auto const& var : supported_types_npu_) {
         if ((var.first <= version_id_) &&
             (var.second == dtype)) {
           return true;
@@ -1079,7 +1190,9 @@ bool DataOps::node_is_supported(const std::map<std::string, std::set<std::string
   if (opset->second.find(optype) == opset->second.end() && op_fun == ops_supported_as_function.end()) {
 #ifndef NDEBUG
     if (openvino_ep::backend_utils::IsDebugEnabled()) {
-      std::cout << "The operator is not available in OpenVINO ngraph operators list nor the operator is a special ONNX function" << std::endl;
+      std::cout << "The operator is not available in OpenVINO ngraph operators list "
+                << "nor the operator is a special ONNX function"
+                << std::endl;
     }
 #endif
     return false;
@@ -1095,10 +1208,12 @@ std::vector<NodeIndex> DataOps::GetUnsupportedNodeIndices(std::unordered_set<std
   for (const auto& node_idx : graph_viewer_.GetNodesInTopologicalOrder()) {
     if (node_is_supported(ng_supported_ops, node_idx)) {
       // Collect inputs that are initializers
-      graph_viewer_.GetNode(node_idx)->ForEachDef([&ng_required_initializers, this](const NodeArg& node_arg, bool is_input) {
-            if(is_input && this->graph_viewer_.GetAllInitializedTensors().count(node_arg.Name())) {
+      graph_viewer_.GetNode(node_idx)->ForEachDef([&ng_required_initializers, this](const NodeArg& node_arg,
+                                                                                    bool is_input) {
+            if (is_input && this->graph_viewer_.GetAllInitializedTensors().count(node_arg.Name())) {
                 ng_required_initializers.insert(node_arg.Name());
-              } }, true);
+              } },
+                                                  true);
     } else {
       unsupported_nodes_idx.push_back(node_idx);
     }
@@ -1110,7 +1225,8 @@ bool DataOps::IsOpSupportedOnlyInModel(std::string name) {
   return ops_supported_only_in_model.find(name) != ops_supported_only_in_model.end();
 }
 
-bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers, const Node* node) {
+bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers,
+                                                const Node* node) {
   if (node->OpType() == "Reshape") {
     const auto& shape_arg = node->InputDefs()[1];
     if (ng_required_initializers.find(shape_arg->Name()) == ng_required_initializers.end()) {
@@ -1119,15 +1235,20 @@ bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>&
   } else if (node->OpType() == "Expand") {
     // nGraph only supports constant shape input values
     const auto& output = node->OutputDefs()[0];
-    if (output->TypeAsProto()->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)
+    if (output->TypeAsProto()->tensor_type().elem_type() !=
+        ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)
       return true;
   } else if (node->OpType() == "RoiAlign") {
     using onnx_dtype = ONNX_NAMESPACE::TensorProto_DataType;
 
-    onnx_dtype input_0_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
-    onnx_dtype input_1_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
-    onnx_dtype input_2_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[2]->TypeAsProto()->tensor_type().elem_type();
-    onnx_dtype output_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
+    onnx_dtype input_0_data_type =
+        (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
+    onnx_dtype input_1_data_type =
+        (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
+    onnx_dtype input_2_data_type =
+        (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[2]->TypeAsProto()->tensor_type().elem_type();
+    onnx_dtype output_data_type =
+        (ONNX_NAMESPACE::TensorProto_DataType)node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
 
     if ((input_0_data_type != onnx_dtype::TensorProto_DataType_FLOAT16) ||
         (input_1_data_type != onnx_dtype::TensorProto_DataType_FLOAT16) ||
diff --git a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
index cc968d02ea..a5aa3f8256 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/data_ops.h
@@ -3,6 +3,11 @@
 
 #pragma once
 #include <unordered_set>
+#include <utility>
+#include <map>
+#include <set>
+#include <vector>
+#include <string>
 
 namespace onnxruntime {
 namespace openvino_ep {
@@ -47,7 +52,7 @@ class DataOps {
   std::multimap<std::string, UnsupportedOpMode> op_list_;
   std::vector<SupportedOp> subgraph_supported_;
   std::vector<SupportedOp> no_dimension_supported_;
-  std::set<Pairs> supported_types_vpu_;
+  std::set<Pairs> supported_types_npu_;
   std::set<Pairs> supported_types_cpu_;
   std::set<Pairs> supported_types_gpu_;
   std::set<Pairs> supported_types_initializer_;
@@ -64,14 +69,16 @@ class DataOps {
                          const NodeIndex node_idx);
 
  public:
-  DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, std::string dev_id) : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id) {
+  DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, std::string dev_id)
+      : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id) {
     populate_op_mode_supported();
     populate_types_supported();
   }
 
   virtual std::vector<NodeIndex> GetUnsupportedNodeIndices(std::unordered_set<std::string>& ng_required_initializers);
   virtual bool IsOpSupportedOnlyInModel(std::string name);
-  virtual bool SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers, const Node* node);
+  virtual bool SpecialConditionForClusterSizeOne(
+      std::unordered_set<std::string>& ng_required_initializers, const Node* node);
   virtual bool DoNotOmitSubGraph(const std::string& name);
   virtual bool InsertNode(const std::string& name);
   VersionNum GetVersion() const { return version_id_; }
diff --git a/onnxruntime/core/providers/openvino/ov_versions/utils.cc b/onnxruntime/core/providers/openvino/ov_versions/utils.cc
index be509b6743..74369d39b9 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/utils.cc
+++ b/onnxruntime/core/providers/openvino/ov_versions/utils.cc
@@ -2,6 +2,7 @@
 // Licensed under the MIT License
 
 #include "core/providers/shared_library/provider_api.h"
+#include "utils.h"
 
 #if defined(_MSC_VER)
 #pragma warning(disable : 4244 4245 5208)
@@ -113,7 +114,8 @@ std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_op
  * supported_cluster + (UNsupported_node + rest_of_the_graph). This functions returns vector of all supported_clusters by nGraph
  */
 std::vector<std::vector<NodeIndex>>
-GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes) {
+GetPartitionedClusters(const std::vector<NodeIndex>& topological_order,
+                       const std::vector<NodeIndex>& unsupported_nodes) {
   std::vector<std::vector<NodeIndex>> ng_clusters;
 
   auto prev = topological_order.begin();
@@ -140,7 +142,10 @@ GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const st
   return ng_clusters;
 }
 
-void IdentifyConnectedNodes(const GraphViewer& graph_viewer, NodeIndex curr_node_index, std::vector<NodeIndex>& cluster, std::vector<NodeIndex>& sub_cluster) {
+void IdentifyConnectedNodes(const GraphViewer& graph_viewer,
+                            NodeIndex curr_node_index,
+                            std::vector<NodeIndex>& cluster,
+                            std::vector<NodeIndex>& sub_cluster) {
   if (std::find(cluster.begin(), cluster.end(), curr_node_index) == cluster.end())
     return;
 
@@ -205,7 +210,8 @@ void GetInputsOutputsOfCluster(const GraphViewer& graph_viewer,
       const auto& ext_node = graph_viewer.GetNode((*it).Index());
 
       if (std::find(cluster.begin(), cluster.end(), ext_node->Index()) == cluster.end()) {
-        // Node is external to this_cluster. Search through its inputs to find the output that is generated by this_cluster.
+        // Node is external to this_cluster. Search through its inputs to
+        // find the output that is generated by this_cluster.
         std::set<std::string> ext_node_inputs;
         ext_node->ForEachDef(
             [&ext_node_inputs](const NodeArg& arg, bool is_input) {
diff --git a/onnxruntime/core/providers/openvino/ov_versions/utils.h b/onnxruntime/core/providers/openvino/ov_versions/utils.h
index 70f6954ea9..c256cde979 100644
--- a/onnxruntime/core/providers/openvino/ov_versions/utils.h
+++ b/onnxruntime/core/providers/openvino/ov_versions/utils.h
@@ -1,5 +1,15 @@
 // Copyright (C) 2019-2022 Intel Corporation
 // Licensed under the MIT License
+#pragma once
+
+#include <memory>
+#include <map>
+#include <utility>
+#include <vector>
+#include <set>
+#include <algorithm>
+#include <string>
+#include <unordered_set>
 
 namespace onnxruntime {
 namespace openvino_ep {
@@ -18,9 +28,14 @@ int GetOnnxOpSet(const GraphViewer& graph_viewer);
 std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_opset);
 
 std::vector<std::vector<NodeIndex>>
-GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes);
+GetPartitionedClusters(
+    const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes);
 
-void IdentifyConnectedNodes(const GraphViewer& graph_viewer, NodeIndex curr_node_index, std::vector<NodeIndex>& cluster, std::vector<NodeIndex>& sub_cluster);
+void IdentifyConnectedNodes(
+    const GraphViewer& graph_viewer,
+    NodeIndex curr_node_index,
+    std::vector<NodeIndex>& cluster,
+    std::vector<NodeIndex>& sub_cluster);
 
 std::vector<std::vector<NodeIndex>>
 GetConnectedClusters(const GraphViewer& graph_viewer, const std::vector<std::vector<NodeIndex>>& clusters);
diff --git a/onnxruntime/core/session/provider_bridge_ort.cc b/onnxruntime/core/session/provider_bridge_ort.cc
index 9e59883478..df4dd55417 100644
--- a/onnxruntime/core/session/provider_bridge_ort.cc
+++ b/onnxruntime/core/session/provider_bridge_ort.cc
@@ -1432,7 +1432,7 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O
   if (legacy_ov_options->device_type != nullptr)
     ov_options_converted_map["device_type"] = legacy_ov_options->device_type;
 
-  ov_options_converted_map["enable_vpu_fast_compile"] = legacy_ov_options->enable_vpu_fast_compile;
+  ov_options_converted_map["enable_npu_fast_compile"] = legacy_ov_options->enable_npu_fast_compile;
 
   if (legacy_ov_options->device_id != nullptr)
     ov_options_converted_map["device_id"] = legacy_ov_options->device_id;
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index 7faca3b468..2027b59232 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -813,10 +813,10 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
         if (option.first == "device_type") {
           OV_provider_options_map[option.first] = option.second;
           continue;
-        } else if (option.first == "enable_vpu_fast_compile") {
+        } else if (option.first == "enable_npu_fast_compile") {
           if (!(option.second == "True" || option.second == "true" ||
                 option.second == "False" || option.second == "false")) {
-            ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
+            ORT_THROW("Invalid value passed for enable_npu_fast_compile: ", option.second);
           }
           OV_provider_options_map[option.first] = option.second;
         } else if (option.first == "enable_opencl_throttling") {
diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h
index 5bb6bcc38b..a5bcbce89b 100644
--- a/onnxruntime/python/onnxruntime_pybind_state_common.h
+++ b/onnxruntime/python/onnxruntime_pybind_state_common.h
@@ -60,11 +60,11 @@ struct OrtStatus {
 #elif OPENVINO_CONFIG_GPU_FP16
 #define BACKEND_OPENVINO "-OPENVINO_GPU_FP16"
 
-#elif OPENVINO_CONFIG_VPUX_FP16
-#define BACKEND_OPENVINO "-OPENVINO_VPUX_FP16"
+#elif OPENVINO_CONFIG_NPU_FP16
+#define BACKEND_OPENVINO "-OPENVINO_NPU_FP16"
 
-#elif OPENVINO_CONFIG_VPUX_U8
-#define BACKEND_OPENVINO "-OPENVINO_VPUX_U8"
+#elif OPENVINO_CONFIG_NPU_U8
+#define BACKEND_OPENVINO "-OPENVINO_NPU_U8"
 
 #elif OPENVINO_CONFIG_MULTI
 #define BACKEND_OPENVINO "-OPENVINO_MULTI"
diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc
index b1a04a00e8..6d075fec99 100644
--- a/onnxruntime/test/perftest/command_args_parser.cc
+++ b/onnxruntime/test/perftest/command_args_parser.cc
@@ -60,7 +60,7 @@ namespace perftest {
       "\t-i: Specify EP specific runtime options as key value pairs. Different runtime options available are: \n"
       "\t    [OpenVINO only] [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
       "\t    [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n"
-      "\t    [OpenVINO only] [enable_vpu_fast_compile]: Optionally enabled to speeds up the model's compilation on VPU device targets.\n"
+      "\t    [OpenVINO only] [enable_npu_fast_compile]: Optionally enabled to speed up the model's compilation on NPU device targets.\n"
       "\t    [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
       "\t    [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n"
       "\t    [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n"
@@ -72,7 +72,7 @@ namespace perftest {
       "\t    [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
       "\t    'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
       "\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
-      "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"<path>\"\"\n"
+      "\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"<path>\"\"\n"
       "\t [Example] [For QNN EP] -e qnn -i \"backend_path|/folderpath/libQnnCpu.so\" \n\n"
       "\t    [TensorRT only] [trt_max_partition_iterations]: Maximum iterations for TensorRT parser to get capability.\n"
       "\t    [TensorRT only] [trt_min_subgraph_size]: Minimum size of TensorRT subgraphs.\n"
diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc
index 41a1eafebb..b7a111783f 100644
--- a/onnxruntime/test/perftest/ort_test_session.cc
+++ b/onnxruntime/test/perftest/ort_test_session.cc
@@ -240,8 +240,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
       if (key == "device_type") {
         std::set<std::string> ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32",
                                                            "GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
-                                                           "GPU.0_FP16", "GPU.1_FP16",
-                                                           "VPUX_FP16", "VPUX_U8"};
+                                                           "GPU.0_FP16", "GPU.1_FP16"};
         if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) {
           ov_options[key] = value;
         } else if (value.find("HETERO:") == 0) {
@@ -254,17 +253,17 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
           ORT_THROW(
               "[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
               "Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
-              "'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from"
+              "'GPU.0_FP16', 'GPU.1_FP16' or from"
               " HETERO/MULTI/AUTO options available. \n");
         }
       } else if (key == "device_id") {
         ov_options[key] = value;
-      } else if (key == "enable_vpu_fast_compile") {
+      } else if (key == "enable_npu_fast_compile") {
         if (value == "true" || value == "True" ||
             value == "false" || value == "False") {
           ov_options[key] = value;
         } else {
-          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
+          ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_npu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
         }
       } else if (key == "enable_opencl_throttling") {
         if (value == "true" || value == "True" ||
@@ -299,7 +298,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
           ov_options[key] = value;
         }
       } else {
-        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling|true'] \n");
+        ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling|true'] \n");
       }
     }
     session_options.AppendExecutionProvider("OpenVINO", ov_options);
diff --git a/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc b/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc
index e37206d6ae..b7cead66bd 100644
--- a/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc
+++ b/onnxruntime/test/providers/cpu/nn/lp_norm_op_test.cc
@@ -143,7 +143,7 @@ void L1NormalizationWithZeroNorm() {
 
   vector<T> expected_output = {0.5f, 0.5f, 0.f, 0.f};
   test.AddOutput<T>("Y", input_dims, expected_output);
-  test.Run();
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
 }
 
 TEST(LpNormalizationTest, L1NormalizationWithZeroNorm) {
@@ -163,7 +163,7 @@ void L2NormalizationWithZeroNorm() {
 
   vector<T> expected_output = {1.f, 0.f, 0.f, 0.f};
   test.AddOutput<T>("Y", input_dims, expected_output);
-  test.Run();
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
 }
 
 TEST(LpNormalizationTest, L2NormalizationWithZeroNorm) {
diff --git a/onnxruntime/test/providers/cpu/rnn/rnn_op_test.cc b/onnxruntime/test/providers/cpu/rnn/rnn_op_test.cc
index d1a523b1ee..b9875b9553 100644
--- a/onnxruntime/test/providers/cpu/rnn/rnn_op_test.cc
+++ b/onnxruntime/test/providers/cpu/rnn/rnn_op_test.cc
@@ -762,7 +762,7 @@ TEST(RNNTest, RNN_invalid_sequence_lens) {
     test.AddOutput<float>("Y_h", Y_h_dims, Y_h_data);
 
     // the CUDA RNN version allows the invalid sequence lengths, so disable testing on CUDA and TensorRT
-    test.Run(OpTester::ExpectResult::kExpectFailure, error_msg, {kCudaExecutionProvider, kTensorrtExecutionProvider});
+    test.Run(OpTester::ExpectResult::kExpectFailure, error_msg, {kCudaExecutionProvider, kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
   };
 
   // should batch batch_size to be valid
@@ -860,7 +860,7 @@ TEST(RNNTest, RNN_bidirectional_with_sequence_lens) {
 
   test.AddOutput<float>("Y_h", Y_h_dims, Y_h_data);
 
-  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
 }
 
 TEST(RNNTest, RNN_with_invalid_activation_load_failure) {
diff --git a/onnxruntime/test/providers/cpu/tensor/compress_op.test.cc b/onnxruntime/test/providers/cpu/tensor/compress_op.test.cc
index c95ac1603a..c3d9110060 100644
--- a/onnxruntime/test/providers/cpu/tensor/compress_op.test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/compress_op.test.cc
@@ -66,7 +66,7 @@ TEST(CompressTest, Compress_3dims_has_extra_condition) {
   // has condition length = 3 > input_dim[axis] = 2
   test.AddInput<bool>("condition", {3}, {0, 1, 1});
   test.AddOutput<float>("output", {2, 1, 3}, {4.0f, 5.0f, 6.0f, 10.0f, 11.0f, 12.0f});
-  test.Run();
+  test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
 }
 
 TEST(CompressTest, Compress_3dims_has_extra_input) {
diff --git a/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc b/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc
index 2120da604f..d2aa5dd428 100644
--- a/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc
+++ b/onnxruntime/test/providers/cpu/tensor/unsqueeze_op_test.cc
@@ -99,7 +99,7 @@ TEST(TensorOpTest, Unsqueeze_scalar_2) {
     test.AddInput<float>("input", {}, std::vector<float>{1.0f});
     test.AddInput<int64_t>("axes", {2}, std::vector<int64_t>{0, -1}, axes_is_initializer);
     test.AddOutput<float>("output", {1, 1}, std::vector<float>{1.0f});
-    test.Run();
+    test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
   };
   run_test(false);
   run_test(true);
diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py
index ecf4b001ee..c48b07422d 100644
--- a/onnxruntime/test/python/onnx_backend_test_series.py
+++ b/onnxruntime/test/python/onnx_backend_test_series.py
@@ -140,6 +140,9 @@ def create_backend_test(test_name=None):
         if backend.supports_device("OPENVINO_CPU_FP16"):
             current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP16")
 
+        if backend.supports_device("OPENVINO_NPU_FP16"):
+            current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_NPU_FP16")
+
         if backend.supports_device("OPENVINO"):
             current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_opset18")
 
diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
index 44db7c0078..c552ec3aea 100644
--- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
+++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc
@@ -521,6 +521,10 @@
         "test_scan_sum_cpu", // Disabled due to output mismatch with tolerance.
         "test_scan9_sum_cpu" // Disabled due to output mismatch with tolerance.
     ],
+    "current_failing_tests_OPENVINO_NPU_FP16": [
+        "^test_prelu_broadcast",
+        "test_loop11_cpu"
+    ],
     "current_failing_tests_OPENVINO_opset18": [
         // pending opset 18 support, RUNTIME_EXCEPTION : Encountered unknown exception in Initialize()
         "^test_center_crop_pad_crop_axes_chw",
diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py
index 806e536cb4..a992da8ff9 100644
--- a/tools/ci_build/build.py
+++ b/tools/ci_build/build.py
@@ -66,15 +66,13 @@ _check_python_version()
 
 
 def _openvino_verify_device_type(device_read):
-    choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16", "VPUX_FP16", "VPUX_U8"]
+    choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"]
 
     choices1 = [
         "CPU_FP32_NO_PARTITION",
         "CPU_FP16_NO_PARTITION",
         "GPU_FP32_NO_PARTITION",
         "GPU_FP16_NO_PARTITION",
-        "VPUX_FP16_NO_PARTITION",
-        "VPUX_U8_NO_PARTITION",
     ]
     status_hetero = True
     res = False
@@ -89,7 +87,7 @@ def _openvino_verify_device_type(device_read):
         if len(comma_separated_devices) < 2:
             print("At least two devices required in Hetero/Multi/Auto Mode")
             status_hetero = False
-        dev_options = ["CPU", "GPU", "VPUX"]
+        dev_options = ["CPU", "GPU"]
         for dev in comma_separated_devices:
             if dev not in dev_options:
                 status_hetero = False
@@ -100,7 +98,7 @@ def _openvino_verify_device_type(device_read):
         print("specify the keyword HETERO or MULTI or AUTO followed by the devices ")
         print("in the order of priority you want to build\n")
         print("The different hardware devices that can be added in HETERO or MULTI or AUTO")
-        print("are ['CPU','GPU', 'VPUX'] \n")
+        print("are ['CPU','GPU'] \n")
         print("An example of how to specify the hetero build type. Ex: HETERO:GPU,CPU \n")
         print("An example of how to specify the MULTI build type. Ex: MULTI:GPU,CPU \n")
         print("An example of how to specify the AUTO build type. Ex: AUTO:GPU,CPU \n")
@@ -1158,8 +1156,6 @@ def generate_build_tree(
             "-Donnxruntime_USE_OPENVINO_GPU_FP16=" + ("ON" if args.use_openvino == "GPU_FP16" else "OFF"),
             "-Donnxruntime_USE_OPENVINO_CPU_FP32=" + ("ON" if args.use_openvino == "CPU_FP32" else "OFF"),
             "-Donnxruntime_USE_OPENVINO_CPU_FP16=" + ("ON" if args.use_openvino == "CPU_FP16" else "OFF"),
-            "-Donnxruntime_USE_OPENVINO_VPUX_FP16=" + ("ON" if args.use_openvino == "VPUX_FP16" else "OFF"),
-            "-Donnxruntime_USE_OPENVINO_VPUX_U8=" + ("ON" if args.use_openvino == "VPUX_U8" else "OFF"),
             "-Donnxruntime_USE_OPENVINO_GPU_FP32_NP="
             + ("ON" if args.use_openvino == "GPU_FP32_NO_PARTITION" else "OFF"),
             "-Donnxruntime_USE_OPENVINO_GPU_FP16_NP="
@@ -1168,9 +1164,6 @@ def generate_build_tree(
             + ("ON" if args.use_openvino == "CPU_FP32_NO_PARTITION" else "OFF"),
             "-Donnxruntime_USE_OPENVINO_CPU_FP16_NP="
             + ("ON" if args.use_openvino == "CPU_FP16_NO_PARTITION" else "OFF"),
-            "-Donnxruntime_USE_OPENVINO_VPUX_FP16_NP="
-            + ("ON" if args.use_openvino == "VPUX_FP16_NP_PARTITION" else "OFF"),
-            "-Donnxruntime_USE_OPENVINO_VPUX_U8_NP=" + ("ON" if args.use_openvino == "VPUX_U8_NP_PARTITION" else "OFF"),
             "-Donnxruntime_USE_OPENVINO_HETERO=" + ("ON" if args.use_openvino.startswith("HETERO") else "OFF"),
             "-Donnxruntime_USE_OPENVINO_DEVICE=" + (args.use_openvino),
             "-Donnxruntime_USE_OPENVINO_MULTI=" + ("ON" if args.use_openvino.startswith("MULTI") else "OFF"),
diff --git a/tools/nuget/generate_nuspec_for_native_nuget.py b/tools/nuget/generate_nuspec_for_native_nuget.py
index cc27cdc293..f7b68551b9 100644
--- a/tools/nuget/generate_nuspec_for_native_nuget.py
+++ b/tools/nuget/generate_nuspec_for_native_nuget.py
@@ -552,6 +552,7 @@ def generate_files(line_list, args):
                 files_list.append(
                     "<file src=" + '"' + os.path.join(args.native_build_path, "onnxruntime.pdb") + runtimes + " />"
                 )
+
     else:
         files_list.append(
             "<file src="
@@ -706,25 +707,9 @@ def generate_files(line_list, args):
         )
 
         if is_windows():
-            if "2022" in openvino_path:
-                dll_list_path = os.path.join(openvino_path, "runtime\\bin\\intel64\\Release\\")
-                tbb_list_path = os.path.join(openvino_path, "runtime\\3rdparty\\tbb\\bin\\")
-            else:
-                dll_list_path = os.path.join(
-                    openvino_path, "deployment_tools\\inference_engine\\bin\\intel64\\Release\\"
-                )
-                tbb_list_path = os.path.join(openvino_path, "deployment_tools\\inference_engine\\external\\tbb\\bin\\")
-                ngraph_list_path = os.path.join(openvino_path, "deployment_tools\\ngraph\\lib\\")
-                for ngraph_element in os.listdir(ngraph_list_path):
-                    if ngraph_element.endswith("dll"):
-                        files_list.append(
-                            "<file src="
-                            + '"'
-                            + os.path.join(ngraph_list_path, ngraph_element)
-                            + runtimes_target
-                            + args.target_architecture
-                            + '\\native" />'
-                        )
+            dll_list_path = os.path.join(openvino_path, "runtime\\bin\\intel64\\Release\\")
+            tbb_list_path = os.path.join(openvino_path, "runtime\\3rdparty\\tbb\\bin\\")
+
             for dll_element in os.listdir(dll_list_path):
                 if dll_element.endswith("dll"):
                     files_list.append(
@@ -735,26 +720,7 @@ def generate_files(line_list, args):
                         + args.target_architecture
                         + '\\native" />'
                     )
-            # plugins.xml
-            files_list.append(
-                "<file src="
-                + '"'
-                + os.path.join(dll_list_path, "plugins.xml")
-                + runtimes_target
-                + args.target_architecture
-                + '\\native" />'
-            )
-            # usb-ma2x8x.mvcmd
-            # OpenVINO 2022.3 doesn't have usb-ma2x8x.mvcmd
-            if "2022.3" not in openvino_path:
-                files_list.append(
-                    "<file src="
-                    + '"'
-                    + os.path.join(dll_list_path, "usb-ma2x8x.mvcmd")
-                    + runtimes_target
-                    + args.target_architecture
-                    + '\\native" />'
-                )
+
             for tbb_element in os.listdir(tbb_list_path):
                 if tbb_element.endswith("dll"):
                     files_list.append(