Openvino ep ort 23.1 (#17911)

### Description
Integration with OpenVINO 2023.1


### Motivation and Context

- Alignment with the latest OpenVINO version.
- Device name changed from VPUX to NPU; NPU is removed from the supported device list
until official public support is available.

---------

Co-authored-by: Sahar Fatima <sfatima.3001@gmail.com>
Co-authored-by: Saurabh Kale <saurabh1.kale@intel.com>
Co-authored-by: Suryaprakash Shanmugam <suryaprakash.shanmugam@intel.com>
Co-authored-by: sfatimar <sahar.fatima@intel.com>
This commit is contained in:
Preetha Veeramalai 2023-11-01 08:39:39 -07:00 committed by GitHub
parent 69f029797d
commit d87216bcb1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
35 changed files with 563 additions and 357 deletions

View file

@ -1282,14 +1282,6 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
endif()
if (onnxruntime_USE_OPENVINO_VPUX_FP16)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
endif()
if (onnxruntime_USE_OPENVINO_VPUX_U8)
add_definitions(-DOPENVINO_CONFIG_VPUX_U8=1)
endif()
if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
@ -1310,16 +1302,6 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()
if (onnxruntime_USE_OPENVINO_VPUX_FP32_NP)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP32=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()
if (onnxruntime_USE_OPENVINO_VPUX_FP16_NP)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()
if (onnxruntime_USE_OPENVINO_HETERO)
add_definitions(-DOPENVINO_CONFIG_HETERO=1)
add_definitions(-DDEVICE_NAME="${onnxruntime_USE_OPENVINO_DEVICE}")

View file

@ -7,7 +7,6 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated VPUs
Installation
------------
@ -22,7 +21,6 @@ This package supports:
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated VPUs
``pip3 install onnxruntime-openvino``

View file

@ -611,7 +611,7 @@ typedef struct OrtMIGraphXProviderOptions {
typedef struct OrtOpenVINOProviderOptions {
#ifdef __cplusplus
OrtOpenVINOProviderOptions() : device_type{},
enable_vpu_fast_compile{},
enable_npu_fast_compile{},
device_id{},
num_of_threads{},
cache_dir{},
@ -624,7 +624,7 @@ typedef struct OrtOpenVINOProviderOptions {
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
*/
const char* device_type;
unsigned char enable_vpu_fast_compile; ///< 0 = disabled, nonzero = enabled
unsigned char enable_npu_fast_compile; ///< 0 = disabled, nonzero = enabled
const char* device_id;
size_t num_of_threads; ///< 0 = Use default number of threads
const char* cache_dir; // path is set to empty by default

View file

@ -2,9 +2,7 @@
// Licensed under the MIT License
#include <fstream>
#include <vector>
#include <string>
#include <memory>
#include <utility>
#include "core/providers/shared_library/provider_api.h"
#include "contexts.h"
@ -18,7 +16,8 @@ namespace openvino_ep {
static std::unique_ptr<GlobalContext> g_global_context;
GlobalContext& BackendManager::GetGlobalContext() {
// This is not thread safe to call for the first time, but it is first called on the main thread by the constructor so it is safe.
// This is not thread safe to call for the first time,
// but it is first called on the main thread by the constructor so it is safe.
if (!g_global_context)
g_global_context = std::make_unique<GlobalContext>();
return *g_global_context;
@ -88,7 +87,9 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
<< "Backend created for graph " << subgraph_context_.subgraph_name;
}
} else {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. "
<< "Initializing backend for graph "
<< subgraph_context_.subgraph_name;
subgraph_context_.has_dynamic_input_shape = false;
try {
@ -104,7 +105,7 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
bool BackendManager::ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const {
bool has_batched_inputs = true;
for (int i = 0; i < (int)subgraph_context_.input_indexes.size(); i++) {
for (int i = 0; i < static_cast<int>(subgraph_context_.input_indexes.size()); i++) {
auto& input = model_proto.graph().input(subgraph_context_.input_indexes[i]);
// Batch-process only raw image inputs (NCHW or NHWC layouts)
@ -215,7 +216,10 @@ BackendManager::ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_pr
auto graph_proto = model_copy->mutable_graph();
for (size_t i = 0, limit = input_shapes.size(); i < limit; i++) {
auto g_in_shape = graph_proto->mutable_input((int)i)->mutable_type()->mutable_tensor_type()->mutable_shape();
auto g_in_shape = graph_proto->mutable_input(static_cast<int>(i))
->mutable_type()
->mutable_tensor_type()
->mutable_shape();
g_in_shape->clear_dim();
const auto& shape = input_shapes[i];
for (size_t dim = 0, end = shape.size(); dim < end; dim++) {
@ -234,7 +238,11 @@ BackendManager::ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_p
auto graph_proto = model_copy->mutable_graph();
for (int i = 0; i < graph_proto->input_size(); i++) {
ONNX_NAMESPACE::TensorShapeProto* g_in_shape = graph_proto->mutable_input((int)i)->mutable_type()->mutable_tensor_type()->mutable_shape();
ONNX_NAMESPACE::TensorShapeProto* g_in_shape =
graph_proto->mutable_input(static_cast<int>(i))
->mutable_type()
->mutable_tensor_type()
->mutable_shape();
g_in_shape->mutable_dim(0)->clear_dim_value();
g_in_shape->mutable_dim(0)->set_dim_value(1);
}

View file

@ -3,6 +3,11 @@
#pragma once
#include <vector>
#include <map>
#include <memory>
#include <string>
#include "ov_interface.h"
#include "contexts.h"
#include "ibackend.h"
@ -13,7 +18,9 @@ namespace openvino_ep {
// Singleton class that manages all the backends
class BackendManager {
public:
BackendManager(const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger);
BackendManager(const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger);
void Compute(OrtKernelContext* context);
void ShutdownBackendManager();
static GlobalContext& GetGlobalContext();
@ -21,7 +28,9 @@ class BackendManager {
private:
std::unique_ptr<ONNX_NAMESPACE::ModelProto> GetModelProtoFromFusedNode(
const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger) const;
const onnxruntime::Node& fused_node,
const onnxruntime::GraphViewer& subgraph,
const logging::Logger& logger) const;
bool ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& subgraph) const;
bool ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const;

View file

@ -1,9 +1,7 @@
// Copyright (C) 2019-2022 Intel Corporation
// Licensed under the MIT License
#include <map>
#include <string>
#include <memory>
#include <algorithm>
#include <sstream>
#include <fstream>
@ -58,7 +56,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
try {
auto cnn_network = global_context.ie_core.ReadModel(model);
if ((subgraph_context.precision == "FP16") &&
(global_context.device_type.find("VPUX") == std::string::npos)) {
(global_context.device_type.find("NPU") == std::string::npos)) {
// FP16 transformations
ov::pass::ConvertFP32ToFP16 pass_obj;
pass_obj.run_on_model(cnn_network);
@ -88,7 +86,8 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
size_t index = results.size() - 1;
for (auto it = results.rbegin(); it != results.rend(); ++it) {
if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
if (auto const_node =
std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
const_outputs_map[(*it)->get_friendly_name()] = const_node;
results.erase(results.begin() + index);
}
@ -254,7 +253,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
std::ostream& stream, std::string deviceName) {
long long totalTime = 0;
int64_t totalTime = 0;
// Print performance counts
stream << std::endl
<< "performance counts:" << std::endl

View file

@ -4,9 +4,15 @@
#pragma once
#define ORT_API_MANUAL_INIT
#include <iomanip>
#include <unordered_map>
#include <map>
#include <memory>
#include <vector>
#include <string>
#include "core/session/onnxruntime_cxx_api.h"
#include "contexts.h"
#include <iomanip>
#include "ov_interface.h"
#ifdef _WIN32
#include <direct.h>
@ -57,7 +63,9 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
size_t batch_slice_idx);
std::shared_ptr<OVNetwork>
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context,
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto,
const GlobalContext& global_context,
const SubGraphContext& subgraph_context,
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,

View file

@ -16,7 +16,7 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
const SubGraphContext& subgraph_context) {
std::string type = global_context.device_type;
if (type == "CPU" || type.find("GPU") != std::string::npos ||
type.find("VPUX") != std::string::npos ||
type.find("NPU") != std::string::npos ||
type.find("HETERO") != std::string::npos ||
type.find("MULTI") != std::string::npos ||
type.find("AUTO") != std::string::npos) {

View file

@ -6,10 +6,10 @@
#include <memory>
#include <sstream>
#include <fstream>
#include <utility>
#include "core/providers/shared_library/provider_api.h"
#include "../backend_utils.h"
// #include <ngraph/pass/constant_folding.hpp>
#include "basic_backend.h"
#include "../backend_manager.h"
@ -57,33 +57,39 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
cl_context ctx = static_cast<cl_context>(global_context_.context);
remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), ctx);
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name);
exe_network_ = global_context_.ie_core.LoadNetwork(
ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name);
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
} else {
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
exe_network_ = global_context_.ie_core.LoadNetwork(
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
}
#else
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
if (!subgraph_context_.has_dynamic_input_shape && dev_prec != "CPU_FP16") {
const std::string model = model_proto.SerializeAsString();
exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
exe_network_ = global_context_.ie_core.LoadNetwork(
model, hw_target, device_config, subgraph_context_.subgraph_name);
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
} else {
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
exe_network_ = global_context_.ie_core.LoadNetwork(
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
}
#else
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
exe_network_ = global_context_.ie_core.LoadNetwork(
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
#endif
#endif
} else {
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
exe_network_ = global_context_.ie_core.LoadNetwork(
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
}
} catch (const char* msg) {
@ -127,10 +133,10 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
}
#endif
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
if (global_context_.device_type.find("VPUX") != std::string::npos) {
if (global_context_.device_type.find("NPU") != std::string::npos) {
std::pair<std::string, ov::Any> device_property;
device_property = std::make_pair("VPU_COMPILER_TYPE", "MLIR");
device_config.emplace(ov::device::properties("VPUX", device_property));
device_property = std::make_pair("NPU_COMPILER_TYPE", "DRIVER");
device_config.emplace(ov::device::properties("NPU", device_property));
}
#endif
}
@ -152,12 +158,12 @@ void BasicBackend::EnableCaching() {
}
void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) {
if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
if (global_context_.enable_opencl_throttling == true &&
global_context_.device_type.find("GPU") != std::string::npos) {
LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
std::pair<std::string, ov::Any> device_property;
device_property = std::make_pair("PLUGIN_THROTTLE", "1");
device_config.emplace(ov::device::properties("GPU_CONFIG_KEY", device_property));
// device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
}
}
@ -187,7 +193,9 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
if (input_names.find(onnx_input_name) != input_names.end()) {
input_name = onnx_input_name;
} else {
throw(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
throw(log_tag +
"Input names mismatch between OpenVINO and ONNX. " + onnx_input_name +
" doesn't exist in the list of OpenVINO input tensor names");
}
size_t batch_slice_idx = 0;
if (subgraph_context_.has_dynamic_input_shape &&
@ -197,6 +205,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
auto tensor_shape = tensor_info.GetShape();
auto tensor_size = tensor_shape.size();
const char* tensor_data = tensor.GetTensorData<char>();
auto tensor_iter = 0;
ov::Shape input_tensor_shape = ov::Shape(tensor_size, 0);
for (auto i = tensor_shape.begin(); i != tensor_shape.end(); ++i) {
@ -204,8 +213,16 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
tensor_iter += 1;
}
auto input = ie_cnn_network_->get_parameters().at(input_idx);
OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
OVTensorPtr tensor_ptr;
// avoid input copies on the CPU device
if (global_context_.device_type.find("CPU") != std::string::npos) {
tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape,
(void*)tensor_data);
} else {
tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
}
try {
infer_request->SetTensor(input_name, tensor_ptr);
} catch (const char* msg) {
@ -251,7 +268,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
if (input_names.find(onnx_input_name) != input_names.end()) {
input_name = onnx_input_name;
} else {
throw(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
throw(log_tag +
"Input names mismatch between OpenVINO and ONNX. " +
onnx_input_name +
" doesn't exist in the list of OpenVINO input tensor names");
}
input_idx++;
// Kernel Context Input Buffer
@ -264,9 +284,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
// Create an Input Remote Blob
auto input = ie_cnn_network_->get_parameters().at(0);
auto remote_blob = remote_context_->create_tensor(input->get_element_type(), input->get_shape(), *shared_buffer_const);
ov::Tensor tensor = static_cast<ov::Tensor>(remote_blob);
OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor);
auto remote_blob = remote_context_->create_tensor(
input->get_element_type(), input->get_shape(), *shared_buffer_const);
ov::Tensor tensor_remote = static_cast<ov::Tensor>(remote_blob);
OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_remote);
infer_request->SetTensor(input_name, tensor_ptr);
} else {
OVTensorPtr graph_input_blob;
@ -295,7 +316,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
}
}
if (!output_name_found) {
throw std::string(log_tag + "Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " + onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
throw std::string(
log_tag +
"Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " +
onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
}
size_t batch_size = 1;
@ -307,9 +331,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
// Create a shared Blob, set the Infer Request Output Blob
auto output = ie_cnn_network_->get_results().at(0);
auto remote_tensor = remote_context_->create_tensor(output->get_element_type(), output->get_shape(), *shared_buffer_const);
ov::Tensor tensor = static_cast<ov::Tensor>(remote_tensor);
OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor);
auto remote_tensor =
remote_context_->create_tensor(output->get_element_type(), output->get_shape(), *shared_buffer_const);
ov::Tensor tensor_t = static_cast<ov::Tensor>(remote_tensor);
OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_t);
try {
infer_request->SetTensor(output_name, tensor_ptr);
} catch (const char* msg) {
@ -364,7 +389,8 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
throw(msg);
}
size_t batch_size = 1;
auto output_tensor = GetOutputTensor(context, batch_size, infer_request, output_name, subgraph_context_.output_names);
auto output_tensor =
GetOutputTensor(context, batch_size, infer_request, output_name, subgraph_context_.output_names);
auto mem_info = output_tensor.GetTensorMemoryInfo();
if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
return;
@ -465,7 +491,8 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
#ifndef IO_BUFFER_ENABLED // Printing performance counts is disabled when IO_BUFFER_ENABLED
if (openvino_ep::backend_utils::IsDebugEnabled()) {
inferRequestsQueue_->printstatus(); // Printing the elements of infer_requests_ vector pool only in debug mode
std::string& hw_target = (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
std::string& hw_target =
(global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
printPerformanceCounts(infer_request, std::cout, hw_target);
}
#endif

View file

@ -6,16 +6,17 @@
#include <memory>
#define ORT_API_MANUAL_INIT
#include "core/session/onnxruntime_cxx_api.h"
#include "core/providers/openvino/contexts.h"
#include "core/providers/openvino/ibackend.h"
#include "core/providers/openvino/ov_interface.h"
#include <vector>
#include <iostream>
#include <string>
#include <condition_variable>
#include <mutex>
#include "core/session/onnxruntime_cxx_api.h"
#include "core/providers/openvino/contexts.h"
#include "core/providers/openvino/ibackend.h"
#include "core/providers/openvino/ov_interface.h"
namespace onnxruntime {
namespace openvino_ep {
@ -29,7 +30,7 @@ class BasicBackend : public IBackend {
void Infer(OrtKernelContext* context) override;
private:
bool ImportBlob(std::string hw_target, bool vpu_status);
bool ImportBlob(std::string hw_target, bool npu_status);
void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&);
bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
void PopulateConfigValue(ov::AnyMap& device_config);

View file

@ -3,6 +3,9 @@
#pragma once
#include <vector>
#include <unordered_map>
#include <string>
#include "ov_interface.h"
namespace onnxruntime {
@ -12,7 +15,7 @@ namespace openvino_ep {
struct GlobalContext {
OVCore ie_core;
bool is_wholly_supported_graph = false;
bool enable_vpu_fast_compile = false;
bool enable_npu_fast_compile = false;
bool enable_opencl_throttling = false;
bool enable_dynamic_shapes = false;
size_t num_of_threads;
@ -34,7 +37,7 @@ struct GlobalContext {
struct SubGraphContext {
bool has_dynamic_input_shape = false;
bool enable_batching = false;
bool set_vpu_config = false;
bool set_npu_config = false;
bool is_constant = false;
void* context = 0;
std::string subgraph_name;

View file

@ -17,17 +17,18 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_;
openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
openvino_ep::BackendManager::GetGlobalContext().enable_npu_fast_compile = info.enable_npu_fast_compile_;
openvino_ep::BackendManager::GetGlobalContext().cache_dir = info.cache_dir_;
openvino_ep::BackendManager::GetGlobalContext().num_streams = info.num_streams_;
openvino_ep::BackendManager::GetGlobalContext().context = info.context_;
openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_;
openvino_ep::BackendManager::GetGlobalContext().enable_dynamic_shapes = info.enable_dynamic_shapes_;
if ((int)info.num_of_threads_ <= 0) {
if (static_cast<int>(info.num_of_threads_) <= 0) {
openvino_ep::BackendManager::GetGlobalContext().num_of_threads = 8;
} else if ((int)info.num_of_threads_ > 8) {
std::string err_msg = std::string("\n [ERROR] num_of_threads configured during runtime is: ") + std::to_string(info.num_of_threads_) + "\nnum_of_threads configured should be >0 and <=8.\n";
} else if (static_cast<int>(info.num_of_threads_) > 8) {
std::string err_msg = std::string("\n [ERROR] num_of_threads configured during runtime is: ") +
std::to_string(info.num_of_threads_) + "\nnum_of_threads configured should be >0 and <=8.\n";
ORT_THROW(err_msg);
} else {
openvino_ep::BackendManager::GetGlobalContext().num_of_threads = info.num_of_threads_;
@ -56,7 +57,8 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
device_found = true;
break;
}
if (info.device_type_.find("VPUX") != std::string::npos && (info.precision_ == "FP16" || info.precision_ == "U8")) {
if ((info.device_type_.find("NPU") != std::string::npos) &&
(info.precision_ == "FP16" || info.precision_ == "U8")) {
device_found = true;
break;
}
@ -109,11 +111,14 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
openvino_ep::BackendManager::GetGlobalContext().onnx_model_name = graph_viewer.Name();
#ifdef _WIN32
std::wstring onnx_path = graph_viewer.ModelPath().ToPathString();
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = std::string(onnx_path.begin(), onnx_path.end());
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name =
std::string(onnx_path.begin(), onnx_path.end());
#else
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = graph_viewer.ModelPath().ToPathString();
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name =
graph_viewer.ModelPath().ToPathString();
#endif
openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain);
openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version =
graph_viewer.DomainToVersionMap().at(kOnnxDomain);
#if defined(OPENVINO_2022_1)
openvino_ep::GetCapability obj(graph_viewer,
@ -151,7 +156,8 @@ common::Status OpenVINOExecutionProvider::Compile(
openvino_ep::BackendManager::GetGlobalContext().use_api_2 = true;
std::shared_ptr<openvino_ep::BackendManager> backend_manager = std::make_shared<openvino_ep::BackendManager>(fused_node, graph_body_viewer, *GetLogger());
std::shared_ptr<openvino_ep::BackendManager> backend_manager =
std::make_shared<openvino_ep::BackendManager>(fused_node, graph_body_viewer, *GetLogger());
compute_info.create_state_func =
[backend_manager](ComputeContext* context, FunctionState* state) {

View file

@ -3,19 +3,28 @@
#pragma once
#include "backend_manager.h"
#include <map>
#include <algorithm>
#include <iostream>
#include <string>
#include <memory>
#include <vector>
#include "backend_manager.h"
namespace onnxruntime {
static void print_build_options() {
std::cout << "[ERROR] INVALID DEVICE BUILD TYPE SPECIFIED" << std::endl;
std::cout << "Specify the keyword HETERO (or) MULTI (or) AUTO followed by the devices in the order of priority you want to build" << std::endl;
std::cout << "The different hardware devices that can be added with HETERO/MULTI/AUTO build ";
std::cout << "are ['CPU','GPU','VPUX']" << std::endl;
std::cout << "An example of how to specify the HETERO or MULTI or AUTO build type. Ex: HETERO:GPU,CPU Ex: MULTI:GPU,CPU Ex: AUTO:GPU,CPU" << std::endl;
std::cout << "Specify the keyword HETERO (or) MULTI (or) AUTO followed by the devices in the order of priority "
<< "you want to build"
<< std::endl;
std::cout << "The different hardware devices that can be added with HETERO/MULTI/AUTO build "
<< "are ['CPU','GPU']"
<< std::endl;
std::cout << "An example of how to specify the HETERO or MULTI or AUTO build type. "
<< "Ex: HETERO:GPU,CPU Ex: MULTI:GPU,CPU Ex: AUTO:GPU,CPU"
<< std::endl;
}
static std::vector<std::string> split(const std::string& s, char delim) {
@ -39,7 +48,7 @@ static std::vector<std::string> parseDevices(const std::string& device_string) {
print_build_options();
ORT_THROW("Invalid device string: " + device_string);
}
std::vector<std::string> dev_options = {"CPU", "GPU", "VPUX"};
std::vector<std::string> dev_options = {"CPU", "GPU"};
for (std::string dev : devices) {
if (!std::count(dev_options.begin(), dev_options.end(), dev)) {
print_build_options();
@ -53,7 +62,7 @@ static std::vector<std::string> parseDevices(const std::string& device_string) {
struct OpenVINOExecutionProviderInfo {
std::string device_type_;
std::string precision_;
bool enable_vpu_fast_compile_;
bool enable_npu_fast_compile_;
std::string device_id_;
size_t num_of_threads_;
std::string cache_dir_;
@ -62,11 +71,18 @@ struct OpenVINOExecutionProviderInfo {
bool enable_opencl_throttling_;
bool enable_dynamic_shapes_;
explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id,
explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_npu_fast_compile, std::string dev_id,
size_t num_of_threads, std::string cache_dir, int num_streams,
void* context, bool enable_opencl_throttling,
bool enable_dynamic_shapes)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), cache_dir_(cache_dir), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
: enable_npu_fast_compile_(enable_npu_fast_compile),
device_id_(dev_id),
num_of_threads_(num_of_threads),
cache_dir_(cache_dir),
num_streams_(num_streams),
context_(context),
enable_opencl_throttling_(enable_opencl_throttling),
enable_dynamic_shapes_(enable_dynamic_shapes) {
if (dev_type == "") {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
<< "No runtime device selection option provided.";
@ -82,11 +98,11 @@ struct OpenVINOExecutionProviderInfo {
#elif defined OPENVINO_CONFIG_GPU_FP16
device_type_ = "GPU";
precision_ = "FP16";
#elif defined OPENVINO_CONFIG_VPUX_FP16
device_type_ = "VPUX";
#elif defined OPENVINO_CONFIG_NPU_FP16
device_type_ = "NPU";
precision_ = "FP16";
#elif defined OPENVINO_CONFIG_VPUX_U8
device_type_ = "VPUX";
#elif defined OPENVINO_CONFIG_NPU_U8
device_type_ = "NPU";
precision_ = "U8";
#elif defined OPENVINO_CONFIG_HETERO || defined OPENVINO_CONFIG_MULTI || defined OPENVINO_CONFIG_AUTO
#ifdef DEVICE_NAME
@ -126,11 +142,11 @@ struct OpenVINOExecutionProviderInfo {
} else if (dev_type == "GPU.1_FP16") {
device_type_ = "GPU.1";
precision_ = "FP16";
} else if (dev_type == "VPUX_FP16") {
device_type_ = "VPUX";
} else if (dev_type == "NPU_FP16") {
device_type_ = "NPU";
precision_ = "FP16";
} else if (dev_type == "VPUX_U8") {
device_type_ = "VPUX";
} else if (dev_type == "NPU_U8") {
device_type_ = "NPU";
precision_ = "U8";
} else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0) {
std::vector<std::string> devices = parseDevices(dev_type);

View file

@ -8,11 +8,16 @@
namespace onnxruntime {
struct OpenVINOProviderFactory : IExecutionProviderFactory {
OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
OpenVINOProviderFactory(const char* device_type, bool enable_npu_fast_compile,
const char* device_id, size_t num_of_threads,
const char* cache_dir, int num_streams, void* context,
bool enable_opencl_throttling, bool enable_dynamic_shapes)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
: enable_npu_fast_compile_(enable_npu_fast_compile),
num_of_threads_(num_of_threads),
num_streams_(num_streams),
context_(context),
enable_opencl_throttling_(enable_opencl_throttling),
enable_dynamic_shapes_(enable_dynamic_shapes) {
device_type_ = (device_type == nullptr) ? "" : device_type;
device_id_ = (device_id == nullptr) ? "" : device_id;
cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
@ -24,7 +29,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
private:
std::string device_type_;
bool enable_vpu_fast_compile_;
bool enable_npu_fast_compile_;
std::string device_id_;
size_t num_of_threads_;
std::string cache_dir_;
@ -35,7 +40,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
};
std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_,
OpenVINOExecutionProviderInfo info(device_type_, enable_npu_fast_compile_, device_id_, num_of_threads_,
cache_dir_, num_streams_, context_, enable_opencl_throttling_,
enable_dynamic_shapes_);
return std::make_unique<OpenVINOExecutionProvider>(info);
@ -59,17 +64,18 @@ struct OpenVINO_Provider : Provider {
std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision
// with these values at runtime.
bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to
// speeds up the model's compilation to VPU device specific format.
bool enable_npu_fast_compile = false; // [enable_npu_fast_compile]: Fast-compile may be optionally enabled to
// speed up the model's compilation to the NPU device-specific format.
const char* device_id = ""; // [device_id]: Selects a particular hardware device for inference.
size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of
int num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of
// threads with this value at runtime.
const char* cache_dir = ""; // [cache_dir]: specify the path to
// dump and load the blobs for the model caching/kernel caching (GPU)
// feature. If blob files are already present, it will be directly loaded.
int num_streams = 1; // [num_streams]: Option that specifies the number of parallel inference
// requests to be processed on a given `device_type`. Overrides the
// accelerator default value of number of streams with this value at runtime.
// accelerator default value of number of streams
// with this value at runtime.
bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
// device (Reduces CPU Utilization when using GPU)
bool enable_dynamic_shapes = false; // [enable_dynamic_shapes]: Enables the Dynamic Shapes feature for the CPU device.
@ -80,14 +86,15 @@ struct OpenVINO_Provider : Provider {
std::set<std::string> ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32",
"GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
"GPU.0_FP16", "GPU.1_FP16",
"VPUX_FP16", "VPUX_U8"};
"GPU.0_FP16", "GPU.1_FP16"};
if (!((ov_supported_device_types.find(device_type) != ov_supported_device_types.end()) ||
(device_type.find("HETERO:") == 0) || (device_type.find("MULTI:") == 0) || (device_type.find("AUTO:") == 0))) {
(device_type.find("HETERO:") == 0) ||
(device_type.find("MULTI:") == 0) ||
(device_type.find("AUTO:") == 0))) {
ORT_THROW(
"[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
"Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
"'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from"
"'GPU.0_FP16', 'GPU.1_FP16' or from"
" HETERO/MULTI/AUTO options available. \n");
}
}
@ -97,30 +104,37 @@ struct OpenVINO_Provider : Provider {
if (provider_options_map.find("cache_dir") != provider_options_map.end()) {
cache_dir = provider_options_map.at("cache_dir").c_str();
}
if (provider_options_map.find("context") != provider_options_map.end()) {
context = (void*)provider_options_map.at("context").c_str();
std::string str = provider_options_map.at("context");
uint64_t number = std::strtoull(str.c_str(), nullptr, 16);
context = reinterpret_cast<void*>(number);
}
// [num_of_threads]: optional override taken from the provider options map.
// std::stoi throws if the stored text is not a parsable integer.
if (provider_options_map.find("num_of_threads") != provider_options_map.end()) {
  num_of_threads = std::stoi(provider_options_map.at("num_of_threads"));
  // A non-positive thread count is not usable: fall back to one thread and warn,
  // naming the option key exactly as the caller has to spell it.
  if (num_of_threads <= 0) {
    num_of_threads = 1;
    LOGS_DEFAULT(WARNING) << "[OpenVINO-EP] The value for the key 'num_of_threads' should be in the positive range.\n "
                          << "Executing with num_of_threads=1";
  }
}
// [num_streams]: optional override taken from the provider options map and parsed with std::stoi.
// NOTE(review): this span appears to show two versions of the range check side by side.
//  - `num_streams <= 0 && num_streams > 8` can never be true, so the ORT_THROW guarded by it
//    is unreachable.
//  - `num_streams <= 0` resets the value to 1 and logs a warning whose text advertises a 1-8
//    range, although no upper bound is checked here. Confirm whether values above 8 are meant
//    to be accepted, and align the message with the check.
if (provider_options_map.find("num_streams") != provider_options_map.end()) {
num_streams = std::stoi(provider_options_map.at("num_streams"));
if (num_streams <= 0 && num_streams > 8) {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_streams' should be in the range of 1-8 \n");
if (num_streams <= 0) {
num_streams = 1;
LOGS_DEFAULT(WARNING) << "[OpenVINO-EP] The value for the key 'num_streams' should be in the range of 1-8.\n "
<< "Executing with num_streams=1";
}
}
std::string bool_flag = "";
if (provider_options_map.find("enable_vpu_fast_compile") != provider_options_map.end()) {
bool_flag = provider_options_map.at("enable_vpu_fast_compile");
if (provider_options_map.find("enable_npu_fast_compile") != provider_options_map.end()) {
bool_flag = provider_options_map.at("enable_npu_fast_compile");
if (bool_flag == "true" || bool_flag == "True")
enable_vpu_fast_compile = true;
enable_npu_fast_compile = true;
else if (bool_flag == "false" || bool_flag == "False")
enable_vpu_fast_compile = false;
enable_npu_fast_compile = false;
bool_flag = "";
}
@ -141,7 +155,7 @@ struct OpenVINO_Provider : Provider {
enable_dynamic_shapes = false;
}
return std::make_shared<OpenVINOProviderFactory>(const_cast<char*>(device_type.c_str()),
enable_vpu_fast_compile,
enable_npu_fast_compile,
device_id,
num_of_threads,
cache_dir,
@ -157,7 +171,6 @@ struct OpenVINO_Provider : Provider {
// Provider shutdown hook: its only action is to call
// openvino_ep::BackendManager::ReleaseGlobalContext().
void Shutdown() override {
openvino_ep::BackendManager::ReleaseGlobalContext();
}
} g_provider;
} // namespace onnxruntime

View file

@ -29,7 +29,10 @@ std::shared_ptr<OVNetwork> OVCore::ReadModel(const std::string& model) const {
}
}
OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name) {
OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network,
std::string& hw_target,
ov::AnyMap& device_config,
std::string name) {
ov::CompiledModel obj;
try {
obj = oe.compile_model(ie_cnn_network, hw_target, device_config);
@ -43,7 +46,10 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std
}
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
OVExeNetwork OVCore::LoadNetwork(const std::string& model, std::string& hw_target, ov::AnyMap& device_config, std::string name) {
OVExeNetwork OVCore::LoadNetwork(const std::string& model,
std::string& hw_target,
ov::AnyMap& device_config,
std::string name) {
ov::CompiledModel obj;
try {
obj = oe.compile_model(model, ov::Tensor(), hw_target, device_config);

View file

@ -4,6 +4,7 @@
#pragma once
#include <vector>
#include <memory>
#if defined(OPENVINO_2022_1) || (OPENVINO_2022_2) || (OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1)
#define OV_API_20
@ -43,9 +44,15 @@ class OVCore {
public:
std::shared_ptr<OVNetwork> ReadModel(const std::string& model_stream) const;
OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name);
OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network,
std::string& hw_target,
ov::AnyMap& device_config,
std::string name);
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
OVExeNetwork LoadNetwork(const std::string& model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name);
OVExeNetwork LoadNetwork(const std::string& model_stream,
std::string& hw_target,
ov::AnyMap& device_config,
std::string name);
#endif
void SetCache(std::string cache_dir_path);
#ifdef IO_BUFFER_ENABLED
@ -62,7 +69,7 @@ class OVExeNetwork {
ov::CompiledModel obj;
public:
OVExeNetwork(ov::CompiledModel md) { obj = md; }
explicit OVExeNetwork(ov::CompiledModel md) { obj = md; }
OVExeNetwork() { obj = ov::CompiledModel(); }
ov::CompiledModel& Get() { return obj; }
OVInferRequest CreateInferRequest();

View file

@ -3,6 +3,8 @@
#pragma once
#include <vector>
#include <string>
#include <memory>
#include "data_ops.h"
namespace onnxruntime {

View file

@ -24,7 +24,8 @@ namespace openvino_ep {
// Constructor
GetCapability::GetCapability(const GraphViewer& graph_viewer_param, std::string device_type_param,
const std::string version_param) : graph_viewer_(graph_viewer_param), device_type_(device_type_param) {
const std::string version_param)
: graph_viewer_(graph_viewer_param), device_type_(device_type_param) {
if (version_param == "V_2022_1") {
data_ops_ = new DataOps(graph_viewer_, V_2022_1, device_type_);
} else if (version_param == "V_2022_2") {
@ -114,11 +115,11 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
}
openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph = true;
} else { // unsupported_nodes_idx.empty()
} else { // unsupported_nodes_idx.empty()
#if defined(OPENVINO_DISABLE_GRAPH_PARTITION) // disables graph partition at build time
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] DISABLE_GRAPH_PARTITION option is set";
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model is not fully supported by OpenVINO, so making the full model fall back to default CPU Execution Provider";
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model is not fully supported by OpenVINO, "
<< "so making the full model fall back to default CPU Execution Provider";
return result;
#endif
@ -159,7 +160,13 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
std::vector<std::string> cluster_graph_inputs, cluster_inputs, const_inputs, cluster_outputs;
GetInputsOutputsOfCluster(graph_viewer_, this_cluster, ng_required_initializers, cluster_graph_inputs, cluster_inputs, const_inputs, cluster_outputs);
GetInputsOutputsOfCluster(graph_viewer_,
this_cluster,
ng_required_initializers,
cluster_graph_inputs,
cluster_inputs,
const_inputs,
cluster_outputs);
bool omit_subgraph = false;
// Omitting zero dim subgraphs

View file

@ -2,11 +2,15 @@
// Licensed under the MIT License
#include <unordered_set>
#include <string>
#include <vector>
#include <utility>
#include <map>
#include <set>
#include "core/providers/shared_library/provider_api.h"
#include "../backend_utils.h"
#include "../backend_manager.h"
#include <string>
#include <vector>
#include "data_ops.h"
#include "capabilities.h"
#include "utils.h"
@ -72,269 +76,355 @@ std::set<std::string> ops_supported_as_function = {
// Table of SupportedOp entries. Each row holds an operator name, a version enumerator
// (V_<year>_<release>) and the list of device names ("CPU", "GPU", "NPU", "VPUX") the row
// applies to. Presumably the version is the first OpenVINO release in which the operator is
// accepted on those devices -- confirm against the code that reads this table (not visible here).
// NOTE(review): this listing shows "VPUX" and "NPU" rows side by side for many operators, and
// some operators occur in more than one row for the same device (e.g. HardMax, Softsign,
// ReduceLogSum, ReduceSumSquare, RandomNormalLike). Check how the consumer resolves repeated rows.
std::vector<SupportedOp> supported_op_mode = {
{"Abs", V_2020_4, {"CPU", "GPU"}},
{"Abs", V_2023_0, {"VPUX"}},
{"Abs", V_2023_0, {"NPU"}},
{"Acos", V_2020_4, {"CPU"}},
{"Acos", V_2022_1, {"GPU"}},
{"Acos", V_2023_1, {"NPU"}},
{"Acosh", V_2020_4, {"CPU"}},
{"Acosh", V_2022_1, {"GPU"}},
{"Acosh", V_2023_1, {"NPU"}},
{"Add", V_2020_4, {"CPU", "GPU"}},
{"Add", V_2023_0, {"VPUX"}},
{"Add", V_2023_0, {"NPU"}},
{"And", V_2020_4, {"CPU", "GPU"}},
{"And", V_2023_1, {"NPU"}},
{"ArgMax", V_2020_4, {"CPU"}},
{"ArgMax", V_2021_1, {"GPU"}},
{"ArgMin", V_2020_4, {"CPU"}},
{"ArgMin", V_2022_1, {"GPU"}},
{"Asin", V_2020_4, {"CPU", "GPU"}},
{"Asin", V_2023_1, {"NPU"}},
{"Asinh", V_2020_4, {"CPU", "GPU"}},
{"Asinh", V_2023_1, {"NPU"}},
{"Atan", V_2020_4, {"CPU", "GPU"}},
{"Atan", V_2023_1, {"NPU"}},
{"Atanh", V_2020_4, {"CPU"}},
{"Atanh", V_2022_1, {"GPU"}},
{"Atanh", V_2023_1, {"NPU"}},
{"AveragePool", V_2020_4, {"CPU", "GPU"}},
{"AveragePool", V_2023_0, {"VPUX"}},
{"AveragePool", V_2023_0, {"NPU"}},
{"BatchNormalization", V_2020_4, {"CPU", "GPU"}},
{"BatchNormalization", V_2023_0, {"VPUX"}},
{"BatchNormalization", V_2023_0, {"NPU"}},
{"BitShift", V_2022_1, {"CPU"}},
{"BitShift", V_2023_1, {"NPU"}},
{"Cast", V_2020_4, {"CPU", "GPU"}},
{"Cast", V_2023_0, {"VPUX"}},
{"Cast", V_2023_0, {"NPU"}},
{"CastLike", V_2023_1, {"CPU", "GPU", "NPU"}},
{"Ceil", V_2020_4, {"GPU"}},
{"Ceil", V_2021_4, {"CPU"}},
{"Ceil", V_2023_1, {"NPU"}},
{"Celu", V_2022_1, {"CPU", "GPU"}},
{"Clip", V_2020_4, {"CPU", "GPU"}},
{"Clip", V_2023_0, {"VPUX"}},
{"Clip", V_2023_0, {"NPU"}},
{"Compress", V_2023_1, {"CPU", "GPU"}},
{"Concat", V_2020_4, {"CPU", "GPU"}},
{"Concat", V_2023_0, {"VPUX"}},
{"Concat", V_2023_0, {"NPU"}},
{"Constant", V_2020_4, {"CPU", "GPU"}},
{"Constant", V_2023_0, {"VPUX"}},
{"Constant", V_2023_0, {"NPU"}},
{"ConstantOfShape", V_2020_4, {"CPU", "GPU"}},
{"ConstantOfShape", V_2023_0, {"VPUX"}}, // Gets mapped to broadcast op in the plugin.
{"ConstantOfShape", V_2023_0, {"NPU"}}, // Gets mapped to broadcast op in the plugin.
{"Conv", V_2020_4, {"CPU", "GPU"}},
{"Conv", V_2023_0, {"VPUX"}},
{"Conv", V_2023_0, {"NPU"}},
{"ConvInteger", V_2022_1, {"CPU", "GPU"}},
{"ConvInteger", V_2023_1, {"NPU"}},
{"ConvTranspose", V_2020_4, {"CPU", "GPU"}},
{"ConvTranspose", V_2023_1, {"NPU"}},
{"Cos", V_2020_4, {"CPU"}},
{"Cos", V_2022_1, {"GPU"}},
{"Cos", V_2023_0, {"VPUX"}},
{"Cos", V_2023_0, {"NPU"}},
{"Cosh", V_2020_4, {"CPU"}},
{"Cosh", V_2022_1, {"GPU"}},
{"Cosh", V_2023_1, {"NPU"}},
{"CumSum", V_2022_1, {"CPU", "GPU"}},
{"CumSum", V_2023_0, {"VPUX"}},
{"CumSum", V_2023_0, {"NPU"}},
{"DepthToSpace", V_2020_4, {"CPU", "GPU"}},
{"DepthToSpace", V_2023_0, {"VPUX"}},
{"DepthToSpace", V_2023_0, {"NPU"}},
{"DequantizeLinear", V_2021_4, {"CPU", "GPU"}},
{"DequantizeLinear", V_2023_0, {"VPUX"}},
{"DequantizeLinear", V_2023_0, {"NPU"}},
{"Div", V_2020_4, {"CPU", "GPU"}},
{"Div", V_2023_0, {"VPUX"}},
{"Div", V_2023_0, {"NPU"}},
{"Dropout", V_2020_4, {"CPU", "GPU"}},
{"Dropout", V_2023_0, {"VPUX"}},
{"Dropout", V_2023_0, {"NPU"}},
{"Elu", V_2020_4, {"CPU", "GPU"}},
{"Elu", V_2023_0, {"VPUX"}},
{"Elu", V_2023_0, {"NPU"}},
// {"Einsum", V_2023_0, {"CPU", "GPU"}},
{"Equal", V_2020_4, {"CPU", "GPU"}},
{"Equal", V_2023_0, {"VPUX"}}, // Added for whisper decoder model.
{"Equal", V_2023_0, {"NPU"}}, // Added for whisper decoder model.
{"Erf", V_2020_4, {"CPU", "GPU"}},
{"Erf", V_2023_0, {"VPUX"}},
{"Erf", V_2023_0, {"NPU"}},
{"Exp", V_2020_4, {"CPU", "GPU"}},
{"Exp", V_2023_0, {"VPUX"}},
{"Exp", V_2023_0, {"NPU"}},
{"Expand", V_2022_1, {"CPU", "GPU"}},
{"Expand", V_2023_0, {"VPUX"}}, // Gets mapped to broadcast op and multiply op in the plugin.
{"Expand", V_2023_0, {"NPU"}}, // Gets mapped to broadcast op and multiply op in the plugin.
{"EyeLike", V_2022_1, {"CPU"}},
{"EyeLike", V_2023_0, {"VPUX"}}, // NoOP
{"EyeLike", V_2023_0, {"NPU"}}, // NoOP
{"Flatten", V_2020_4, {"CPU", "GPU"}},
{"Flatten", V_2023_0, {"VPUX"}},
{"Flatten", V_2023_0, {"NPU"}},
{"Floor", V_2020_4, {"CPU", "GPU"}},
{"Floor", V_2023_1, {"NPU"}},
{"Gather", V_2020_4, {"CPU", "GPU"}},
{"Gather", V_2023_0, {"VPUX"}},
{"Gather", V_2023_0, {"NPU"}},
{"GatherElements", V_2022_2, {"CPU", "GPU"}},
{"GatherElements", V_2023_1, {"NPU"}},
{"GatherND", V_2021_4, {"CPU", "GPU"}},
{"GatherND", V_2023_1, {"NPU"}},
{"Gemm", V_2020_4, {"CPU", "GPU"}},
{"Gemm", V_2023_0, {"VPUX"}},
{"Gemm", V_2023_0, {"NPU"}},
{"GlobalAveragePool", V_2020_4, {"CPU", "GPU"}},
{"GlobalAveragePool", V_2023_0, {"VPUX"}},
{"GlobalAveragePool", V_2023_0, {"NPU"}},
{"GlobalLpPool", V_2020_4, {"CPU", "GPU"}},
{"GlobalLpPool", V_2023_1, {"NPU"}},
{"GlobalMaxPool", V_2022_1, {"CPU", "GPU"}},
{"GlobalMaxPool", V_2023_1, {"NPU"}},
{"Greater", V_2020_4, {"CPU", "GPU"}},
{"Greater", V_2023_0, {"VPUX"}},
{"Greater", V_2023_0, {"NPU"}},
{"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
{"GreaterOrEqual", V_2023_0, {"VPUX"}},
{"GreaterOrEqual", V_2023_0, {"NPU"}},
{"GridSample", V_2022_3, {"CPU"}},
{"GridSample", V_2023_0, {"GPU"}},
{"GridSample", V_2023_1, {"NPU"}},
{"HardMax", V_2023_1, {"CPU", "GPU", "NPU"}},
{"Identity", V_2020_4, {"CPU", "GPU"}},
{"Identity", V_2023_0, {"VPUX"}}, // NoOP
{"Identity", V_2023_0, {"NPU"}}, // NoOP
{"If", V_2022_3, {"CPU", "GPU"}},
{"If", V_2023_1, {"NPU"}},
{"ImageScaler", V_2022_1, {"CPU", "GPU"}},
{"ImageScaler", V_2023_0, {"VPUX"}},
{"ImageScaler", V_2023_0, {"NPU"}},
{"InstanceNormalization", V_2020_4, {"CPU", "GPU"}},
{"InstanceNormalization", V_2023_0, {"VPUX"}},
{"InstanceNormalization", V_2023_0, {"NPU"}},
{"HardSigmoid", V_2020_4, {"CPU", "GPU"}},
{"HardSigmoid", V_2023_1, {"NPU"}},
{"HardMax", V_2022_1, {"CPU", "GPU"}},
{"LeakyRelu", V_2020_4, {"CPU", "GPU"}},
{"LeakyRelu", V_2023_0, {"VPUX"}},
{"LeakyRelu", V_2023_0, {"NPU"}},
{"Less", V_2020_4, {"CPU", "GPU"}},
{"Less", V_2023_0, {"VPUX"}}, // Added for whisper decoder model.
{"Less", V_2023_0, {"NPU"}}, // Added for whisper decoder model.
{"LessOrEqual", V_2022_1, {"CPU", "GPU"}},
{"LessOrEqual", V_2023_0, {"VPUX"}},
{"LessOrEqual", V_2023_0, {"NPU"}},
{"Log", V_2020_4, {"CPU", "GPU"}},
{"Log", V_2023_0, {"VPUX"}},
{"Log", V_2023_0, {"NPU"}},
{"LogSoftMax", V_2022_1, {"CPU", "GPU"}},
{"Loop", V_2021_4, {"CPU", "GPU"}},
{"LpNormalization", V_2023_1, {"CPU", "GPU", "NPU"}},
{"LpPool", V_2023_1, {"CPU", "GPU", "NPU"}},
{"LRN", V_2020_4, {"CPU", "GPU"}},
{"LRN", V_2023_0, {"VPUX"}},
{"LRN", V_2023_0, {"NPU"}},
{"LSTM", V_2020_4, {"CPU", "GPU"}},
{"LSTM", V_2023_1, {"NPU"}},
{"MatMul", V_2020_4, {"CPU", "GPU"}},
{"MatMul", V_2023_0, {"VPUX"}},
{"MatMul", V_2023_0, {"NPU"}},
{"MatMulInteger", V_2022_1, {"CPU"}},
{"MatMulInteger", V_2023_1, {"NPU"}},
{"Max", V_2020_4, {"CPU", "GPU"}},
{"Max", V_2023_0, {"VPUX"}},
{"Max", V_2023_0, {"NPU"}},
{"MaxPool", V_2020_4, {"CPU", "GPU"}},
{"MaxPool", V_2023_0, {"VPUX"}},
{"MaxPool", V_2023_0, {"NPU"}},
{"Mean", V_2020_4, {"CPU", "GPU"}},
{"Mean", V_2023_0, {"VPUX"}},
{"Mean", V_2023_0, {"NPU"}},
{"MeanVarianceNormalization", V_2022_1, {"CPU", "GPU"}},
{"MeanVarianceNormalization", V_2023_1, {"NPU"}},
{"Min", V_2020_4, {"CPU", "GPU"}},
{"Min", V_2023_0, {"VPUX"}},
{"Min", V_2023_0, {"NPU"}},
{"Mod", V_2022_1, {"CPU", "GPU"}},
{"Mul", V_2020_4, {"CPU", "GPU"}},
{"Mul", V_2023_0, {"VPUX"}},
{"Mul", V_2023_0, {"NPU"}},
{"Neg", V_2020_4, {"CPU", "GPU"}},
{"Neg", V_2023_0, {"VPUX"}},
{"Neg", V_2023_0, {"NPU"}},
{"NonMaxSuppression", V_2021_1, {"CPU", "GPU"}},
{"NonMaxSuppression", V_2023_1, {"NPU"}},
{"NonZero", V_2021_1, {"CPU"}},
{"NonZero", V_2023_0, {"GPU"}},
{"Not", V_2021_1, {"CPU", "GPU"}},
{"Not", V_2020_4, {"CPU", "GPU"}},
{"Not", V_2023_1, {"NPU"}},
{"OneHot", V_2020_4, {"CPU", "GPU"}},
{"OneHot", V_2023_1, {"NPU"}},
{"Or", V_2022_1, {"CPU", "GPU"}},
{"Or", V_2023_1, {"NPU"}},
{"Pad", V_2020_4, {"CPU", "GPU"}},
{"Pad", V_2023_0, {"VPUX"}},
{"Pad", V_2023_0, {"NPU"}},
{"Pow", V_2020_4, {"CPU", "GPU"}},
{"Pow", V_2023_0, {"VPUX"}},
{"Pow", V_2023_0, {"NPU"}},
{"PRelu", V_2020_4, {"CPU", "GPU"}},
{"PRelu", V_2023_0, {"VPUX"}},
{"PRelu", V_2023_0, {"NPU"}},
{"QLinearMatMul", V_2022_3, {"CPU"}},
// {"QLinearMatMul", V_2023_1, {"NPU"}},
{"QuantizeLinear", V_2021_4, {"CPU", "GPU"}},
{"QuantizeLinear", V_2023_0, {"VPUX"}},
{"QuantizeLinear", V_2023_0, {"NPU"}},
{"RNN", V_2023_1, {"CPU", "GPU"}},
{"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
{"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
{"RandomNormalLike", V_2023_1, {"NPU"}},
{"RandomNormal", V_2023_0, {"CPU", "GPU"}},
{"RandomNormal", V_2023_1, {"NPU"}},
{"Range", V_2022_1, {"CPU", "GPU"}},
{"Range", V_2023_0, {"VPUX"}},
{"Range", V_2023_0, {"NPU"}},
{"Reciprocal", V_2020_4, {"CPU", "GPU"}},
{"Reciprocal", V_2023_0, {"VPUX"}},
{"Reciprocal", V_2023_0, {"NPU"}},
{"ReduceL1", V_2022_1, {"CPU", "GPU"}},
{"ReduceL1", V_2023_1, {"NPU"}},
{"ReduceL2", V_2022_1, {"CPU", "GPU"}},
{"ReduceL2", V_2023_1, {"NPU"}},
{"ReduceLogSum", V_2020_4, {"CPU"}},
{"ReduceLogSum", V_2022_1, {"CPU", "GPU"}},
{"ReduceLogSum", V_2023_1, {"NPU"}},
{"ReduceLogSumExp", V_2022_1, {"CPU", "GPU"}},
{"ReduceLogSumExp", V_2023_1, {"NPU"}},
{"ReduceMax", V_2020_4, {"CPU", "GPU"}},
{"ReduceMax", V_2023_1, {"NPU"}},
{"ReduceMean", V_2020_4, {"CPU", "GPU"}},
{"ReduceMean", V_2023_0, {"VPUX"}},
{"ReduceMean", V_2023_0, {"NPU"}},
{"ReduceMin", V_2020_4, {"CPU", "GPU"}},
{"ReduceMin", V_2023_1, {"NPU"}},
{"ReduceProd", V_2020_4, {"CPU"}},
{"ReduceProd", V_2022_1, {"GPU"}},
{"ReduceProd", V_2023_1, {"NPU"}},
{"ReduceSum", V_2020_4, {"CPU", "GPU"}},
// {"ReduceSum", V_2023_1, {"NPU"}},
{"ReduceSumSquare", V_2020_4, {"CPU"}},
{"ReduceSumSquare", V_2022_1, {"CPU", "GPU"}},
{"ReduceSumSquare", V_2023_1, {"NPU"}},
{"Relu", V_2020_4, {"CPU", "GPU"}},
{"Relu", V_2023_0, {"VPUX"}},
{"Relu", V_2023_0, {"NPU"}},
{"Resize", V_2020_4, {"CPU"}},
{"Resize", V_2022_1, {"GPU"}},
{"Resize", V_2023_1, {"NPU"}},
{"Reshape", V_2020_4, {"CPU", "GPU"}},
{"Reshape", V_2023_0, {"VPUX"}},
{"Reshape", V_2023_0, {"NPU"}},
{"ReverseSequence", V_2022_1, {"CPU", "GPU"}},
{"RoiAlign", V_2021_1, {"CPU", "GPU"}},
{"RoiAlign", V_2023_1, {"NPU"}},
{"Round", V_2021_4, {"CPU", "GPU"}},
{"Round", V_2023_1, {"NPU"}},
{"Scatter", V_2022_1, {"CPU", "GPU"}},
{"Scatter", V_2023_1, {"NPU"}},
{"ScatterElements", V_2022_1, {"CPU", "GPU"}},
{"ScatterElements", V_2023_1, {"NPU"}},
{"ScatterND", V_2022_1, {"CPU", "GPU"}},
{"ScatterND", V_2023_1, {"NPU"}},
{"Selu", V_2020_4, {"CPU", "GPU"}},
{"Selu", V_2023_1, {"NPU"}},
{"Shape", V_2020_4, {"CPU", "GPU"}},
{"Shape", V_2023_0, {"VPUX"}},
{"Shape", V_2023_0, {"NPU"}},
{"Shrink", V_2022_1, {"CPU", "GPU"}},
{"Shrink", V_2023_0, {"VPUX"}},
{"Shrink", V_2023_0, {"NPU"}},
{"Sigmoid", V_2020_4, {"CPU", "GPU"}},
{"Sigmoid", V_2023_0, {"VPUX"}},
{"Sigmoid", V_2023_0, {"NPU"}},
{"Sign", V_2020_4, {"CPU"}},
{"Sign", V_2022_1, {"GPU"}},
{"Sign", V_2023_0, {"VPUX"}},
{"Sign", V_2023_0, {"NPU"}},
{"Sin", V_2022_1, {"CPU", "GPU"}},
{"Sin", V_2023_0, {"VPUX"}},
{"Sin", V_2023_0, {"NPU"}},
{"Sinh", V_2020_4, {"CPU"}},
{"Sinh", V_2023_1, {"NPU"}},
{"Size", V_2022_1, {"CPU", "GPU"}},
{"Size", V_2023_1, {"NPU"}},
{"Slice", V_2020_4, {"CPU", "GPU"}},
{"Slice", V_2023_0, {"VPUX"}},
{"Slice", V_2023_0, {"NPU"}},
{"Softmax", V_2020_4, {"CPU", "GPU"}},
{"Softmax", V_2023_0, {"VPUX"}},
{"Softmax", V_2023_0, {"NPU"}},
{"Softplus", V_2022_1, {"CPU", "GPU"}},
{"Softplus", V_2023_0, {"VPUX"}},
{"Softplus", V_2023_0, {"NPU"}},
{"Softsign", V_2022_1, {"CPU", "GPU"}},
{"SpaceToDepth", V_2020_4, {"CPU", "GPU"}},
{"SpaceToDepth", V_2023_0, {"VPUX"}},
{"SpaceToDepth", V_2023_0, {"NPU"}},
{"Split", V_2020_4, {"CPU", "GPU"}},
{"Split", V_2023_0, {"VPUX"}},
{"Split", V_2023_0, {"NPU"}},
{"Sqrt", V_2020_4, {"CPU", "GPU"}},
{"Sqrt", V_2023_0, {"VPUX"}},
{"Sqrt", V_2023_0, {"NPU"}},
{"Squeeze", V_2020_4, {"CPU", "GPU"}},
{"Squeeze", V_2023_0, {"VPUX"}},
{"Squeeze", V_2023_0, {"NPU"}},
{"Softsign", V_2020_4, {"CPU"}},
{"Sub", V_2020_4, {"CPU", "GPU"}},
{"Sub", V_2023_0, {"VPUX"}},
{"Sub", V_2023_0, {"NPU"}},
{"Sum", V_2020_4, {"CPU", "GPU"}},
{"Sum", V_2023_0, {"VPUX"}},
{"Sum", V_2023_0, {"NPU"}},
{"Tan", V_2020_4, {"CPU", "GPU"}},
{"Tan", V_2023_1, {"NPU"}},
{"Tanh", V_2020_4, {"CPU", "GPU"}},
{"Tanh", V_2023_0, {"VPUX"}},
{"Tanh", V_2023_0, {"NPU"}},
{"ThresholdedRelu", V_2022_1, {"CPU", "GPU"}},
{"ThresholdedRelu", V_2023_0, {"VPUX"}},
{"ThresholdedRelu", V_2023_0, {"NPU"}},
{"Tile", V_2021_3, {"CPU", "GPU"}},
{"Tile", V_2023_0, {"VPUX"}},
{"Tile", V_2023_0, {"NPU"}},
{"Transpose", V_2020_4, {"CPU", "GPU"}},
{"Transpose", V_2023_0, {"VPUX"}},
{"Transpose", V_2023_0, {"NPU"}},
{"Trilu", V_2023_0, {"CPU", "GPU"}},
{"Trilu", V_2023_1, {"NPU"}},
{"TopK", V_2020_4, {"CPU", "GPU"}},
{"TopK", V_2023_0, {"VPUX"}},
{"TopK", V_2023_0, {"NPU"}},
{"Upsample", V_2020_4, {"CPU", "GPU"}},
{"Unsqueeze", V_2020_4, {"CPU", "GPU"}},
{"Unsqueeze", V_2023_0, {"VPUX"}},
{"Upsample", V_2021_1, {"CPU"}},
{"Upsample", V_2021_4, {"GPU"}},
{"Upsample", V_2023_0, {"VPUX"}},
{"Unsqueeze", V_2023_0, {"NPU"}},
{"Where", V_2022_1, {"CPU", "GPU"}},
{"Where", V_2023_0, {"VPUX"}}, // Added for whisper decoder model.
{"Where", V_2023_0, {"NPU"}}, // Added for whisper decoder model.
{"Xor", V_2022_1, {"CPU", "GPU"}},
{"Xor", V_2023_1, {"NPU"}},
};
// Fills the type tables supported_types_initializer_, supported_types_vpu_ / supported_types_npu_,
// supported_types_cpu_ and supported_types_gpu_. Every insert stores one
// (version enumerator, ONNX TensorProto element type) pair.
// Presumably the version marks the release from which that element type is accepted for the
// given target -- confirm against the code that reads these tables (not visible here).
// NOTE(review): each group of inserts appears twice in this listing, once as single-line
// statements and once wrapped across two lines, and the device table shows up under both the
// vpu_ and npu_ member names.
void DataOps::populate_types_supported() {
supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
supported_types_initializer_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
supported_types_initializer_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
supported_types_initializer_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
supported_types_initializer_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
supported_types_initializer_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
supported_types_initializer_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
supported_types_initializer_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
supported_types_initializer_.insert(
std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
supported_types_initializer_.insert(
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
supported_types_initializer_.insert(
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
supported_types_vpu_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
supported_types_npu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
supported_types_npu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
supported_types_npu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
supported_types_npu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
supported_types_npu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
supported_types_npu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
supported_types_npu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
supported_types_npu_.insert(
std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
supported_types_cpu_.insert(std::make_pair(V_2022_2, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
supported_types_cpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
supported_types_cpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
supported_types_cpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
supported_types_cpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
supported_types_cpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
supported_types_cpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
supported_types_cpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
supported_types_cpu_.insert(
std::make_pair(V_2022_2, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
supported_types_gpu_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
supported_types_gpu_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
supported_types_gpu_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
supported_types_gpu_.insert(std::make_pair(V_2022_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
supported_types_gpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
supported_types_gpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
supported_types_gpu_.insert(
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
supported_types_gpu_.insert(
std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
supported_types_gpu_.insert(
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
supported_types_gpu_.insert(
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
supported_types_gpu_.insert(
std::make_pair(V_2022_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
}
void DataOps::populate_op_mode_supported() {
@ -349,10 +439,10 @@ void DataOps::populate_op_mode_supported() {
no_dimension_supported_.push_back({"Equal", V_2023_0, {"GPU"}});
no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}});
no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}});
no_dimension_supported_.push_back({"Greater", V_2023_0, {"VPUX"}});
no_dimension_supported_.push_back({"Greater", V_2023_0, {"NPU"}});
no_dimension_supported_.push_back({"Less", V_2022_1, {"CPU"}});
no_dimension_supported_.push_back({"Loop", V_2021_4, {"All"}});
no_dimension_supported_.push_back({"Max", V_2023_0, {"VPUX"}});
no_dimension_supported_.push_back({"Max", V_2023_0, {"NPU"}});
no_dimension_supported_.push_back({"Min", V_2020_4, {"All"}});
no_dimension_supported_.push_back({"Mul", V_2020_4, {"All"}});
no_dimension_supported_.push_back({"QuantizeLinear", V_2021_4, {"All"}});
@ -382,11 +472,14 @@ void DataOps::populate_op_mode_supported() {
{
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3},
[this](const Node* node, const InitializedTensorSet&) {
// Abs is not supported with INT8 or INT32 as input data type on GPU
if (device_id_.find("GPU") != std::string::npos) {
// Abs is not supported with INT8 or INT32 as input data type on GPU and NPU
if ((device_id_.find("GPU") != std::string::npos) ||
(device_id_.find("NPU") != std::string::npos)) {
for (size_t i = 0; i < node->InputDefs().size(); i++) {
if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8 ||
node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8 ||
node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
return true;
}
}
@ -399,11 +492,14 @@ void DataOps::populate_op_mode_supported() {
[this](const Node* node, const InitializedTensorSet&) {
// tensor type does not support select last index
auto& attributes = node->GetAttributes();
auto last_index_arg = attributes.count("select_last_index") > 0 ? attributes.at("select_last_index").i() : 0;
auto last_index_arg =
attributes.count("select_last_index") > 0 ? attributes.at("select_last_index").i()
: 0;
if (last_index_arg != 0)
return true;
// tensor type supports float as input for argmax and argmin
if (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)
if (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() !=
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)
return true;
return false;
}};
@ -415,7 +511,8 @@ void DataOps::populate_op_mode_supported() {
[this](const Node* node, const InitializedTensorSet&) {
if (device_id_.find("GPU") != std::string::npos) {
// int64 data type is not supported on GPU
const bool data_is_int64 = node->InputDefs()[0]->Type()->find("int64") != std::string::npos;
const bool data_is_int64 =
node->InputDefs()[0]->Type()->find("int64") != std::string::npos;
return data_is_int64;
}
return false;
@ -506,9 +603,12 @@ void DataOps::populate_op_mode_supported() {
if (device_id_.find("GPU") != std::string::npos) {
auto x_data_type = node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
auto y_data_type = node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
// currently both inputs with int32 are not supported and also both input datatypes should be same
const bool A_is_int32 = node->InputDefs()[0]->Type()->find("int32") != std::string::npos;
const bool B_is_int32 = node->InputDefs()[1]->Type()->find("int32") != std::string::npos;
// currently both inputs with int32 are not supported
// and also both input datatypes should be same
const bool A_is_int32 =
node->InputDefs()[0]->Type()->find("int32") != std::string::npos;
const bool B_is_int32 =
node->InputDefs()[1]->Type()->find("int32") != std::string::npos;
if ((A_is_int32 && B_is_int32) || (x_data_type != y_data_type))
return true;
}
@ -589,11 +689,13 @@ void DataOps::populate_op_mode_supported() {
if (device_id_.find("GPU") != std::string::npos) {
auto slope = node->InputDefs()[1];
// PRelu slope has to be an initializer or needs to come from a constant node
if (initializers.count(slope->Name()))
if (initializers.count(slope->Name())) {
return false;
else {
for (auto input_node = node->InputNodesBegin(); input_node != node->InputNodesEnd(); ++input_node) {
if (GetInputCount(this->graph_viewer_.GetNode((*input_node).Index()), initializers) == 0)
} else {
for (auto input_node = node->InputNodesBegin();
input_node != node->InputNodesEnd(); ++input_node) {
if (GetInputCount(
this->graph_viewer_.GetNode((*input_node).Index()), initializers) == 0)
return false;
}
}
@ -603,12 +705,12 @@ void DataOps::populate_op_mode_supported() {
op_list_.insert({"PRelu", obj});
}
{
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
[this](const Node* node, const InitializedTensorSet&) {
const auto& input_arg = node->InputDefs()[1];
auto shape = input_arg->Shape();
// Reshape op with empty dim is Rejected for Myriad
//[TODO] Is this condition required anymore with Myriad removed?
// [TODO] Is this condition required anymore with Myriad removed?
if (shape != nullptr) {
for (const auto& dim : input_arg->Shape()->dim()) {
if (utils::HasDimValue(dim) && dim.dim_value() == 0)
@ -638,7 +740,8 @@ void DataOps::populate_op_mode_supported() {
if (device_id_.find("GPU") != std::string::npos) {
// INT32 datatype is not supported as input
for (size_t i = 0; i < node->InputDefs().size(); i++) {
if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
return true;
}
}
@ -650,9 +753,11 @@ void DataOps::populate_op_mode_supported() {
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3},
[this](const Node* node, const InitializedTensorSet&) {
if (device_id_.find("GPU") != std::string::npos) {
auto output_data_type = node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
auto output_data_type =
node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
// If the output of ScatterND op is BOOL, it is rejected for GPU.
if (output_data_type == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)
if (output_data_type ==
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)
return true;
}
return false;
@ -666,7 +771,8 @@ void DataOps::populate_op_mode_supported() {
[this](const Node* node, const InitializedTensorSet&) {
// If the Input of Shrink op is UINT8, it is rejected (Due to output mismatch)
for (size_t i = 0; i < node->InputDefs().size(); i++) {
if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8)
if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8)
return true;
}
return false;
@ -714,10 +820,11 @@ void DataOps::populate_op_mode_supported() {
op_list_.insert({"Squeeze", obj});
}
{
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
[this](const Node* node, const InitializedTensorSet&) {
// If the operator is unsqueeze
// If axes is an input, then we cannot produce a static graph. Conversion fails in convert_function_to_cnn_network.
// If axes is an input, then we cannot produce a static graph.
// Conversion fails in convert_function_to_cnn_network.
for (size_t i = 0; i < node->InputDefs().size(); i++) {
if (node->InputDefs()[i]->Name() == "axes") {
return true;
@ -728,14 +835,15 @@ void DataOps::populate_op_mode_supported() {
op_list_.insert({"Unsqueeze", obj});
}
{
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
[this](const Node* node, const InitializedTensorSet&) {
// check for attributes
auto& upsample_attr = node->GetAttributes();
if (upsample_attr.count("scales") > 0) {
auto& upsample_arg = upsample_attr.at("scales");
auto float_size = upsample_arg.floats_size();
if (float_size > 2 && (upsample_arg.floats(0) != 1.f || upsample_arg.floats(1) != 1.f)) {
if (float_size > 2 &&
(upsample_arg.floats(0) != 1.f || upsample_arg.floats(1) != 1.f)) {
return true;
}
}
@ -750,9 +858,12 @@ void DataOps::populate_op_mode_supported() {
}
}
// x_arg supports only float, int8 and float16 type
if ((x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) ||
(x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8) ||
(x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)) {
if ((x_arg->TypeAsProto()->tensor_type().elem_type() ==
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) ||
(x_arg->TypeAsProto()->tensor_type().elem_type() ==
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8) ||
(x_arg->TypeAsProto()->tensor_type().elem_type() ==
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)) {
return false;
} else {
return true;
@ -849,9 +960,9 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) {
} else {
auto dtype = type_proto->tensor_type().elem_type();
if (device_id_.find("VPUX") != std::string::npos || device_id_.find("HETERO") != std::string::npos ||
if (device_id_.find("NPU") != std::string::npos || device_id_.find("HETERO") != std::string::npos ||
device_id_.find("MULTI") != std::string::npos || device_id_.find("AUTO") != std::string::npos) {
for (auto const& var : supported_types_vpu_) {
for (auto const& var : supported_types_npu_) {
if ((var.first <= version_id_) &&
(var.second == dtype)) {
return true;
@ -1079,7 +1190,9 @@ bool DataOps::node_is_supported(const std::map<std::string, std::set<std::string
if (opset->second.find(optype) == opset->second.end() && op_fun == ops_supported_as_function.end()) {
#ifndef NDEBUG
if (openvino_ep::backend_utils::IsDebugEnabled()) {
std::cout << "The operator is not available in OpenVINO ngraph operators list nor the operator is a special ONNX function" << std::endl;
// The message is split across two literals that are streamed back-to-back,
// so the first piece must end with a space; otherwise the output reads
// "...operators listnor the operator...".
std::cout << "The operator is not available in OpenVINO ngraph operators list "
<< "nor the operator is a special ONNX function"
<< std::endl;
}
#endif
return false;
@ -1095,10 +1208,12 @@ std::vector<NodeIndex> DataOps::GetUnsupportedNodeIndices(std::unordered_set<std
for (const auto& node_idx : graph_viewer_.GetNodesInTopologicalOrder()) {
if (node_is_supported(ng_supported_ops, node_idx)) {
// Collect inputs that are initializers
graph_viewer_.GetNode(node_idx)->ForEachDef([&ng_required_initializers, this](const NodeArg& node_arg, bool is_input) {
if(is_input && this->graph_viewer_.GetAllInitializedTensors().count(node_arg.Name())) {
graph_viewer_.GetNode(node_idx)->ForEachDef([&ng_required_initializers, this](const NodeArg& node_arg,
bool is_input) {
if (is_input && this->graph_viewer_.GetAllInitializedTensors().count(node_arg.Name())) {
ng_required_initializers.insert(node_arg.Name());
} }, true);
} },
true);
} else {
unsupported_nodes_idx.push_back(node_idx);
}
@ -1110,7 +1225,8 @@ bool DataOps::IsOpSupportedOnlyInModel(std::string name) {
return ops_supported_only_in_model.find(name) != ops_supported_only_in_model.end();
}
bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers, const Node* node) {
bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers,
const Node* node) {
if (node->OpType() == "Reshape") {
const auto& shape_arg = node->InputDefs()[1];
if (ng_required_initializers.find(shape_arg->Name()) == ng_required_initializers.end()) {
@ -1119,15 +1235,20 @@ bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>&
} else if (node->OpType() == "Expand") {
// nGraph only supports constant shape input values
const auto& output = node->OutputDefs()[0];
if (output->TypeAsProto()->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)
if (output->TypeAsProto()->tensor_type().elem_type() !=
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)
return true;
} else if (node->OpType() == "RoiAlign") {
using onnx_dtype = ONNX_NAMESPACE::TensorProto_DataType;
onnx_dtype input_0_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
onnx_dtype input_1_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
onnx_dtype input_2_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[2]->TypeAsProto()->tensor_type().elem_type();
onnx_dtype output_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
onnx_dtype input_0_data_type =
(ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
onnx_dtype input_1_data_type =
(ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
onnx_dtype input_2_data_type =
(ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[2]->TypeAsProto()->tensor_type().elem_type();
onnx_dtype output_data_type =
(ONNX_NAMESPACE::TensorProto_DataType)node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
if ((input_0_data_type != onnx_dtype::TensorProto_DataType_FLOAT16) ||
(input_1_data_type != onnx_dtype::TensorProto_DataType_FLOAT16) ||

View file

@ -3,6 +3,11 @@
#pragma once
#include <unordered_set>
#include <utility>
#include <map>
#include <set>
#include <vector>
#include <string>
namespace onnxruntime {
namespace openvino_ep {
@ -47,7 +52,7 @@ class DataOps {
std::multimap<std::string, UnsupportedOpMode> op_list_;
std::vector<SupportedOp> subgraph_supported_;
std::vector<SupportedOp> no_dimension_supported_;
std::set<Pairs> supported_types_vpu_;
std::set<Pairs> supported_types_npu_;
std::set<Pairs> supported_types_cpu_;
std::set<Pairs> supported_types_gpu_;
std::set<Pairs> supported_types_initializer_;
@ -64,14 +69,16 @@ class DataOps {
const NodeIndex node_idx);
public:
DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, std::string dev_id) : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id) {
// Stores the graph viewer, version number and device id in the corresponding
// members, then runs populate_op_mode_supported() and
// populate_types_supported() as part of construction.
// dev_id is later matched by substring (e.g. "GPU", "NPU") in the support checks.
DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, std::string dev_id)
: graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id) {
populate_op_mode_supported();
populate_types_supported();
}
virtual std::vector<NodeIndex> GetUnsupportedNodeIndices(std::unordered_set<std::string>& ng_required_initializers);
virtual bool IsOpSupportedOnlyInModel(std::string name);
virtual bool SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers, const Node* node);
virtual bool SpecialConditionForClusterSizeOne(
std::unordered_set<std::string>& ng_required_initializers, const Node* node);
virtual bool DoNotOmitSubGraph(const std::string& name);
virtual bool InsertNode(const std::string& name);
VersionNum GetVersion() const { return version_id_; }

View file

@ -2,6 +2,7 @@
// Licensed under the MIT License
#include "core/providers/shared_library/provider_api.h"
#include "utils.h"
#if defined(_MSC_VER)
#pragma warning(disable : 4244 4245 5208)
@ -113,7 +114,8 @@ std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_op
* supported_cluster + (UNsupported_node + rest_of_the_graph). This function returns a vector of all supported_clusters by nGraph
*/
std::vector<std::vector<NodeIndex>>
GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes) {
GetPartitionedClusters(const std::vector<NodeIndex>& topological_order,
const std::vector<NodeIndex>& unsupported_nodes) {
std::vector<std::vector<NodeIndex>> ng_clusters;
auto prev = topological_order.begin();
@ -140,7 +142,10 @@ GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const st
return ng_clusters;
}
void IdentifyConnectedNodes(const GraphViewer& graph_viewer, NodeIndex curr_node_index, std::vector<NodeIndex>& cluster, std::vector<NodeIndex>& sub_cluster) {
void IdentifyConnectedNodes(const GraphViewer& graph_viewer,
NodeIndex curr_node_index,
std::vector<NodeIndex>& cluster,
std::vector<NodeIndex>& sub_cluster) {
if (std::find(cluster.begin(), cluster.end(), curr_node_index) == cluster.end())
return;
@ -205,7 +210,8 @@ void GetInputsOutputsOfCluster(const GraphViewer& graph_viewer,
const auto& ext_node = graph_viewer.GetNode((*it).Index());
if (std::find(cluster.begin(), cluster.end(), ext_node->Index()) == cluster.end()) {
// Node is external to this_cluster. Search through its inputs to find the output that is generated by this_cluster.
// Node is external to this_cluster. Search through its inputs to
// find the output that is generated by this_cluster.
std::set<std::string> ext_node_inputs;
ext_node->ForEachDef(
[&ext_node_inputs](const NodeArg& arg, bool is_input) {

View file

@ -1,5 +1,15 @@
// Copyright (C) 2019-2022 Intel Corporation
// Licensed under the MIT License
#pragma once
#include <memory>
#include <map>
#include <utility>
#include <vector>
#include <set>
#include <algorithm>
#include <string>
#include <unordered_set>
namespace onnxruntime {
namespace openvino_ep {
@ -18,9 +28,14 @@ int GetOnnxOpSet(const GraphViewer& graph_viewer);
std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_opset);
std::vector<std::vector<NodeIndex>>
GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes);
GetPartitionedClusters(
const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes);
void IdentifyConnectedNodes(const GraphViewer& graph_viewer, NodeIndex curr_node_index, std::vector<NodeIndex>& cluster, std::vector<NodeIndex>& sub_cluster);
void IdentifyConnectedNodes(
const GraphViewer& graph_viewer,
NodeIndex curr_node_index,
std::vector<NodeIndex>& cluster,
std::vector<NodeIndex>& sub_cluster);
std::vector<std::vector<NodeIndex>>
GetConnectedClusters(const GraphViewer& graph_viewer, const std::vector<std::vector<NodeIndex>>& clusters);

View file

@ -1432,7 +1432,7 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O
if (legacy_ov_options->device_type != nullptr)
ov_options_converted_map["device_type"] = legacy_ov_options->device_type;
ov_options_converted_map["enable_vpu_fast_compile"] = legacy_ov_options->enable_vpu_fast_compile;
ov_options_converted_map["enable_npu_fast_compile"] = legacy_ov_options->enable_npu_fast_compile;
if (legacy_ov_options->device_id != nullptr)
ov_options_converted_map["device_id"] = legacy_ov_options->device_id;

View file

@ -813,10 +813,10 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
if (option.first == "device_type") {
OV_provider_options_map[option.first] = option.second;
continue;
} else if (option.first == "enable_vpu_fast_compile") {
} else if (option.first == "enable_npu_fast_compile") {
if (!(option.second == "True" || option.second == "true" ||
option.second == "False" || option.second == "false")) {
ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
ORT_THROW("Invalid value passed for enable_npu_fast_compile: ", option.second);
}
OV_provider_options_map[option.first] = option.second;
} else if (option.first == "enable_opencl_throttling") {

View file

@ -60,11 +60,11 @@ struct OrtStatus {
#elif OPENVINO_CONFIG_GPU_FP16
#define BACKEND_OPENVINO "-OPENVINO_GPU_FP16"
#elif OPENVINO_CONFIG_VPUX_FP16
#define BACKEND_OPENVINO "-OPENVINO_VPUX_FP16"
#elif OPENVINO_CONFIG_NPU_FP16
#define BACKEND_OPENVINO "-OPENVINO_NPU_FP16"
#elif OPENVINO_CONFIG_VPUX_U8
#define BACKEND_OPENVINO "-OPENVINO_VPUX_U8"
#elif OPENVINO_CONFIG_NPU_U8
#define BACKEND_OPENVINO "-OPENVINO_NPU_U8"
#elif OPENVINO_CONFIG_MULTI
#define BACKEND_OPENVINO "-OPENVINO_MULTI"

View file

@ -60,7 +60,7 @@ namespace perftest {
"\t-i: Specify EP specific runtime options as key value pairs. Different runtime options available are: \n"
"\t [OpenVINO only] [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
"\t [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n"
"\t [OpenVINO only] [enable_vpu_fast_compile]: Optionally enabled to speeds up the model's compilation on VPU device targets.\n"
"\t [OpenVINO only] [enable_npu_fast_compile]: Optionally enabled to speeds up the model's compilation on NPU device targets.\n"
"\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
"\t [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n"
"\t [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n"
@ -72,7 +72,7 @@ namespace perftest {
"\t [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
"\t 'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
"\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
"\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"<path>\"\"\n"
"\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"<path>\"\"\n"
"\t [Example] [For QNN EP] -e qnn -i \"backend_path|/folderpath/libQnnCpu.so\" \n\n"
"\t [TensorRT only] [trt_max_partition_iterations]: Maximum iterations for TensorRT parser to get capability.\n"
"\t [TensorRT only] [trt_min_subgraph_size]: Minimum size of TensorRT subgraphs.\n"

View file

@ -240,8 +240,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
if (key == "device_type") {
std::set<std::string> ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32",
"GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
"GPU.0_FP16", "GPU.1_FP16",
"VPUX_FP16", "VPUX_U8"};
"GPU.0_FP16", "GPU.1_FP16"};
if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) {
ov_options[key] = value;
} else if (value.find("HETERO:") == 0) {
@ -254,17 +253,17 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
ORT_THROW(
"[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
"Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
"'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from"
"'GPU.0_FP16', 'GPU.1_FP16' or from"
" HETERO/MULTI/AUTO options available. \n");
}
} else if (key == "device_id") {
ov_options[key] = value;
} else if (key == "enable_vpu_fast_compile") {
} else if (key == "enable_npu_fast_compile") {
if (value == "true" || value == "True" ||
value == "false" || value == "False") {
ov_options[key] = value;
} else {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_npu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "enable_opencl_throttling") {
if (value == "true" || value == "True" ||
@ -299,7 +298,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
ov_options[key] = value;
}
} else {
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling|true'] \n");
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling|true'] \n");
}
}
session_options.AppendExecutionProvider("OpenVINO", ov_options);

View file

@ -143,7 +143,7 @@ void L1NormalizationWithZeroNorm() {
vector<T> expected_output = {0.5f, 0.5f, 0.f, 0.f};
test.AddOutput<T>("Y", input_dims, expected_output);
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
}
TEST(LpNormalizationTest, L1NormalizationWithZeroNorm) {
@ -163,7 +163,7 @@ void L2NormalizationWithZeroNorm() {
vector<T> expected_output = {1.f, 0.f, 0.f, 0.f};
test.AddOutput<T>("Y", input_dims, expected_output);
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
}
TEST(LpNormalizationTest, L2NormalizationWithZeroNorm) {

View file

@ -762,7 +762,7 @@ TEST(RNNTest, RNN_invalid_sequence_lens) {
test.AddOutput<float>("Y_h", Y_h_dims, Y_h_data);
// the CUDA RNN version allows the invalid sequence lengths, so disable testing on CUDA and TensorRT
test.Run(OpTester::ExpectResult::kExpectFailure, error_msg, {kCudaExecutionProvider, kTensorrtExecutionProvider});
test.Run(OpTester::ExpectResult::kExpectFailure, error_msg, {kCudaExecutionProvider, kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
};
// should batch batch_size to be valid
@ -860,7 +860,7 @@ TEST(RNNTest, RNN_bidirectional_with_sequence_lens) {
test.AddOutput<float>("Y_h", Y_h_dims, Y_h_data);
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
}
TEST(RNNTest, RNN_with_invalid_activation_load_failure) {

View file

@ -66,7 +66,7 @@ TEST(CompressTest, Compress_3dims_has_extra_condition) {
// has condition length = 3 > input_dim[axis] = 2
test.AddInput<bool>("condition", {3}, {0, 1, 1});
test.AddOutput<float>("output", {2, 1, 3}, {4.0f, 5.0f, 6.0f, 10.0f, 11.0f, 12.0f});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
}
TEST(CompressTest, Compress_3dims_has_extra_input) {

View file

@ -99,7 +99,7 @@ TEST(TensorOpTest, Unsqueeze_scalar_2) {
test.AddInput<float>("input", {}, std::vector<float>{1.0f});
test.AddInput<int64_t>("axes", {2}, std::vector<int64_t>{0, -1}, axes_is_initializer);
test.AddOutput<float>("output", {1, 1}, std::vector<float>{1.0f});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
};
run_test(false);
run_test(true);

View file

@ -140,6 +140,9 @@ def create_backend_test(test_name=None):
if backend.supports_device("OPENVINO_CPU_FP16"):
current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP16")
if backend.supports_device("OPENVINO_NPU_FP16"):
current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_NPU_FP16")
if backend.supports_device("OPENVINO"):
current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_opset18")

View file

@ -521,6 +521,10 @@
"test_scan_sum_cpu", // Disabled due to output mismatch with tolerance.
"test_scan9_sum_cpu" // Disabled due to output mismatch with tolerance.
],
"current_failing_tests_OPENVINO_NPU_FP16": [
"^test_prelu_broadcast",
"test_loop11_cpu"
],
"current_failing_tests_OPENVINO_opset18": [
// pending opset 18 support, RUNTIME_EXCEPTION : Encountered unknown exception in Initialize()
"^test_center_crop_pad_crop_axes_chw",

View file

@ -66,15 +66,13 @@ _check_python_version()
def _openvino_verify_device_type(device_read):
choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16", "VPUX_FP16", "VPUX_U8"]
choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"]
choices1 = [
"CPU_FP32_NO_PARTITION",
"CPU_FP16_NO_PARTITION",
"GPU_FP32_NO_PARTITION",
"GPU_FP16_NO_PARTITION",
"VPUX_FP16_NO_PARTITION",
"VPUX_U8_NO_PARTITION",
]
status_hetero = True
res = False
@ -89,7 +87,7 @@ def _openvino_verify_device_type(device_read):
if len(comma_separated_devices) < 2:
print("At least two devices required in Hetero/Multi/Auto Mode")
status_hetero = False
dev_options = ["CPU", "GPU", "VPUX"]
dev_options = ["CPU", "GPU"]
for dev in comma_separated_devices:
if dev not in dev_options:
status_hetero = False
@ -100,7 +98,7 @@ def _openvino_verify_device_type(device_read):
print("specify the keyword HETERO or MULTI or AUTO followed by the devices ")
print("in the order of priority you want to build\n")
print("The different hardware devices that can be added in HETERO or MULTI or AUTO")
print("are ['CPU','GPU', 'VPUX'] \n")
print("are ['CPU','GPU'] \n")
print("An example of how to specify the hetero build type. Ex: HETERO:GPU,CPU \n")
print("An example of how to specify the MULTI build type. Ex: MULTI:GPU,CPU \n")
print("An example of how to specify the AUTO build type. Ex: AUTO:GPU,CPU \n")
@ -1158,8 +1156,6 @@ def generate_build_tree(
"-Donnxruntime_USE_OPENVINO_GPU_FP16=" + ("ON" if args.use_openvino == "GPU_FP16" else "OFF"),
"-Donnxruntime_USE_OPENVINO_CPU_FP32=" + ("ON" if args.use_openvino == "CPU_FP32" else "OFF"),
"-Donnxruntime_USE_OPENVINO_CPU_FP16=" + ("ON" if args.use_openvino == "CPU_FP16" else "OFF"),
"-Donnxruntime_USE_OPENVINO_VPUX_FP16=" + ("ON" if args.use_openvino == "VPUX_FP16" else "OFF"),
"-Donnxruntime_USE_OPENVINO_VPUX_U8=" + ("ON" if args.use_openvino == "VPUX_U8" else "OFF"),
"-Donnxruntime_USE_OPENVINO_GPU_FP32_NP="
+ ("ON" if args.use_openvino == "GPU_FP32_NO_PARTITION" else "OFF"),
"-Donnxruntime_USE_OPENVINO_GPU_FP16_NP="
@ -1168,9 +1164,6 @@ def generate_build_tree(
+ ("ON" if args.use_openvino == "CPU_FP32_NO_PARTITION" else "OFF"),
"-Donnxruntime_USE_OPENVINO_CPU_FP16_NP="
+ ("ON" if args.use_openvino == "CPU_FP16_NO_PARTITION" else "OFF"),
"-Donnxruntime_USE_OPENVINO_VPUX_FP16_NP="
+ ("ON" if args.use_openvino == "VPUX_FP16_NP_PARTITION" else "OFF"),
"-Donnxruntime_USE_OPENVINO_VPUX_U8_NP=" + ("ON" if args.use_openvino == "VPUX_U8_NP_PARTITION" else "OFF"),
"-Donnxruntime_USE_OPENVINO_HETERO=" + ("ON" if args.use_openvino.startswith("HETERO") else "OFF"),
"-Donnxruntime_USE_OPENVINO_DEVICE=" + (args.use_openvino),
"-Donnxruntime_USE_OPENVINO_MULTI=" + ("ON" if args.use_openvino.startswith("MULTI") else "OFF"),

View file

@ -552,6 +552,7 @@ def generate_files(line_list, args):
files_list.append(
"<file src=" + '"' + os.path.join(args.native_build_path, "onnxruntime.pdb") + runtimes + " />"
)
else:
files_list.append(
"<file src="
@ -706,25 +707,9 @@ def generate_files(line_list, args):
)
if is_windows():
if "2022" in openvino_path:
dll_list_path = os.path.join(openvino_path, "runtime\\bin\\intel64\\Release\\")
tbb_list_path = os.path.join(openvino_path, "runtime\\3rdparty\\tbb\\bin\\")
else:
dll_list_path = os.path.join(
openvino_path, "deployment_tools\\inference_engine\\bin\\intel64\\Release\\"
)
tbb_list_path = os.path.join(openvino_path, "deployment_tools\\inference_engine\\external\\tbb\\bin\\")
ngraph_list_path = os.path.join(openvino_path, "deployment_tools\\ngraph\\lib\\")
for ngraph_element in os.listdir(ngraph_list_path):
if ngraph_element.endswith("dll"):
files_list.append(
"<file src="
+ '"'
+ os.path.join(ngraph_list_path, ngraph_element)
+ runtimes_target
+ args.target_architecture
+ '\\native" />'
)
dll_list_path = os.path.join(openvino_path, "runtime\\bin\\intel64\\Release\\")
tbb_list_path = os.path.join(openvino_path, "runtime\\3rdparty\\tbb\\bin\\")
for dll_element in os.listdir(dll_list_path):
if dll_element.endswith("dll"):
files_list.append(
@ -735,26 +720,7 @@ def generate_files(line_list, args):
+ args.target_architecture
+ '\\native" />'
)
# plugins.xml
files_list.append(
"<file src="
+ '"'
+ os.path.join(dll_list_path, "plugins.xml")
+ runtimes_target
+ args.target_architecture
+ '\\native" />'
)
# usb-ma2x8x.mvcmd
# OpenVINO 2022.3 doesn't have usb-ma2x8x.mvcmd
if "2022.3" not in openvino_path:
files_list.append(
"<file src="
+ '"'
+ os.path.join(dll_list_path, "usb-ma2x8x.mvcmd")
+ runtimes_target
+ args.target_architecture
+ '\\native" />'
)
for tbb_element in os.listdir(tbb_list_path):
if tbb_element.endswith("dll"):
files_list.append(