mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-06 00:03:22 +00:00
Openvino ep ort 23.1 (#17911)
### Description Integration with OpenVINO 2023.1. ### Motivation and Context - Alignment with the latest OpenVINO version. - The device name is changed from VPUX to NPU, and the device is removed from the supported list until official public support is available. --------- Co-authored-by: Sahar Fatima <sfatima.3001@gmail.com> Co-authored-by: Saurabh Kale <saurabh1.kale@intel.com> Co-authored-by: Suryaprakash Shanmugam <suryaprakash.shanmugam@intel.com> Co-authored-by: sfatimar <sahar.fatima@intel.com>
This commit is contained in:
parent
69f029797d
commit
d87216bcb1
35 changed files with 563 additions and 357 deletions
|
|
@ -1282,14 +1282,6 @@ if (onnxruntime_USE_OPENVINO)
|
|||
add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
|
||||
endif()
|
||||
|
||||
if (onnxruntime_USE_OPENVINO_VPUX_FP16)
|
||||
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
|
||||
endif()
|
||||
|
||||
if (onnxruntime_USE_OPENVINO_VPUX_U8)
|
||||
add_definitions(-DOPENVINO_CONFIG_VPUX_U8=1)
|
||||
endif()
|
||||
|
||||
if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
|
||||
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
|
||||
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
|
||||
|
|
@ -1310,16 +1302,6 @@ if (onnxruntime_USE_OPENVINO)
|
|||
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
|
||||
endif()
|
||||
|
||||
if (onnxruntime_USE_OPENVINO_VPUX_FP32_NP)
|
||||
add_definitions(-DOPENVINO_CONFIG_VPUX_FP32=1)
|
||||
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
|
||||
endif()
|
||||
|
||||
if (onnxruntime_USE_OPENVINO_VPUX_FP16_NP)
|
||||
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
|
||||
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
|
||||
endif()
|
||||
|
||||
if (onnxruntime_USE_OPENVINO_HETERO)
|
||||
add_definitions(-DOPENVINO_CONFIG_HETERO=1)
|
||||
add_definitions(-DDEVICE_NAME="${onnxruntime_USE_OPENVINO_DEVICE}")
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
|
|||
- Intel® CPUs
|
||||
- Intel® integrated GPUs
|
||||
- Intel® discrete GPUs
|
||||
- Intel® integrated VPUs
|
||||
|
||||
Installation
|
||||
------------
|
||||
|
|
@ -22,7 +21,6 @@ This package supports:
|
|||
- Intel® CPUs
|
||||
- Intel® integrated GPUs
|
||||
- Intel® discrete GPUs
|
||||
- Intel® integrated VPUs
|
||||
|
||||
``pip3 install onnxruntime-openvino``
|
||||
|
||||
|
|
|
|||
|
|
@ -611,7 +611,7 @@ typedef struct OrtMIGraphXProviderOptions {
|
|||
typedef struct OrtOpenVINOProviderOptions {
|
||||
#ifdef __cplusplus
|
||||
OrtOpenVINOProviderOptions() : device_type{},
|
||||
enable_vpu_fast_compile{},
|
||||
enable_npu_fast_compile{},
|
||||
device_id{},
|
||||
num_of_threads{},
|
||||
cache_dir{},
|
||||
|
|
@ -624,7 +624,7 @@ typedef struct OrtOpenVINOProviderOptions {
|
|||
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
|
||||
*/
|
||||
const char* device_type;
|
||||
unsigned char enable_vpu_fast_compile; ///< 0 = disabled, nonzero = enabled
|
||||
unsigned char enable_npu_fast_compile; ///< 0 = disabled, nonzero = enabled
|
||||
const char* device_id;
|
||||
size_t num_of_threads; ///< 0 = Use default number of threads
|
||||
const char* cache_dir; // path is set to empty by default
|
||||
|
|
|
|||
|
|
@ -2,9 +2,7 @@
|
|||
// Licensed under the MIT License
|
||||
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
|
||||
#include "core/providers/shared_library/provider_api.h"
|
||||
#include "contexts.h"
|
||||
|
|
@ -18,7 +16,8 @@ namespace openvino_ep {
|
|||
static std::unique_ptr<GlobalContext> g_global_context;
|
||||
|
||||
GlobalContext& BackendManager::GetGlobalContext() {
|
||||
// This is not thread safe to call for the first time, but it is first called on the main thread by the constructor so it is safe.
|
||||
// This is not thread safe to call for the first time,
|
||||
// but it is first called on the main thread by the constructor so it is safe.
|
||||
if (!g_global_context)
|
||||
g_global_context = std::make_unique<GlobalContext>();
|
||||
return *g_global_context;
|
||||
|
|
@ -88,7 +87,9 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
|
|||
<< "Backend created for graph " << subgraph_context_.subgraph_name;
|
||||
}
|
||||
} else {
|
||||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;
|
||||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. "
|
||||
<< "Initializing backend for graph "
|
||||
<< subgraph_context_.subgraph_name;
|
||||
|
||||
subgraph_context_.has_dynamic_input_shape = false;
|
||||
try {
|
||||
|
|
@ -104,7 +105,7 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
|
|||
bool BackendManager::ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const {
|
||||
bool has_batched_inputs = true;
|
||||
|
||||
for (int i = 0; i < (int)subgraph_context_.input_indexes.size(); i++) {
|
||||
for (int i = 0; i < static_cast<int>(subgraph_context_.input_indexes.size()); i++) {
|
||||
auto& input = model_proto.graph().input(subgraph_context_.input_indexes[i]);
|
||||
|
||||
// Batch-process only raw image inputs (NCHW or NHWC layouts)
|
||||
|
|
@ -215,7 +216,10 @@ BackendManager::ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_pr
|
|||
auto graph_proto = model_copy->mutable_graph();
|
||||
|
||||
for (size_t i = 0, limit = input_shapes.size(); i < limit; i++) {
|
||||
auto g_in_shape = graph_proto->mutable_input((int)i)->mutable_type()->mutable_tensor_type()->mutable_shape();
|
||||
auto g_in_shape = graph_proto->mutable_input(static_cast<int>(i))
|
||||
->mutable_type()
|
||||
->mutable_tensor_type()
|
||||
->mutable_shape();
|
||||
g_in_shape->clear_dim();
|
||||
const auto& shape = input_shapes[i];
|
||||
for (size_t dim = 0, end = shape.size(); dim < end; dim++) {
|
||||
|
|
@ -234,7 +238,11 @@ BackendManager::ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_p
|
|||
auto graph_proto = model_copy->mutable_graph();
|
||||
|
||||
for (int i = 0; i < graph_proto->input_size(); i++) {
|
||||
ONNX_NAMESPACE::TensorShapeProto* g_in_shape = graph_proto->mutable_input((int)i)->mutable_type()->mutable_tensor_type()->mutable_shape();
|
||||
ONNX_NAMESPACE::TensorShapeProto* g_in_shape =
|
||||
graph_proto->mutable_input(static_cast<int>(i))
|
||||
->mutable_type()
|
||||
->mutable_tensor_type()
|
||||
->mutable_shape();
|
||||
g_in_shape->mutable_dim(0)->clear_dim_value();
|
||||
g_in_shape->mutable_dim(0)->set_dim_value(1);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,11 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "ov_interface.h"
|
||||
#include "contexts.h"
|
||||
#include "ibackend.h"
|
||||
|
|
@ -13,7 +18,9 @@ namespace openvino_ep {
|
|||
// Singleton class that manages all the backends
|
||||
class BackendManager {
|
||||
public:
|
||||
BackendManager(const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger);
|
||||
BackendManager(const onnxruntime::Node& fused_node,
|
||||
const onnxruntime::GraphViewer& subgraph,
|
||||
const logging::Logger& logger);
|
||||
void Compute(OrtKernelContext* context);
|
||||
void ShutdownBackendManager();
|
||||
static GlobalContext& GetGlobalContext();
|
||||
|
|
@ -21,7 +28,9 @@ class BackendManager {
|
|||
|
||||
private:
|
||||
std::unique_ptr<ONNX_NAMESPACE::ModelProto> GetModelProtoFromFusedNode(
|
||||
const onnxruntime::Node& fused_node, const onnxruntime::GraphViewer& subgraph, const logging::Logger& logger) const;
|
||||
const onnxruntime::Node& fused_node,
|
||||
const onnxruntime::GraphViewer& subgraph,
|
||||
const logging::Logger& logger) const;
|
||||
bool ModelHasSymbolicInputDims(const onnxruntime::GraphViewer& subgraph) const;
|
||||
bool ModelHasBatchedInputs(const ONNX_NAMESPACE::ModelProto& model_proto) const;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,9 +1,7 @@
|
|||
// Copyright (C) 2019-2022 Intel Corporation
|
||||
// Licensed under the MIT License
|
||||
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <algorithm>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
|
||||
|
|
@ -58,7 +56,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
|
|||
try {
|
||||
auto cnn_network = global_context.ie_core.ReadModel(model);
|
||||
if ((subgraph_context.precision == "FP16") &&
|
||||
(global_context.device_type.find("VPUX") == std::string::npos)) {
|
||||
(global_context.device_type.find("NPU") == std::string::npos)) {
|
||||
// FP16 transformations
|
||||
ov::pass::ConvertFP32ToFP16 pass_obj;
|
||||
pass_obj.run_on_model(cnn_network);
|
||||
|
|
@ -88,7 +86,8 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
|
|||
size_t index = results.size() - 1;
|
||||
|
||||
for (auto it = results.rbegin(); it != results.rend(); ++it) {
|
||||
if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
|
||||
if (auto const_node =
|
||||
std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
|
||||
const_outputs_map[(*it)->get_friendly_name()] = const_node;
|
||||
results.erase(results.begin() + index);
|
||||
}
|
||||
|
|
@ -254,7 +253,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
|
|||
|
||||
void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
|
||||
std::ostream& stream, std::string deviceName) {
|
||||
long long totalTime = 0;
|
||||
int64_t totalTime = 0;
|
||||
// Print performance counts
|
||||
stream << std::endl
|
||||
<< "performance counts:" << std::endl
|
||||
|
|
|
|||
|
|
@ -4,9 +4,15 @@
|
|||
#pragma once
|
||||
|
||||
#define ORT_API_MANUAL_INIT
|
||||
#include <iomanip>
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#include "core/session/onnxruntime_cxx_api.h"
|
||||
#include "contexts.h"
|
||||
#include <iomanip>
|
||||
#include "ov_interface.h"
|
||||
#ifdef _WIN32
|
||||
#include <direct.h>
|
||||
|
|
@ -57,7 +63,9 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
|
|||
size_t batch_slice_idx);
|
||||
|
||||
std::shared_ptr<OVNetwork>
|
||||
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context,
|
||||
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto,
|
||||
const GlobalContext& global_context,
|
||||
const SubGraphContext& subgraph_context,
|
||||
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
|
||||
|
||||
void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ BackendFactory::MakeBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
|
|||
const SubGraphContext& subgraph_context) {
|
||||
std::string type = global_context.device_type;
|
||||
if (type == "CPU" || type.find("GPU") != std::string::npos ||
|
||||
type.find("VPUX") != std::string::npos ||
|
||||
type.find("NPU") != std::string::npos ||
|
||||
type.find("HETERO") != std::string::npos ||
|
||||
type.find("MULTI") != std::string::npos ||
|
||||
type.find("AUTO") != std::string::npos) {
|
||||
|
|
|
|||
|
|
@ -6,10 +6,10 @@
|
|||
#include <memory>
|
||||
#include <sstream>
|
||||
#include <fstream>
|
||||
#include <utility>
|
||||
|
||||
#include "core/providers/shared_library/provider_api.h"
|
||||
#include "../backend_utils.h"
|
||||
// #include <ngraph/pass/constant_folding.hpp>
|
||||
#include "basic_backend.h"
|
||||
#include "../backend_manager.h"
|
||||
|
||||
|
|
@ -57,33 +57,39 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
|
|||
cl_context ctx = static_cast<cl_context>(global_context_.context);
|
||||
remote_context_ = new ov::intel_gpu::ocl::ClContext(global_context_.ie_core.Get(), ctx);
|
||||
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
|
||||
exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name);
|
||||
exe_network_ = global_context_.ie_core.LoadNetwork(
|
||||
ie_cnn_network_, remote_context_, subgraph_context_.subgraph_name);
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
|
||||
} else {
|
||||
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
|
||||
exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
|
||||
exe_network_ = global_context_.ie_core.LoadNetwork(
|
||||
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
|
||||
}
|
||||
#else
|
||||
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
|
||||
if (!subgraph_context_.has_dynamic_input_shape && dev_prec != "CPU_FP16") {
|
||||
const std::string model = model_proto.SerializeAsString();
|
||||
exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
|
||||
exe_network_ = global_context_.ie_core.LoadNetwork(
|
||||
model, hw_target, device_config, subgraph_context_.subgraph_name);
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
|
||||
} else {
|
||||
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
|
||||
exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
|
||||
exe_network_ = global_context_.ie_core.LoadNetwork(
|
||||
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
|
||||
}
|
||||
#else
|
||||
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
|
||||
exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
|
||||
exe_network_ = global_context_.ie_core.LoadNetwork(
|
||||
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
|
||||
#endif
|
||||
#endif
|
||||
} else {
|
||||
ie_cnn_network_ = CreateOVModel(model_proto, global_context_, subgraph_context_, const_outputs_map_);
|
||||
exe_network_ = global_context_.ie_core.LoadNetwork(ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
|
||||
exe_network_ = global_context_.ie_core.LoadNetwork(
|
||||
ie_cnn_network_, hw_target, device_config, subgraph_context_.subgraph_name);
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
|
||||
}
|
||||
} catch (const char* msg) {
|
||||
|
|
@ -127,10 +133,10 @@ void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
|
|||
}
|
||||
#endif
|
||||
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
|
||||
if (global_context_.device_type.find("VPUX") != std::string::npos) {
|
||||
if (global_context_.device_type.find("NPU") != std::string::npos) {
|
||||
std::pair<std::string, ov::Any> device_property;
|
||||
device_property = std::make_pair("VPU_COMPILER_TYPE", "MLIR");
|
||||
device_config.emplace(ov::device::properties("VPUX", device_property));
|
||||
device_property = std::make_pair("NPU_COMPILER_TYPE", "DRIVER");
|
||||
device_config.emplace(ov::device::properties("NPU", device_property));
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
|
@ -152,12 +158,12 @@ void BasicBackend::EnableCaching() {
|
|||
}
|
||||
|
||||
void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) {
|
||||
if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
|
||||
if (global_context_.enable_opencl_throttling == true &&
|
||||
global_context_.device_type.find("GPU") != std::string::npos) {
|
||||
LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
|
||||
std::pair<std::string, ov::Any> device_property;
|
||||
device_property = std::make_pair("PLUGIN_THROTTLE", "1");
|
||||
device_config.emplace(ov::device::properties("GPU_CONFIG_KEY", device_property));
|
||||
// device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -187,7 +193,9 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
|
|||
if (input_names.find(onnx_input_name) != input_names.end()) {
|
||||
input_name = onnx_input_name;
|
||||
} else {
|
||||
throw(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
|
||||
throw(log_tag +
|
||||
"Input names mismatch between OpenVINO and ONNX. " + onnx_input_name +
|
||||
" doesn't exist in the list of OpenVINO input tensor names");
|
||||
}
|
||||
size_t batch_slice_idx = 0;
|
||||
if (subgraph_context_.has_dynamic_input_shape &&
|
||||
|
|
@ -197,6 +205,7 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
|
|||
auto tensor_info = tensor.GetTensorTypeAndShapeInfo();
|
||||
auto tensor_shape = tensor_info.GetShape();
|
||||
auto tensor_size = tensor_shape.size();
|
||||
const char* tensor_data = tensor.GetTensorData<char>();
|
||||
auto tensor_iter = 0;
|
||||
ov::Shape input_tensor_shape = ov::Shape(tensor_size, 0);
|
||||
for (auto i = tensor_shape.begin(); i != tensor_shape.end(); ++i) {
|
||||
|
|
@ -204,8 +213,16 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
|
|||
tensor_iter += 1;
|
||||
}
|
||||
auto input = ie_cnn_network_->get_parameters().at(input_idx);
|
||||
OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
|
||||
FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
|
||||
OVTensorPtr tensor_ptr;
|
||||
// avoid input copies on the CPU device
|
||||
if (global_context_.device_type.find("CPU") != std::string::npos) {
|
||||
tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape,
|
||||
(void*)tensor_data);
|
||||
} else {
|
||||
tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
|
||||
FillInputBlob(tensor_ptr, batch_slice_idx, input_name, context, subgraph_context_);
|
||||
}
|
||||
|
||||
try {
|
||||
infer_request->SetTensor(input_name, tensor_ptr);
|
||||
} catch (const char* msg) {
|
||||
|
|
@ -251,7 +268,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
|
|||
if (input_names.find(onnx_input_name) != input_names.end()) {
|
||||
input_name = onnx_input_name;
|
||||
} else {
|
||||
throw(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
|
||||
throw(log_tag +
|
||||
"Input names mismatch between OpenVINO and ONNX. " +
|
||||
onnx_input_name +
|
||||
" doesn't exist in the list of OpenVINO input tensor names");
|
||||
}
|
||||
input_idx++;
|
||||
// Kernel Context Input Buffer
|
||||
|
|
@ -264,9 +284,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
|
|||
const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
|
||||
// Create an Input Remote Blob
|
||||
auto input = ie_cnn_network_->get_parameters().at(0);
|
||||
auto remote_blob = remote_context_->create_tensor(input->get_element_type(), input->get_shape(), *shared_buffer_const);
|
||||
ov::Tensor tensor = static_cast<ov::Tensor>(remote_blob);
|
||||
OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor);
|
||||
auto remote_blob = remote_context_->create_tensor(
|
||||
input->get_element_type(), input->get_shape(), *shared_buffer_const);
|
||||
ov::Tensor tensor_remote = static_cast<ov::Tensor>(remote_blob);
|
||||
OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_remote);
|
||||
infer_request->SetTensor(input_name, tensor_ptr);
|
||||
} else {
|
||||
OVTensorPtr graph_input_blob;
|
||||
|
|
@ -295,7 +316,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
|
|||
}
|
||||
}
|
||||
if (!output_name_found) {
|
||||
throw std::string(log_tag + "Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " + onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
|
||||
throw std::string(
|
||||
log_tag +
|
||||
"Output names mismatch between OpenVINO and ONNX. [ONNX Output: ] " +
|
||||
onnx_output_name + " doesn't exist in the list of OpenVINO output tensor names");
|
||||
}
|
||||
|
||||
size_t batch_size = 1;
|
||||
|
|
@ -307,9 +331,10 @@ void BasicBackend::StartRemoteAsyncInference(Ort::KernelContext& context, OVInfe
|
|||
const cl::Buffer* shared_buffer_const = static_cast<const cl::Buffer*>(tensor_data);
|
||||
// Create a shared Blob, set the Infer Request Output Blob
|
||||
auto output = ie_cnn_network_->get_results().at(0);
|
||||
auto remote_tensor = remote_context_->create_tensor(output->get_element_type(), output->get_shape(), *shared_buffer_const);
|
||||
ov::Tensor tensor = static_cast<ov::Tensor>(remote_tensor);
|
||||
OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor);
|
||||
auto remote_tensor =
|
||||
remote_context_->create_tensor(output->get_element_type(), output->get_shape(), *shared_buffer_const);
|
||||
ov::Tensor tensor_t = static_cast<ov::Tensor>(remote_tensor);
|
||||
OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(tensor_t);
|
||||
try {
|
||||
infer_request->SetTensor(output_name, tensor_ptr);
|
||||
} catch (const char* msg) {
|
||||
|
|
@ -364,7 +389,8 @@ void BasicBackend::CompleteAsyncInference(Ort::KernelContext& context, OVInferRe
|
|||
throw(msg);
|
||||
}
|
||||
size_t batch_size = 1;
|
||||
auto output_tensor = GetOutputTensor(context, batch_size, infer_request, output_name, subgraph_context_.output_names);
|
||||
auto output_tensor =
|
||||
GetOutputTensor(context, batch_size, infer_request, output_name, subgraph_context_.output_names);
|
||||
auto mem_info = output_tensor.GetTensorMemoryInfo();
|
||||
if (mem_info.GetAllocatorName() == OpenVINO_GPU) {
|
||||
return;
|
||||
|
|
@ -465,7 +491,8 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
|
|||
#ifndef IO_BUFFER_ENABLED // Printing performance counts is disabled when IO_BUFFER_ENABLED
|
||||
if (openvino_ep::backend_utils::IsDebugEnabled()) {
|
||||
inferRequestsQueue_->printstatus(); // Printing the elements of infer_requests_ vector pool only in debug mode
|
||||
std::string& hw_target = (global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
|
||||
std::string& hw_target =
|
||||
(global_context_.device_id != "") ? global_context_.device_id : global_context_.device_type;
|
||||
printPerformanceCounts(infer_request, std::cout, hw_target);
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -6,16 +6,17 @@
|
|||
#include <memory>
|
||||
|
||||
#define ORT_API_MANUAL_INIT
|
||||
#include "core/session/onnxruntime_cxx_api.h"
|
||||
#include "core/providers/openvino/contexts.h"
|
||||
#include "core/providers/openvino/ibackend.h"
|
||||
#include "core/providers/openvino/ov_interface.h"
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <condition_variable>
|
||||
#include <mutex>
|
||||
|
||||
#include "core/session/onnxruntime_cxx_api.h"
|
||||
#include "core/providers/openvino/contexts.h"
|
||||
#include "core/providers/openvino/ibackend.h"
|
||||
#include "core/providers/openvino/ov_interface.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace openvino_ep {
|
||||
|
||||
|
|
@ -29,7 +30,7 @@ class BasicBackend : public IBackend {
|
|||
void Infer(OrtKernelContext* context) override;
|
||||
|
||||
private:
|
||||
bool ImportBlob(std::string hw_target, bool vpu_status);
|
||||
bool ImportBlob(std::string hw_target, bool npu_status);
|
||||
void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&);
|
||||
bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
|
||||
void PopulateConfigValue(ov::AnyMap& device_config);
|
||||
|
|
|
|||
|
|
@ -3,6 +3,9 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <unordered_map>
|
||||
#include <string>
|
||||
#include "ov_interface.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
@ -12,7 +15,7 @@ namespace openvino_ep {
|
|||
struct GlobalContext {
|
||||
OVCore ie_core;
|
||||
bool is_wholly_supported_graph = false;
|
||||
bool enable_vpu_fast_compile = false;
|
||||
bool enable_npu_fast_compile = false;
|
||||
bool enable_opencl_throttling = false;
|
||||
bool enable_dynamic_shapes = false;
|
||||
size_t num_of_threads;
|
||||
|
|
@ -34,7 +37,7 @@ struct GlobalContext {
|
|||
struct SubGraphContext {
|
||||
bool has_dynamic_input_shape = false;
|
||||
bool enable_batching = false;
|
||||
bool set_vpu_config = false;
|
||||
bool set_npu_config = false;
|
||||
bool is_constant = false;
|
||||
void* context = 0;
|
||||
std::string subgraph_name;
|
||||
|
|
|
|||
|
|
@ -17,17 +17,18 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
|
|||
|
||||
openvino_ep::BackendManager::GetGlobalContext().device_type = info.device_type_;
|
||||
openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
|
||||
openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
|
||||
openvino_ep::BackendManager::GetGlobalContext().enable_npu_fast_compile = info.enable_npu_fast_compile_;
|
||||
openvino_ep::BackendManager::GetGlobalContext().cache_dir = info.cache_dir_;
|
||||
openvino_ep::BackendManager::GetGlobalContext().num_streams = info.num_streams_;
|
||||
openvino_ep::BackendManager::GetGlobalContext().context = info.context_;
|
||||
openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_;
|
||||
openvino_ep::BackendManager::GetGlobalContext().enable_dynamic_shapes = info.enable_dynamic_shapes_;
|
||||
|
||||
if ((int)info.num_of_threads_ <= 0) {
|
||||
if (static_cast<int>(info.num_of_threads_) <= 0) {
|
||||
openvino_ep::BackendManager::GetGlobalContext().num_of_threads = 8;
|
||||
} else if ((int)info.num_of_threads_ > 8) {
|
||||
std::string err_msg = std::string("\n [ERROR] num_of_threads configured during runtime is: ") + std::to_string(info.num_of_threads_) + "\nnum_of_threads configured should be >0 and <=8.\n";
|
||||
} else if (static_cast<int>(info.num_of_threads_) > 8) {
|
||||
std::string err_msg = std::string("\n [ERROR] num_of_threads configured during runtime is: ") +
|
||||
std::to_string(info.num_of_threads_) + "\nnum_of_threads configured should be >0 and <=8.\n";
|
||||
ORT_THROW(err_msg);
|
||||
} else {
|
||||
openvino_ep::BackendManager::GetGlobalContext().num_of_threads = info.num_of_threads_;
|
||||
|
|
@ -56,7 +57,8 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
|
|||
device_found = true;
|
||||
break;
|
||||
}
|
||||
if (info.device_type_.find("VPUX") != std::string::npos && (info.precision_ == "FP16" || info.precision_ == "U8")) {
|
||||
if ((info.device_type_.find("NPU") != std::string::npos) &&
|
||||
(info.precision_ == "FP16" || info.precision_ == "U8")) {
|
||||
device_found = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -109,11 +111,14 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
|
|||
openvino_ep::BackendManager::GetGlobalContext().onnx_model_name = graph_viewer.Name();
|
||||
#ifdef _WIN32
|
||||
std::wstring onnx_path = graph_viewer.ModelPath().ToPathString();
|
||||
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = std::string(onnx_path.begin(), onnx_path.end());
|
||||
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name =
|
||||
std::string(onnx_path.begin(), onnx_path.end());
|
||||
#else
|
||||
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name = graph_viewer.ModelPath().ToPathString();
|
||||
openvino_ep::BackendManager::GetGlobalContext().onnx_model_path_name =
|
||||
graph_viewer.ModelPath().ToPathString();
|
||||
#endif
|
||||
openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version = graph_viewer.DomainToVersionMap().at(kOnnxDomain);
|
||||
openvino_ep::BackendManager::GetGlobalContext().onnx_opset_version =
|
||||
graph_viewer.DomainToVersionMap().at(kOnnxDomain);
|
||||
|
||||
#if defined(OPENVINO_2022_1)
|
||||
openvino_ep::GetCapability obj(graph_viewer,
|
||||
|
|
@ -151,7 +156,8 @@ common::Status OpenVINOExecutionProvider::Compile(
|
|||
|
||||
openvino_ep::BackendManager::GetGlobalContext().use_api_2 = true;
|
||||
|
||||
std::shared_ptr<openvino_ep::BackendManager> backend_manager = std::make_shared<openvino_ep::BackendManager>(fused_node, graph_body_viewer, *GetLogger());
|
||||
std::shared_ptr<openvino_ep::BackendManager> backend_manager =
|
||||
std::make_shared<openvino_ep::BackendManager>(fused_node, graph_body_viewer, *GetLogger());
|
||||
|
||||
compute_info.create_state_func =
|
||||
[backend_manager](ComputeContext* context, FunctionState* state) {
|
||||
|
|
|
|||
|
|
@ -3,19 +3,28 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "backend_manager.h"
|
||||
#include <map>
|
||||
#include <algorithm>
|
||||
#include <iostream>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "backend_manager.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
static void print_build_options() {
|
||||
std::cout << "[ERROR] INVALID DEVICE BUILD TYPE SPECIFIED" << std::endl;
|
||||
std::cout << "Specify the keyword HETERO (or) MULTI (or) AUTO followed by the devices in the order of priority you want to build" << std::endl;
|
||||
std::cout << "The different hardware devices that can be added with HETERO/MULTI/AUTO build ";
|
||||
std::cout << "are ['CPU','GPU','VPUX']" << std::endl;
|
||||
std::cout << "An example of how to specify the HETERO or MULTI or AUTO build type. Ex: HETERO:GPU,CPU Ex: MULTI:GPU,CPU Ex: AUTO:GPU,CPU" << std::endl;
|
||||
std::cout << "Specify the keyword HETERO (or) MULTI (or) AUTO followed by the devices in the order of priority "
|
||||
<< "you want to build"
|
||||
<< std::endl;
|
||||
std::cout << "The different hardware devices that can be added with HETERO/MULTI/AUTO build "
|
||||
<< "are ['CPU','GPU']"
|
||||
<< std::endl;
|
||||
std::cout << "An example of how to specify the HETERO or MULTI or AUTO build type. "
|
||||
<< "Ex: HETERO:GPU,CPU Ex: MULTI:GPU,CPU Ex: AUTO:GPU,CPU"
|
||||
<< std::endl;
|
||||
}
|
||||
|
||||
static std::vector<std::string> split(const std::string& s, char delim) {
|
||||
|
|
@ -39,7 +48,7 @@ static std::vector<std::string> parseDevices(const std::string& device_string) {
|
|||
print_build_options();
|
||||
ORT_THROW("Invalid device string: " + device_string);
|
||||
}
|
||||
std::vector<std::string> dev_options = {"CPU", "GPU", "VPUX"};
|
||||
std::vector<std::string> dev_options = {"CPU", "GPU"};
|
||||
for (std::string dev : devices) {
|
||||
if (!std::count(dev_options.begin(), dev_options.end(), dev)) {
|
||||
print_build_options();
|
||||
|
|
@ -53,7 +62,7 @@ static std::vector<std::string> parseDevices(const std::string& device_string) {
|
|||
struct OpenVINOExecutionProviderInfo {
|
||||
std::string device_type_;
|
||||
std::string precision_;
|
||||
bool enable_vpu_fast_compile_;
|
||||
bool enable_npu_fast_compile_;
|
||||
std::string device_id_;
|
||||
size_t num_of_threads_;
|
||||
std::string cache_dir_;
|
||||
|
|
@ -62,11 +71,18 @@ struct OpenVINOExecutionProviderInfo {
|
|||
bool enable_opencl_throttling_;
|
||||
bool enable_dynamic_shapes_;
|
||||
|
||||
explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id,
|
||||
explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_npu_fast_compile, std::string dev_id,
|
||||
size_t num_of_threads, std::string cache_dir, int num_streams,
|
||||
void* context, bool enable_opencl_throttling,
|
||||
bool enable_dynamic_shapes)
|
||||
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), cache_dir_(cache_dir), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
|
||||
: enable_npu_fast_compile_(enable_npu_fast_compile),
|
||||
device_id_(dev_id),
|
||||
num_of_threads_(num_of_threads),
|
||||
cache_dir_(cache_dir),
|
||||
num_streams_(num_streams),
|
||||
context_(context),
|
||||
enable_opencl_throttling_(enable_opencl_throttling),
|
||||
enable_dynamic_shapes_(enable_dynamic_shapes) {
|
||||
if (dev_type == "") {
|
||||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
|
||||
<< "No runtime device selection option provided.";
|
||||
|
|
@ -82,11 +98,11 @@ struct OpenVINOExecutionProviderInfo {
|
|||
#elif defined OPENVINO_CONFIG_GPU_FP16
|
||||
device_type_ = "GPU";
|
||||
precision_ = "FP16";
|
||||
#elif defined OPENVINO_CONFIG_VPUX_FP16
|
||||
device_type_ = "VPUX";
|
||||
#elif defined OPENVINO_CONFIG_NPU_FP16
|
||||
device_type_ = "NPU";
|
||||
precision_ = "FP16";
|
||||
#elif defined OPENVINO_CONFIG_VPUX_U8
|
||||
device_type_ = "VPUX";
|
||||
#elif defined OPENVINO_CONFIG_NPU_U8
|
||||
device_type_ = "NPU";
|
||||
precision_ = "U8";
|
||||
#elif defined OPENVINO_CONFIG_HETERO || defined OPENVINO_CONFIG_MULTI || defined OPENVINO_CONFIG_AUTO
|
||||
#ifdef DEVICE_NAME
|
||||
|
|
@ -126,11 +142,11 @@ struct OpenVINOExecutionProviderInfo {
|
|||
} else if (dev_type == "GPU.1_FP16") {
|
||||
device_type_ = "GPU.1";
|
||||
precision_ = "FP16";
|
||||
} else if (dev_type == "VPUX_FP16") {
|
||||
device_type_ = "VPUX";
|
||||
} else if (dev_type == "NPU_FP16") {
|
||||
device_type_ = "NPU";
|
||||
precision_ = "FP16";
|
||||
} else if (dev_type == "VPUX_U8") {
|
||||
device_type_ = "VPUX";
|
||||
} else if (dev_type == "NPU_U8") {
|
||||
device_type_ = "NPU";
|
||||
precision_ = "U8";
|
||||
} else if (dev_type.find("HETERO") == 0 || dev_type.find("MULTI") == 0) {
|
||||
std::vector<std::string> devices = parseDevices(dev_type);
|
||||
|
|
|
|||
|
|
@ -8,11 +8,16 @@
|
|||
|
||||
namespace onnxruntime {
|
||||
struct OpenVINOProviderFactory : IExecutionProviderFactory {
|
||||
OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
|
||||
OpenVINOProviderFactory(const char* device_type, bool enable_npu_fast_compile,
|
||||
const char* device_id, size_t num_of_threads,
|
||||
const char* cache_dir, int num_streams, void* context,
|
||||
bool enable_opencl_throttling, bool enable_dynamic_shapes)
|
||||
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
|
||||
: enable_npu_fast_compile_(enable_npu_fast_compile),
|
||||
num_of_threads_(num_of_threads),
|
||||
num_streams_(num_streams),
|
||||
context_(context),
|
||||
enable_opencl_throttling_(enable_opencl_throttling),
|
||||
enable_dynamic_shapes_(enable_dynamic_shapes) {
|
||||
device_type_ = (device_type == nullptr) ? "" : device_type;
|
||||
device_id_ = (device_id == nullptr) ? "" : device_id;
|
||||
cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
|
||||
|
|
@ -24,7 +29,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
|
|||
|
||||
private:
|
||||
std::string device_type_;
|
||||
bool enable_vpu_fast_compile_;
|
||||
bool enable_npu_fast_compile_;
|
||||
std::string device_id_;
|
||||
size_t num_of_threads_;
|
||||
std::string cache_dir_;
|
||||
|
|
@ -35,7 +40,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
|
|||
};
|
||||
|
||||
std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
|
||||
OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_,
|
||||
OpenVINOExecutionProviderInfo info(device_type_, enable_npu_fast_compile_, device_id_, num_of_threads_,
|
||||
cache_dir_, num_streams_, context_, enable_opencl_throttling_,
|
||||
enable_dynamic_shapes_);
|
||||
return std::make_unique<OpenVINOExecutionProvider>(info);
|
||||
|
|
@ -59,17 +64,18 @@ struct OpenVINO_Provider : Provider {
|
|||
|
||||
std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision
|
||||
// with these values at runtime.
|
||||
bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to
|
||||
// speeds up the model's compilation to VPU device specific format.
|
||||
bool enable_npu_fast_compile = false; // [enable_npu_fast_compile]: Fast-compile may be optionally enabled to
|
||||
// speeds up the model's compilation to NPU device specific format.
|
||||
const char* device_id = ""; // [device_id]: Selects a particular hardware device for inference.
|
||||
size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of
|
||||
int num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of
|
||||
// threads with this value at runtime.
|
||||
const char* cache_dir = ""; // [cache_dir]: specify the path to
|
||||
// dump and load the blobs for the model caching/kernel caching (GPU)
|
||||
// feature. If blob files are already present, it will be directly loaded.
|
||||
int num_streams = 1; // [num_streams]: Option that specifies the number of parallel inference
|
||||
// requests to be processed on a given `device_type`. Overrides the
|
||||
// accelerator default value of number of streams with this value at runtime.
|
||||
// accelerator default value of number of streams
|
||||
// with this value at runtime.
|
||||
bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
|
||||
// device (Reduces CPU Utilization when using GPU)
|
||||
bool enable_dynamic_shapes = false; // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device)
|
||||
|
|
@ -80,14 +86,15 @@ struct OpenVINO_Provider : Provider {
|
|||
|
||||
std::set<std::string> ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32",
|
||||
"GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
|
||||
"GPU.0_FP16", "GPU.1_FP16",
|
||||
"VPUX_FP16", "VPUX_U8"};
|
||||
"GPU.0_FP16", "GPU.1_FP16"};
|
||||
if (!((ov_supported_device_types.find(device_type) != ov_supported_device_types.end()) ||
|
||||
(device_type.find("HETERO:") == 0) || (device_type.find("MULTI:") == 0) || (device_type.find("AUTO:") == 0))) {
|
||||
(device_type.find("HETERO:") == 0) ||
|
||||
(device_type.find("MULTI:") == 0) ||
|
||||
(device_type.find("AUTO:") == 0))) {
|
||||
ORT_THROW(
|
||||
"[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
|
||||
"Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
|
||||
"'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from"
|
||||
"'GPU.0_FP16', 'GPU.1_FP16' or from"
|
||||
" HETERO/MULTI/AUTO options available. \n");
|
||||
}
|
||||
}
|
||||
|
|
@ -97,30 +104,37 @@ struct OpenVINO_Provider : Provider {
|
|||
if (provider_options_map.find("cache_dir") != provider_options_map.end()) {
|
||||
cache_dir = provider_options_map.at("cache_dir").c_str();
|
||||
}
|
||||
|
||||
if (provider_options_map.find("context") != provider_options_map.end()) {
|
||||
context = (void*)provider_options_map.at("context").c_str();
|
||||
std::string str = provider_options_map.at("context");
|
||||
uint64_t number = std::strtoull(str.c_str(), nullptr, 16);
|
||||
context = reinterpret_cast<void*>(number);
|
||||
}
|
||||
|
||||
if (provider_options_map.find("num_of_threads") != provider_options_map.end()) {
|
||||
num_of_threads = std::stoi(provider_options_map.at("num_of_threads"));
|
||||
if (num_of_threads <= 0) {
|
||||
num_of_threads = 1;
|
||||
LOGS_DEFAULT(WARNING) << "[OpenVINO-EP] The value for the key 'num_threads' should be in the positive range.\n "
|
||||
<< "Executing with num_threads=1";
|
||||
}
|
||||
}
|
||||
|
||||
if (provider_options_map.find("num_streams") != provider_options_map.end()) {
|
||||
num_streams = std::stoi(provider_options_map.at("num_streams"));
|
||||
if (num_streams <= 0 && num_streams > 8) {
|
||||
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_streams' should be in the range of 1-8 \n");
|
||||
if (num_streams <= 0) {
|
||||
num_streams = 1;
|
||||
LOGS_DEFAULT(WARNING) << "[OpenVINO-EP] The value for the key 'num_streams' should be in the range of 1-8.\n "
|
||||
<< "Executing with num_streams=1";
|
||||
}
|
||||
}
|
||||
std::string bool_flag = "";
|
||||
if (provider_options_map.find("enable_vpu_fast_compile") != provider_options_map.end()) {
|
||||
bool_flag = provider_options_map.at("enable_vpu_fast_compile");
|
||||
if (provider_options_map.find("enable_npu_fast_compile") != provider_options_map.end()) {
|
||||
bool_flag = provider_options_map.at("enable_npu_fast_compile");
|
||||
if (bool_flag == "true" || bool_flag == "True")
|
||||
enable_vpu_fast_compile = true;
|
||||
enable_npu_fast_compile = true;
|
||||
else if (bool_flag == "false" || bool_flag == "False")
|
||||
enable_vpu_fast_compile = false;
|
||||
enable_npu_fast_compile = false;
|
||||
bool_flag = "";
|
||||
}
|
||||
|
||||
|
|
@ -141,7 +155,7 @@ struct OpenVINO_Provider : Provider {
|
|||
enable_dynamic_shapes = false;
|
||||
}
|
||||
return std::make_shared<OpenVINOProviderFactory>(const_cast<char*>(device_type.c_str()),
|
||||
enable_vpu_fast_compile,
|
||||
enable_npu_fast_compile,
|
||||
device_id,
|
||||
num_of_threads,
|
||||
cache_dir,
|
||||
|
|
@ -157,7 +171,6 @@ struct OpenVINO_Provider : Provider {
|
|||
void Shutdown() override {
|
||||
openvino_ep::BackendManager::ReleaseGlobalContext();
|
||||
}
|
||||
|
||||
} g_provider;
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -29,7 +29,10 @@ std::shared_ptr<OVNetwork> OVCore::ReadModel(const std::string& model) const {
|
|||
}
|
||||
}
|
||||
|
||||
OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name) {
|
||||
OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network,
|
||||
std::string& hw_target,
|
||||
ov::AnyMap& device_config,
|
||||
std::string name) {
|
||||
ov::CompiledModel obj;
|
||||
try {
|
||||
obj = oe.compile_model(ie_cnn_network, hw_target, device_config);
|
||||
|
|
@ -43,7 +46,10 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std
|
|||
}
|
||||
|
||||
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
|
||||
OVExeNetwork OVCore::LoadNetwork(const std::string& model, std::string& hw_target, ov::AnyMap& device_config, std::string name) {
|
||||
OVExeNetwork OVCore::LoadNetwork(const std::string& model,
|
||||
std::string& hw_target,
|
||||
ov::AnyMap& device_config,
|
||||
std::string name) {
|
||||
ov::CompiledModel obj;
|
||||
try {
|
||||
obj = oe.compile_model(model, ov::Tensor(), hw_target, device_config);
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#pragma once
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
#if defined(OPENVINO_2022_1) || (OPENVINO_2022_2) || (OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1)
|
||||
#define OV_API_20
|
||||
|
|
@ -43,9 +44,15 @@ class OVCore {
|
|||
|
||||
public:
|
||||
std::shared_ptr<OVNetwork> ReadModel(const std::string& model_stream) const;
|
||||
OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name);
|
||||
OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network,
|
||||
std::string& hw_target,
|
||||
ov::AnyMap& device_config,
|
||||
std::string name);
|
||||
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
|
||||
OVExeNetwork LoadNetwork(const std::string& model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name);
|
||||
OVExeNetwork LoadNetwork(const std::string& model_stream,
|
||||
std::string& hw_target,
|
||||
ov::AnyMap& device_config,
|
||||
std::string name);
|
||||
#endif
|
||||
void SetCache(std::string cache_dir_path);
|
||||
#ifdef IO_BUFFER_ENABLED
|
||||
|
|
@ -62,7 +69,7 @@ class OVExeNetwork {
|
|||
ov::CompiledModel obj;
|
||||
|
||||
public:
|
||||
OVExeNetwork(ov::CompiledModel md) { obj = md; }
|
||||
explicit OVExeNetwork(ov::CompiledModel md) { obj = md; }
|
||||
OVExeNetwork() { obj = ov::CompiledModel(); }
|
||||
ov::CompiledModel& Get() { return obj; }
|
||||
OVInferRequest CreateInferRequest();
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
|
||||
#pragma once
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include "data_ops.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
|
|
|||
|
|
@ -24,7 +24,8 @@ namespace openvino_ep {
|
|||
|
||||
// Constructor
|
||||
GetCapability::GetCapability(const GraphViewer& graph_viewer_param, std::string device_type_param,
|
||||
const std::string version_param) : graph_viewer_(graph_viewer_param), device_type_(device_type_param) {
|
||||
const std::string version_param)
|
||||
: graph_viewer_(graph_viewer_param), device_type_(device_type_param) {
|
||||
if (version_param == "V_2022_1") {
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2022_1, device_type_);
|
||||
} else if (version_param == "V_2022_2") {
|
||||
|
|
@ -114,11 +115,11 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
|
|||
}
|
||||
openvino_ep::BackendManager::GetGlobalContext().is_wholly_supported_graph = true;
|
||||
|
||||
} else { // unsupported_nodes_idx.empty()
|
||||
|
||||
} else { // unsupported_nodes_idx.empty()
|
||||
#if defined(OPENVINO_DISABLE_GRAPH_PARTITION) // disables graph partition at build time
|
||||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] DISABLE_GRAPH_PARTITION option is set";
|
||||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model is not fully supported by OpenVINO, so making the full model fall back to default CPU Execution Provider";
|
||||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model is not fully supported by OpenVINO, "
|
||||
<< "so making the full model fall back to default CPU Execution Provider";
|
||||
return result;
|
||||
#endif
|
||||
|
||||
|
|
@ -159,7 +160,13 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
|
|||
|
||||
std::vector<std::string> cluster_graph_inputs, cluster_inputs, const_inputs, cluster_outputs;
|
||||
|
||||
GetInputsOutputsOfCluster(graph_viewer_, this_cluster, ng_required_initializers, cluster_graph_inputs, cluster_inputs, const_inputs, cluster_outputs);
|
||||
GetInputsOutputsOfCluster(graph_viewer_,
|
||||
this_cluster,
|
||||
ng_required_initializers,
|
||||
cluster_graph_inputs,
|
||||
cluster_inputs,
|
||||
const_inputs,
|
||||
cluster_outputs);
|
||||
|
||||
bool omit_subgraph = false;
|
||||
// Omitting zero dim subgraphs
|
||||
|
|
|
|||
|
|
@ -2,11 +2,15 @@
|
|||
// Licensed under the MIT License
|
||||
|
||||
#include <unordered_set>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include <map>
|
||||
#include <set>
|
||||
|
||||
#include "core/providers/shared_library/provider_api.h"
|
||||
#include "../backend_utils.h"
|
||||
#include "../backend_manager.h"
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "data_ops.h"
|
||||
#include "capabilities.h"
|
||||
#include "utils.h"
|
||||
|
|
@ -72,269 +76,355 @@ std::set<std::string> ops_supported_as_function = {
|
|||
|
||||
std::vector<SupportedOp> supported_op_mode = {
|
||||
{"Abs", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Abs", V_2023_0, {"VPUX"}},
|
||||
{"Abs", V_2023_0, {"NPU"}},
|
||||
{"Acos", V_2020_4, {"CPU"}},
|
||||
{"Acos", V_2022_1, {"GPU"}},
|
||||
{"Acos", V_2023_1, {"NPU"}},
|
||||
{"Acosh", V_2020_4, {"CPU"}},
|
||||
{"Acosh", V_2022_1, {"GPU"}},
|
||||
{"Acosh", V_2023_1, {"NPU"}},
|
||||
{"Add", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Add", V_2023_0, {"VPUX"}},
|
||||
{"Add", V_2023_0, {"NPU"}},
|
||||
{"And", V_2020_4, {"CPU", "GPU"}},
|
||||
{"And", V_2023_1, {"NPU"}},
|
||||
{"ArgMax", V_2020_4, {"CPU"}},
|
||||
{"ArgMax", V_2021_1, {"GPU"}},
|
||||
{"ArgMin", V_2020_4, {"CPU"}},
|
||||
{"ArgMin", V_2022_1, {"GPU"}},
|
||||
{"Asin", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Asin", V_2023_1, {"NPU"}},
|
||||
{"Asinh", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Asinh", V_2023_1, {"NPU"}},
|
||||
{"Atan", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Atan", V_2023_1, {"NPU"}},
|
||||
{"Atanh", V_2020_4, {"CPU"}},
|
||||
{"Atanh", V_2022_1, {"GPU"}},
|
||||
{"Atanh", V_2023_1, {"NPU"}},
|
||||
{"AveragePool", V_2020_4, {"CPU", "GPU"}},
|
||||
{"AveragePool", V_2023_0, {"VPUX"}},
|
||||
{"AveragePool", V_2023_0, {"NPU"}},
|
||||
{"BatchNormalization", V_2020_4, {"CPU", "GPU"}},
|
||||
{"BatchNormalization", V_2023_0, {"VPUX"}},
|
||||
{"BatchNormalization", V_2023_0, {"NPU"}},
|
||||
{"BitShift", V_2022_1, {"CPU"}},
|
||||
{"BitShift", V_2023_1, {"NPU"}},
|
||||
{"Cast", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Cast", V_2023_0, {"VPUX"}},
|
||||
{"Cast", V_2023_0, {"NPU"}},
|
||||
{"CastLike", V_2023_1, {"CPU", "GPU", "NPU"}},
|
||||
{"Ceil", V_2020_4, {"GPU"}},
|
||||
{"Ceil", V_2021_4, {"CPU"}},
|
||||
{"Ceil", V_2023_1, {"NPU"}},
|
||||
{"Celu", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Clip", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Clip", V_2023_0, {"VPUX"}},
|
||||
{"Clip", V_2023_0, {"NPU"}},
|
||||
{"Compress", V_2023_1, {"CPU", "GPU"}},
|
||||
{"Concat", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Concat", V_2023_0, {"VPUX"}},
|
||||
{"Concat", V_2023_0, {"NPU"}},
|
||||
{"Constant", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Constant", V_2023_0, {"VPUX"}},
|
||||
{"Constant", V_2023_0, {"NPU"}},
|
||||
{"ConstantOfShape", V_2020_4, {"CPU", "GPU"}},
|
||||
{"ConstantOfShape", V_2023_0, {"VPUX"}}, // Gets mapped to broadcast op in the plugin.
|
||||
{"ConstantOfShape", V_2023_0, {"NPU"}}, // Gets mapped to broadcast op in the plugin.
|
||||
{"Conv", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Conv", V_2023_0, {"VPUX"}},
|
||||
{"Conv", V_2023_0, {"NPU"}},
|
||||
{"ConvInteger", V_2022_1, {"CPU", "GPU"}},
|
||||
{"ConvInteger", V_2023_1, {"NPU"}},
|
||||
{"ConvTranspose", V_2020_4, {"CPU", "GPU"}},
|
||||
{"ConvTranspose", V_2023_1, {"NPU"}},
|
||||
{"Cos", V_2020_4, {"CPU"}},
|
||||
{"Cos", V_2022_1, {"GPU"}},
|
||||
{"Cos", V_2023_0, {"VPUX"}},
|
||||
{"Cos", V_2023_0, {"NPU"}},
|
||||
{"Cosh", V_2020_4, {"CPU"}},
|
||||
{"Cosh", V_2022_1, {"GPU"}},
|
||||
{"Cosh", V_2023_1, {"NPU"}},
|
||||
{"CumSum", V_2022_1, {"CPU", "GPU"}},
|
||||
{"CumSum", V_2023_0, {"VPUX"}},
|
||||
{"CumSum", V_2023_0, {"NPU"}},
|
||||
{"DepthToSpace", V_2020_4, {"CPU", "GPU"}},
|
||||
{"DepthToSpace", V_2023_0, {"VPUX"}},
|
||||
{"DepthToSpace", V_2023_0, {"NPU"}},
|
||||
{"DequantizeLinear", V_2021_4, {"CPU", "GPU"}},
|
||||
{"DequantizeLinear", V_2023_0, {"VPUX"}},
|
||||
{"DequantizeLinear", V_2023_0, {"NPU"}},
|
||||
{"Div", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Div", V_2023_0, {"VPUX"}},
|
||||
{"Div", V_2023_0, {"NPU"}},
|
||||
{"Dropout", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Dropout", V_2023_0, {"VPUX"}},
|
||||
{"Dropout", V_2023_0, {"NPU"}},
|
||||
{"Elu", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Elu", V_2023_0, {"VPUX"}},
|
||||
{"Elu", V_2023_0, {"NPU"}},
|
||||
// {"Einsum", V_2023_0, {"CPU", "GPU"}},
|
||||
{"Equal", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Equal", V_2023_0, {"VPUX"}}, // Added for whisper decoder model.
|
||||
{"Equal", V_2023_0, {"NPU"}}, // Added for whisper decoder model.
|
||||
{"Erf", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Erf", V_2023_0, {"VPUX"}},
|
||||
{"Erf", V_2023_0, {"NPU"}},
|
||||
{"Exp", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Exp", V_2023_0, {"VPUX"}},
|
||||
{"Exp", V_2023_0, {"NPU"}},
|
||||
{"Expand", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Expand", V_2023_0, {"VPUX"}}, // Gets mapped to broadcast op and multiply op in the plugin.
|
||||
{"Expand", V_2023_0, {"NPU"}}, // Gets mapped to broadcast op and multiply op in the plugin.
|
||||
{"EyeLike", V_2022_1, {"CPU"}},
|
||||
{"EyeLike", V_2023_0, {"VPUX"}}, // NoOP
|
||||
{"EyeLike", V_2023_0, {"NPU"}}, // NoOP
|
||||
{"Flatten", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Flatten", V_2023_0, {"VPUX"}},
|
||||
{"Flatten", V_2023_0, {"NPU"}},
|
||||
{"Floor", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Floor", V_2023_1, {"NPU"}},
|
||||
{"Gather", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Gather", V_2023_0, {"VPUX"}},
|
||||
{"Gather", V_2023_0, {"NPU"}},
|
||||
{"GatherElements", V_2022_2, {"CPU", "GPU"}},
|
||||
{"GatherElements", V_2023_1, {"NPU"}},
|
||||
{"GatherND", V_2021_4, {"CPU", "GPU"}},
|
||||
{"GatherND", V_2023_1, {"NPU"}},
|
||||
{"Gemm", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Gemm", V_2023_0, {"VPUX"}},
|
||||
{"Gemm", V_2023_0, {"NPU"}},
|
||||
{"GlobalAveragePool", V_2020_4, {"CPU", "GPU"}},
|
||||
{"GlobalAveragePool", V_2023_0, {"VPUX"}},
|
||||
{"GlobalAveragePool", V_2023_0, {"NPU"}},
|
||||
{"GlobalLpPool", V_2020_4, {"CPU", "GPU"}},
|
||||
{"GlobalLpPool", V_2023_1, {"NPU"}},
|
||||
{"GlobalMaxPool", V_2022_1, {"CPU", "GPU"}},
|
||||
{"GlobalMaxPool", V_2023_1, {"NPU"}},
|
||||
{"Greater", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Greater", V_2023_0, {"VPUX"}},
|
||||
{"Greater", V_2023_0, {"NPU"}},
|
||||
{"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
|
||||
{"GreaterOrEqual", V_2023_0, {"VPUX"}},
|
||||
{"GreaterOrEqual", V_2023_0, {"NPU"}},
|
||||
{"GridSample", V_2022_3, {"CPU"}},
|
||||
{"GridSample", V_2023_0, {"GPU"}},
|
||||
{"GridSample", V_2023_1, {"NPU"}},
|
||||
{"HardMax", V_2023_1, {"CPU", "GPU", "NPU"}},
|
||||
{"Identity", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Identity", V_2023_0, {"VPUX"}}, // NoOP
|
||||
{"Identity", V_2023_0, {"NPU"}}, // NoOP
|
||||
{"If", V_2022_3, {"CPU", "GPU"}},
|
||||
{"If", V_2023_1, {"NPU"}},
|
||||
{"ImageScaler", V_2022_1, {"CPU", "GPU"}},
|
||||
{"ImageScaler", V_2023_0, {"VPUX"}},
|
||||
{"ImageScaler", V_2023_0, {"NPU"}},
|
||||
{"InstanceNormalization", V_2020_4, {"CPU", "GPU"}},
|
||||
{"InstanceNormalization", V_2023_0, {"VPUX"}},
|
||||
{"InstanceNormalization", V_2023_0, {"NPU"}},
|
||||
{"HardSigmoid", V_2020_4, {"CPU", "GPU"}},
|
||||
{"HardSigmoid", V_2023_1, {"NPU"}},
|
||||
{"HardMax", V_2022_1, {"CPU", "GPU"}},
|
||||
{"LeakyRelu", V_2020_4, {"CPU", "GPU"}},
|
||||
{"LeakyRelu", V_2023_0, {"VPUX"}},
|
||||
{"LeakyRelu", V_2023_0, {"NPU"}},
|
||||
{"Less", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Less", V_2023_0, {"VPUX"}}, // Added for whisper decoder model.
|
||||
{"Less", V_2023_0, {"NPU"}}, // Added for whisper decoder model.
|
||||
{"LessOrEqual", V_2022_1, {"CPU", "GPU"}},
|
||||
{"LessOrEqual", V_2023_0, {"VPUX"}},
|
||||
{"LessOrEqual", V_2023_0, {"NPU"}},
|
||||
{"Log", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Log", V_2023_0, {"VPUX"}},
|
||||
{"Log", V_2023_0, {"NPU"}},
|
||||
{"LogSoftMax", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Loop", V_2021_4, {"CPU", "GPU"}},
|
||||
{"LpNormalization", V_2023_1, {"CPU", "GPU", "NPU"}},
|
||||
{"LpPool", V_2023_1, {"CPU", "GPU", "NPU"}},
|
||||
{"LRN", V_2020_4, {"CPU", "GPU"}},
|
||||
{"LRN", V_2023_0, {"VPUX"}},
|
||||
{"LRN", V_2023_0, {"NPU"}},
|
||||
{"LSTM", V_2020_4, {"CPU", "GPU"}},
|
||||
{"LSTM", V_2023_1, {"NPU"}},
|
||||
{"MatMul", V_2020_4, {"CPU", "GPU"}},
|
||||
{"MatMul", V_2023_0, {"VPUX"}},
|
||||
{"MatMul", V_2023_0, {"NPU"}},
|
||||
{"MatMulInteger", V_2022_1, {"CPU"}},
|
||||
{"MatMulInteger", V_2023_1, {"NPU"}},
|
||||
{"Max", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Max", V_2023_0, {"VPUX"}},
|
||||
{"Max", V_2023_0, {"NPU"}},
|
||||
{"MaxPool", V_2020_4, {"CPU", "GPU"}},
|
||||
{"MaxPool", V_2023_0, {"VPUX"}},
|
||||
{"MaxPool", V_2023_0, {"NPU"}},
|
||||
{"Mean", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Mean", V_2023_0, {"VPUX"}},
|
||||
{"Mean", V_2023_0, {"NPU"}},
|
||||
{"MeanVarianceNormalization", V_2022_1, {"CPU", "GPU"}},
|
||||
{"MeanVarianceNormalization", V_2023_1, {"NPU"}},
|
||||
{"Min", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Min", V_2023_0, {"VPUX"}},
|
||||
{"Min", V_2023_0, {"NPU"}},
|
||||
{"Mod", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Mul", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Mul", V_2023_0, {"VPUX"}},
|
||||
{"Mul", V_2023_0, {"NPU"}},
|
||||
{"Neg", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Neg", V_2023_0, {"VPUX"}},
|
||||
{"Neg", V_2023_0, {"NPU"}},
|
||||
{"NonMaxSuppression", V_2021_1, {"CPU", "GPU"}},
|
||||
{"NonMaxSuppression", V_2023_1, {"NPU"}},
|
||||
{"NonZero", V_2021_1, {"CPU"}},
|
||||
{"NonZero", V_2023_0, {"GPU"}},
|
||||
{"Not", V_2021_1, {"CPU", "GPU"}},
|
||||
{"Not", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Not", V_2023_1, {"NPU"}},
|
||||
{"OneHot", V_2020_4, {"CPU", "GPU"}},
|
||||
{"OneHot", V_2023_1, {"NPU"}},
|
||||
{"Or", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Or", V_2023_1, {"NPU"}},
|
||||
{"Pad", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Pad", V_2023_0, {"VPUX"}},
|
||||
{"Pad", V_2023_0, {"NPU"}},
|
||||
{"Pow", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Pow", V_2023_0, {"VPUX"}},
|
||||
{"Pow", V_2023_0, {"NPU"}},
|
||||
{"PRelu", V_2020_4, {"CPU", "GPU"}},
|
||||
{"PRelu", V_2023_0, {"VPUX"}},
|
||||
{"PRelu", V_2023_0, {"NPU"}},
|
||||
{"QLinearMatMul", V_2022_3, {"CPU"}},
|
||||
// {"QLinearMatMul", V_2023_1, {"NPU"}},
|
||||
{"QuantizeLinear", V_2021_4, {"CPU", "GPU"}},
|
||||
{"QuantizeLinear", V_2023_0, {"VPUX"}},
|
||||
{"QuantizeLinear", V_2023_0, {"NPU"}},
|
||||
{"RNN", V_2023_1, {"CPU", "GPU"}},
|
||||
{"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
|
||||
{"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
|
||||
{"RandomNormalLike", V_2023_1, {"NPU"}},
|
||||
{"RandomNormal", V_2023_0, {"CPU", "GPU"}},
|
||||
{"RandomNormal", V_2023_1, {"NPU"}},
|
||||
{"Range", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Range", V_2023_0, {"VPUX"}},
|
||||
{"Range", V_2023_0, {"NPU"}},
|
||||
{"Reciprocal", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Reciprocal", V_2023_0, {"VPUX"}},
|
||||
{"Reciprocal", V_2023_0, {"NPU"}},
|
||||
{"ReduceL1", V_2022_1, {"CPU", "GPU"}},
|
||||
{"ReduceL1", V_2023_1, {"NPU"}},
|
||||
{"ReduceL2", V_2022_1, {"CPU", "GPU"}},
|
||||
{"ReduceL2", V_2023_1, {"NPU"}},
|
||||
{"ReduceLogSum", V_2020_4, {"CPU"}},
|
||||
{"ReduceLogSum", V_2022_1, {"CPU", "GPU"}},
|
||||
{"ReduceLogSum", V_2023_1, {"NPU"}},
|
||||
{"ReduceLogSumExp", V_2022_1, {"CPU", "GPU"}},
|
||||
{"ReduceLogSumExp", V_2023_1, {"NPU"}},
|
||||
{"ReduceMax", V_2020_4, {"CPU", "GPU"}},
|
||||
{"ReduceMax", V_2023_1, {"NPU"}},
|
||||
{"ReduceMean", V_2020_4, {"CPU", "GPU"}},
|
||||
{"ReduceMean", V_2023_0, {"VPUX"}},
|
||||
{"ReduceMean", V_2023_0, {"NPU"}},
|
||||
{"ReduceMin", V_2020_4, {"CPU", "GPU"}},
|
||||
{"ReduceMin", V_2023_1, {"NPU"}},
|
||||
{"ReduceProd", V_2020_4, {"CPU"}},
|
||||
{"ReduceProd", V_2022_1, {"GPU"}},
|
||||
{"ReduceProd", V_2023_1, {"NPU"}},
|
||||
{"ReduceSum", V_2020_4, {"CPU", "GPU"}},
|
||||
// {"ReduceSum", V_2023_1, {"NPU"}},
|
||||
{"ReduceSumSquare", V_2020_4, {"CPU"}},
|
||||
{"ReduceSumSquare", V_2022_1, {"CPU", "GPU"}},
|
||||
{"ReduceSumSquare", V_2023_1, {"NPU"}},
|
||||
{"Relu", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Relu", V_2023_0, {"VPUX"}},
|
||||
{"Relu", V_2023_0, {"NPU"}},
|
||||
{"Resize", V_2020_4, {"CPU"}},
|
||||
{"Resize", V_2022_1, {"GPU"}},
|
||||
{"Resize", V_2023_1, {"NPU"}},
|
||||
{"Reshape", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Reshape", V_2023_0, {"VPUX"}},
|
||||
{"Reshape", V_2023_0, {"NPU"}},
|
||||
{"ReverseSequence", V_2022_1, {"CPU", "GPU"}},
|
||||
{"RoiAlign", V_2021_1, {"CPU", "GPU"}},
|
||||
{"RoiAlign", V_2023_1, {"NPU"}},
|
||||
{"Round", V_2021_4, {"CPU", "GPU"}},
|
||||
{"Round", V_2023_1, {"NPU"}},
|
||||
{"Scatter", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Scatter", V_2023_1, {"NPU"}},
|
||||
{"ScatterElements", V_2022_1, {"CPU", "GPU"}},
|
||||
{"ScatterElements", V_2023_1, {"NPU"}},
|
||||
{"ScatterND", V_2022_1, {"CPU", "GPU"}},
|
||||
{"ScatterND", V_2023_1, {"NPU"}},
|
||||
{"Selu", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Selu", V_2023_1, {"NPU"}},
|
||||
{"Shape", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Shape", V_2023_0, {"VPUX"}},
|
||||
{"Shape", V_2023_0, {"NPU"}},
|
||||
{"Shrink", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Shrink", V_2023_0, {"VPUX"}},
|
||||
{"Shrink", V_2023_0, {"NPU"}},
|
||||
{"Sigmoid", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Sigmoid", V_2023_0, {"VPUX"}},
|
||||
{"Sigmoid", V_2023_0, {"NPU"}},
|
||||
{"Sign", V_2020_4, {"CPU"}},
|
||||
{"Sign", V_2022_1, {"GPU"}},
|
||||
{"Sign", V_2023_0, {"VPUX"}},
|
||||
{"Sign", V_2023_0, {"NPU"}},
|
||||
{"Sin", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Sin", V_2023_0, {"VPUX"}},
|
||||
{"Sin", V_2023_0, {"NPU"}},
|
||||
{"Sinh", V_2020_4, {"CPU"}},
|
||||
{"Sinh", V_2023_1, {"NPU"}},
|
||||
{"Size", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Size", V_2023_1, {"NPU"}},
|
||||
{"Slice", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Slice", V_2023_0, {"VPUX"}},
|
||||
{"Slice", V_2023_0, {"NPU"}},
|
||||
{"Softmax", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Softmax", V_2023_0, {"VPUX"}},
|
||||
{"Softmax", V_2023_0, {"NPU"}},
|
||||
{"Softplus", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Softplus", V_2023_0, {"VPUX"}},
|
||||
{"Softplus", V_2023_0, {"NPU"}},
|
||||
{"Softsign", V_2022_1, {"CPU", "GPU"}},
|
||||
{"SpaceToDepth", V_2020_4, {"CPU", "GPU"}},
|
||||
{"SpaceToDepth", V_2023_0, {"VPUX"}},
|
||||
{"SpaceToDepth", V_2023_0, {"NPU"}},
|
||||
{"Split", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Split", V_2023_0, {"VPUX"}},
|
||||
{"Split", V_2023_0, {"NPU"}},
|
||||
{"Sqrt", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Sqrt", V_2023_0, {"VPUX"}},
|
||||
{"Sqrt", V_2023_0, {"NPU"}},
|
||||
{"Squeeze", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Squeeze", V_2023_0, {"VPUX"}},
|
||||
{"Squeeze", V_2023_0, {"NPU"}},
|
||||
{"Softsign", V_2020_4, {"CPU"}},
|
||||
{"Sub", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Sub", V_2023_0, {"VPUX"}},
|
||||
{"Sub", V_2023_0, {"NPU"}},
|
||||
{"Sum", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Sum", V_2023_0, {"VPUX"}},
|
||||
{"Sum", V_2023_0, {"NPU"}},
|
||||
{"Tan", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Tan", V_2023_1, {"NPU"}},
|
||||
{"Tanh", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Tanh", V_2023_0, {"VPUX"}},
|
||||
{"Tanh", V_2023_0, {"NPU"}},
|
||||
{"ThresholdedRelu", V_2022_1, {"CPU", "GPU"}},
|
||||
{"ThresholdedRelu", V_2023_0, {"VPUX"}},
|
||||
{"ThresholdedRelu", V_2023_0, {"NPU"}},
|
||||
{"Tile", V_2021_3, {"CPU", "GPU"}},
|
||||
{"Tile", V_2023_0, {"VPUX"}},
|
||||
{"Tile", V_2023_0, {"NPU"}},
|
||||
{"Transpose", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Transpose", V_2023_0, {"VPUX"}},
|
||||
{"Transpose", V_2023_0, {"NPU"}},
|
||||
{"Trilu", V_2023_0, {"CPU", "GPU"}},
|
||||
{"Trilu", V_2023_1, {"NPU"}},
|
||||
{"TopK", V_2020_4, {"CPU", "GPU"}},
|
||||
{"TopK", V_2023_0, {"VPUX"}},
|
||||
{"TopK", V_2023_0, {"NPU"}},
|
||||
{"Upsample", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Unsqueeze", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Unsqueeze", V_2023_0, {"VPUX"}},
|
||||
{"Upsample", V_2021_1, {"CPU"}},
|
||||
{"Upsample", V_2021_4, {"GPU"}},
|
||||
{"Upsample", V_2023_0, {"VPUX"}},
|
||||
{"Unsqueeze", V_2023_0, {"NPU"}},
|
||||
{"Where", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Where", V_2023_0, {"VPUX"}}, // Added for whisper decoder model.
|
||||
{"Where", V_2023_0, {"NPU"}}, // Added for whisper decoder model.
|
||||
{"Xor", V_2022_1, {"CPU", "GPU"}},
|
||||
{"Xor", V_2023_1, {"NPU"}},
|
||||
};
|
||||
|
||||
// Fills the per-target tables of tensor element types that the OpenVINO EP accepts.
//
// Every entry is a (version, ONNX element type) pair inserted into one of the
// std::set<Pairs> members: supported_types_initializer_, supported_types_vpu_ /
// supported_types_npu_, supported_types_cpu_ and supported_types_gpu_.
// type_is_supported() accepts a dtype when it finds an entry whose version is
// <= version_id_ and whose type equals the dtype, so the version in each pair acts
// as the first release in which that type is accepted.
//
// Called from the DataOps constructor, after populate_op_mode_supported().
//
// NOTE(review): in this listing every insert shows up twice - once as a single-line
// statement and once wrapped over two lines. That looks like the pre- and
// post-reformat versions of the same statement shown side by side; because the
// targets are std::set, a repeated insert of the same pair has no effect.
void DataOps::populate_types_supported() {
|
||||
// Types accepted for initializers (single-line form).
supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
|
||||
supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
|
||||
supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
|
||||
supported_types_initializer_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
|
||||
supported_types_initializer_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
|
||||
supported_types_initializer_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
|
||||
supported_types_initializer_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
|
||||
// Same initializer entries, wrapped form.
supported_types_initializer_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
|
||||
supported_types_initializer_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
|
||||
supported_types_initializer_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
|
||||
supported_types_initializer_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
|
||||
supported_types_initializer_.insert(
|
||||
std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
|
||||
supported_types_initializer_.insert(
|
||||
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
|
||||
supported_types_initializer_.insert(
|
||||
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
|
||||
|
||||
// supported_types_vpu_ entries. NOTE(review): presumably the pre-rename name of
// supported_types_npu_ (the same eight pairs follow below) - confirm against the header.
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
|
||||
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
|
||||
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
|
||||
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
|
||||
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
|
||||
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
|
||||
supported_types_vpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
|
||||
supported_types_vpu_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
|
||||
// supported_types_npu_ entries; this is the set type_is_supported() scans when
// device_id_ contains "NPU", "HETERO", "MULTI" or "AUTO".
supported_types_npu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
|
||||
supported_types_npu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
|
||||
supported_types_npu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
|
||||
supported_types_npu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
|
||||
supported_types_npu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
|
||||
supported_types_npu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
|
||||
supported_types_npu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
|
||||
supported_types_npu_.insert(
|
||||
std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
|
||||
|
||||
// CPU entries (single-line form); FLOAT16 is registered from V_2022_2.
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
|
||||
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
|
||||
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
|
||||
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
|
||||
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
|
||||
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
|
||||
supported_types_cpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
|
||||
supported_types_cpu_.insert(std::make_pair(V_2022_2, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
|
||||
// Same CPU entries, wrapped form.
supported_types_cpu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
|
||||
supported_types_cpu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
|
||||
supported_types_cpu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
|
||||
supported_types_cpu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
|
||||
supported_types_cpu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
|
||||
supported_types_cpu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
|
||||
supported_types_cpu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
|
||||
supported_types_cpu_.insert(
|
||||
std::make_pair(V_2022_2, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
|
||||
|
||||
// GPU entries (single-line form); BOOL is registered from V_2022_1 and INT16 is not listed.
supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
|
||||
supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
|
||||
supported_types_gpu_.insert(std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
|
||||
supported_types_gpu_.insert(std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
|
||||
supported_types_gpu_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
|
||||
supported_types_gpu_.insert(std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
|
||||
supported_types_gpu_.insert(std::make_pair(V_2022_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
|
||||
// Same GPU entries, wrapped form.
supported_types_gpu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
|
||||
supported_types_gpu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
|
||||
supported_types_gpu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
|
||||
supported_types_gpu_.insert(
|
||||
std::make_pair(V_2021_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
|
||||
supported_types_gpu_.insert(
|
||||
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
|
||||
supported_types_gpu_.insert(
|
||||
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
|
||||
supported_types_gpu_.insert(
|
||||
std::make_pair(V_2022_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
|
||||
}
|
||||
|
||||
void DataOps::populate_op_mode_supported() {
|
||||
|
|
@ -349,10 +439,10 @@ void DataOps::populate_op_mode_supported() {
|
|||
no_dimension_supported_.push_back({"Equal", V_2023_0, {"GPU"}});
|
||||
no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}});
|
||||
no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}});
|
||||
no_dimension_supported_.push_back({"Greater", V_2023_0, {"VPUX"}});
|
||||
no_dimension_supported_.push_back({"Greater", V_2023_0, {"NPU"}});
|
||||
no_dimension_supported_.push_back({"Less", V_2022_1, {"CPU"}});
|
||||
no_dimension_supported_.push_back({"Loop", V_2021_4, {"All"}});
|
||||
no_dimension_supported_.push_back({"Max", V_2023_0, {"VPUX"}});
|
||||
no_dimension_supported_.push_back({"Max", V_2023_0, {"NPU"}});
|
||||
no_dimension_supported_.push_back({"Min", V_2020_4, {"All"}});
|
||||
no_dimension_supported_.push_back({"Mul", V_2020_4, {"All"}});
|
||||
no_dimension_supported_.push_back({"QuantizeLinear", V_2021_4, {"All"}});
|
||||
|
|
@ -382,11 +472,14 @@ void DataOps::populate_op_mode_supported() {
|
|||
{
|
||||
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3},
|
||||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
// Abs is not supported with INT8 or INT32 as input data type on GPU
|
||||
if (device_id_.find("GPU") != std::string::npos) {
|
||||
// Abs is not supported with INT8 or INT32 as input data type on GPU and NPU
|
||||
if ((device_id_.find("GPU") != std::string::npos) ||
|
||||
(device_id_.find("NPU") != std::string::npos)) {
|
||||
for (size_t i = 0; i < node->InputDefs().size(); i++) {
|
||||
if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8 ||
|
||||
node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
|
||||
if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
|
||||
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8 ||
|
||||
node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
|
||||
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -399,11 +492,14 @@ void DataOps::populate_op_mode_supported() {
|
|||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
// tensor type does not support select last index
|
||||
auto& attributes = node->GetAttributes();
|
||||
auto last_index_arg = attributes.count("select_last_index") > 0 ? attributes.at("select_last_index").i() : 0;
|
||||
auto last_index_arg =
|
||||
attributes.count("select_last_index") > 0 ? attributes.at("select_last_index").i()
|
||||
: 0;
|
||||
if (last_index_arg != 0)
|
||||
return true;
|
||||
// tensor type supports float as input for argmax and argmin
|
||||
if (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)
|
||||
if (node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type() !=
|
||||
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT)
|
||||
return true;
|
||||
return false;
|
||||
}};
|
||||
|
|
@ -415,7 +511,8 @@ void DataOps::populate_op_mode_supported() {
|
|||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
if (device_id_.find("GPU") != std::string::npos) {
|
||||
// int64 data type is not supported on GPU
|
||||
const bool data_is_int64 = node->InputDefs()[0]->Type()->find("int64") != std::string::npos;
|
||||
const bool data_is_int64 =
|
||||
node->InputDefs()[0]->Type()->find("int64") != std::string::npos;
|
||||
return data_is_int64;
|
||||
}
|
||||
return false;
|
||||
|
|
@ -506,9 +603,12 @@ void DataOps::populate_op_mode_supported() {
|
|||
if (device_id_.find("GPU") != std::string::npos) {
|
||||
auto x_data_type = node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
|
||||
auto y_data_type = node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
|
||||
// currently both inputs with int32 are not supported and also both input datatypes should be same
|
||||
const bool A_is_int32 = node->InputDefs()[0]->Type()->find("int32") != std::string::npos;
|
||||
const bool B_is_int32 = node->InputDefs()[1]->Type()->find("int32") != std::string::npos;
|
||||
// currently both inputs with int32 are not supported
|
||||
// and also both input datatypes should be same
|
||||
const bool A_is_int32 =
|
||||
node->InputDefs()[0]->Type()->find("int32") != std::string::npos;
|
||||
const bool B_is_int32 =
|
||||
node->InputDefs()[1]->Type()->find("int32") != std::string::npos;
|
||||
if ((A_is_int32 && B_is_int32) || (x_data_type != y_data_type))
|
||||
return true;
|
||||
}
|
||||
|
|
@ -589,11 +689,13 @@ void DataOps::populate_op_mode_supported() {
|
|||
if (device_id_.find("GPU") != std::string::npos) {
|
||||
auto slope = node->InputDefs()[1];
|
||||
// PRelu slope has to be an initializer or needs to come from a constant node
|
||||
if (initializers.count(slope->Name()))
|
||||
if (initializers.count(slope->Name())) {
|
||||
return false;
|
||||
else {
|
||||
for (auto input_node = node->InputNodesBegin(); input_node != node->InputNodesEnd(); ++input_node) {
|
||||
if (GetInputCount(this->graph_viewer_.GetNode((*input_node).Index()), initializers) == 0)
|
||||
} else {
|
||||
for (auto input_node = node->InputNodesBegin();
|
||||
input_node != node->InputNodesEnd(); ++input_node) {
|
||||
if (GetInputCount(
|
||||
this->graph_viewer_.GetNode((*input_node).Index()), initializers) == 0)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
@ -603,12 +705,12 @@ void DataOps::populate_op_mode_supported() {
|
|||
op_list_.insert({"PRelu", obj});
|
||||
}
|
||||
{
|
||||
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
|
||||
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
|
||||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
const auto& input_arg = node->InputDefs()[1];
|
||||
auto shape = input_arg->Shape();
|
||||
// Reshape op with empty dim is Rejected for Myriad
|
||||
//[TODO] Is this condition required anymore with Myriad removed?
|
||||
// [TODO] Is this condition required anymore with Myriad removed?
|
||||
if (shape != nullptr) {
|
||||
for (const auto& dim : input_arg->Shape()->dim()) {
|
||||
if (utils::HasDimValue(dim) && dim.dim_value() == 0)
|
||||
|
|
@ -638,7 +740,8 @@ void DataOps::populate_op_mode_supported() {
|
|||
if (device_id_.find("GPU") != std::string::npos) {
|
||||
// INT32 datatype is not supported as input
|
||||
for (size_t i = 0; i < node->InputDefs().size(); i++) {
|
||||
if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
|
||||
if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
|
||||
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -650,9 +753,11 @@ void DataOps::populate_op_mode_supported() {
|
|||
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3},
|
||||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
if (device_id_.find("GPU") != std::string::npos) {
|
||||
auto output_data_type = node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
|
||||
auto output_data_type =
|
||||
node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
|
||||
// If the output of ScatterND op is BOOL, it is rejected for GPU.
|
||||
if (output_data_type == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)
|
||||
if (output_data_type ==
|
||||
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
|
@ -666,7 +771,8 @@ void DataOps::populate_op_mode_supported() {
|
|||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
// If the Input of Shrink op is UINT8, it is rejected (Due to output mismatch)
|
||||
for (size_t i = 0; i < node->InputDefs().size(); i++) {
|
||||
if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8)
|
||||
if (node->InputDefs()[i]->TypeAsProto()->tensor_type().elem_type() ==
|
||||
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
|
@ -714,10 +820,11 @@ void DataOps::populate_op_mode_supported() {
|
|||
op_list_.insert({"Squeeze", obj});
|
||||
}
|
||||
{
|
||||
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
|
||||
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
|
||||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
// If the operator is unsqueeze
|
||||
// If axes is an input, then we cannot produce a static graph. Conversion fails in convert_function_to_cnn_network.
|
||||
// If axes is an input, then we cannot produce a static graph.
|
||||
// Conversion fails in convert_function_to_cnn_network.
|
||||
for (size_t i = 0; i < node->InputDefs().size(); i++) {
|
||||
if (node->InputDefs()[i]->Name() == "axes") {
|
||||
return true;
|
||||
|
|
@ -728,14 +835,15 @@ void DataOps::populate_op_mode_supported() {
|
|||
op_list_.insert({"Unsqueeze", obj});
|
||||
}
|
||||
{
|
||||
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0},
|
||||
UnsupportedOpMode obj = {{V_2022_1, V_2022_2, V_2022_3, V_2023_0, V_2023_1},
|
||||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
// check for attributes
|
||||
auto& upsample_attr = node->GetAttributes();
|
||||
if (upsample_attr.count("scales") > 0) {
|
||||
auto& upsample_arg = upsample_attr.at("scales");
|
||||
auto float_size = upsample_arg.floats_size();
|
||||
if (float_size > 2 && (upsample_arg.floats(0) != 1.f || upsample_arg.floats(1) != 1.f)) {
|
||||
if (float_size > 2 &&
|
||||
(upsample_arg.floats(0) != 1.f || upsample_arg.floats(1) != 1.f)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -750,9 +858,12 @@ void DataOps::populate_op_mode_supported() {
|
|||
}
|
||||
}
|
||||
// x_arg supports only float, int8 and float16 type
|
||||
if ((x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) ||
|
||||
(x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8) ||
|
||||
(x_arg->TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)) {
|
||||
if ((x_arg->TypeAsProto()->tensor_type().elem_type() ==
|
||||
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT) ||
|
||||
(x_arg->TypeAsProto()->tensor_type().elem_type() ==
|
||||
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8) ||
|
||||
(x_arg->TypeAsProto()->tensor_type().elem_type() ==
|
||||
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)) {
|
||||
return false;
|
||||
} else {
|
||||
return true;
|
||||
|
|
@ -849,9 +960,9 @@ bool DataOps::type_is_supported(const NodeArg* node_arg, bool is_initializer) {
|
|||
} else {
|
||||
auto dtype = type_proto->tensor_type().elem_type();
|
||||
|
||||
if (device_id_.find("VPUX") != std::string::npos || device_id_.find("HETERO") != std::string::npos ||
|
||||
if (device_id_.find("NPU") != std::string::npos || device_id_.find("HETERO") != std::string::npos ||
|
||||
device_id_.find("MULTI") != std::string::npos || device_id_.find("AUTO") != std::string::npos) {
|
||||
for (auto const& var : supported_types_vpu_) {
|
||||
for (auto const& var : supported_types_npu_) {
|
||||
if ((var.first <= version_id_) &&
|
||||
(var.second == dtype)) {
|
||||
return true;
|
||||
|
|
@ -1079,7 +1190,9 @@ bool DataOps::node_is_supported(const std::map<std::string, std::set<std::string
|
|||
if (opset->second.find(optype) == opset->second.end() && op_fun == ops_supported_as_function.end()) {
|
||||
#ifndef NDEBUG
|
||||
if (openvino_ep::backend_utils::IsDebugEnabled()) {
|
||||
std::cout << "The operator is not available in OpenVINO ngraph operators list nor the operator is a special ONNX function" << std::endl;
|
||||
// The two literals are streamed back-to-back, so the first must end with a space;
// without it the message prints as "...operators listnor the operator...".
std::cout << "The operator is not available in OpenVINO ngraph operators list "
|
||||
<< "nor the operator is a special ONNX function"
|
||||
<< std::endl;
|
||||
}
|
||||
#endif
|
||||
return false;
|
||||
|
|
@ -1095,10 +1208,12 @@ std::vector<NodeIndex> DataOps::GetUnsupportedNodeIndices(std::unordered_set<std
|
|||
for (const auto& node_idx : graph_viewer_.GetNodesInTopologicalOrder()) {
|
||||
if (node_is_supported(ng_supported_ops, node_idx)) {
|
||||
// Collect inputs that are initializers
|
||||
graph_viewer_.GetNode(node_idx)->ForEachDef([&ng_required_initializers, this](const NodeArg& node_arg, bool is_input) {
|
||||
if(is_input && this->graph_viewer_.GetAllInitializedTensors().count(node_arg.Name())) {
|
||||
graph_viewer_.GetNode(node_idx)->ForEachDef([&ng_required_initializers, this](const NodeArg& node_arg,
|
||||
bool is_input) {
|
||||
if (is_input && this->graph_viewer_.GetAllInitializedTensors().count(node_arg.Name())) {
|
||||
ng_required_initializers.insert(node_arg.Name());
|
||||
} }, true);
|
||||
} },
|
||||
true);
|
||||
} else {
|
||||
unsupported_nodes_idx.push_back(node_idx);
|
||||
}
|
||||
|
|
@ -1110,7 +1225,8 @@ bool DataOps::IsOpSupportedOnlyInModel(std::string name) {
|
|||
return ops_supported_only_in_model.find(name) != ops_supported_only_in_model.end();
|
||||
}
|
||||
|
||||
bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers, const Node* node) {
|
||||
bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers,
|
||||
const Node* node) {
|
||||
if (node->OpType() == "Reshape") {
|
||||
const auto& shape_arg = node->InputDefs()[1];
|
||||
if (ng_required_initializers.find(shape_arg->Name()) == ng_required_initializers.end()) {
|
||||
|
|
@ -1119,15 +1235,20 @@ bool DataOps::SpecialConditionForClusterSizeOne(std::unordered_set<std::string>&
|
|||
} else if (node->OpType() == "Expand") {
|
||||
// nGraph only supports constant shape input values
|
||||
const auto& output = node->OutputDefs()[0];
|
||||
if (output->TypeAsProto()->tensor_type().elem_type() != ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)
|
||||
if (output->TypeAsProto()->tensor_type().elem_type() !=
|
||||
ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16)
|
||||
return true;
|
||||
} else if (node->OpType() == "RoiAlign") {
|
||||
using onnx_dtype = ONNX_NAMESPACE::TensorProto_DataType;
|
||||
|
||||
onnx_dtype input_0_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
|
||||
onnx_dtype input_1_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
|
||||
onnx_dtype input_2_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[2]->TypeAsProto()->tensor_type().elem_type();
|
||||
onnx_dtype output_data_type = (ONNX_NAMESPACE::TensorProto_DataType)node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
|
||||
onnx_dtype input_0_data_type =
|
||||
(ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
|
||||
onnx_dtype input_1_data_type =
|
||||
(ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[1]->TypeAsProto()->tensor_type().elem_type();
|
||||
onnx_dtype input_2_data_type =
|
||||
(ONNX_NAMESPACE::TensorProto_DataType)node->InputDefs()[2]->TypeAsProto()->tensor_type().elem_type();
|
||||
onnx_dtype output_data_type =
|
||||
(ONNX_NAMESPACE::TensorProto_DataType)node->OutputDefs()[0]->TypeAsProto()->tensor_type().elem_type();
|
||||
|
||||
if ((input_0_data_type != onnx_dtype::TensorProto_DataType_FLOAT16) ||
|
||||
(input_1_data_type != onnx_dtype::TensorProto_DataType_FLOAT16) ||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,11 @@
|
|||
|
||||
#pragma once
|
||||
#include <unordered_set>
|
||||
#include <utility>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace openvino_ep {
|
||||
|
|
@ -47,7 +52,7 @@ class DataOps {
|
|||
std::multimap<std::string, UnsupportedOpMode> op_list_;
|
||||
std::vector<SupportedOp> subgraph_supported_;
|
||||
std::vector<SupportedOp> no_dimension_supported_;
|
||||
std::set<Pairs> supported_types_vpu_;
|
||||
std::set<Pairs> supported_types_npu_;
|
||||
std::set<Pairs> supported_types_cpu_;
|
||||
std::set<Pairs> supported_types_gpu_;
|
||||
std::set<Pairs> supported_types_initializer_;
|
||||
|
|
@ -64,14 +69,16 @@ class DataOps {
|
|||
const NodeIndex node_idx);
|
||||
|
||||
public:
|
||||
DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, std::string dev_id) : graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id) {
|
||||
DataOps(const GraphViewer& graph_viewer_param, VersionNum ver, std::string dev_id)
|
||||
: graph_viewer_(graph_viewer_param), version_id_(ver), device_id_(dev_id) {
|
||||
populate_op_mode_supported();
|
||||
populate_types_supported();
|
||||
}
|
||||
|
||||
virtual std::vector<NodeIndex> GetUnsupportedNodeIndices(std::unordered_set<std::string>& ng_required_initializers);
|
||||
virtual bool IsOpSupportedOnlyInModel(std::string name);
|
||||
virtual bool SpecialConditionForClusterSizeOne(std::unordered_set<std::string>& ng_required_initializers, const Node* node);
|
||||
virtual bool SpecialConditionForClusterSizeOne(
|
||||
std::unordered_set<std::string>& ng_required_initializers, const Node* node);
|
||||
virtual bool DoNotOmitSubGraph(const std::string& name);
|
||||
virtual bool InsertNode(const std::string& name);
|
||||
VersionNum GetVersion() const { return version_id_; }
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
// Licensed under the MIT License
|
||||
|
||||
#include "core/providers/shared_library/provider_api.h"
|
||||
#include "utils.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable : 4244 4245 5208)
|
||||
|
|
@ -113,7 +114,8 @@ std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_op
|
|||
* supported_cluster + (UNsupported_node + rest_of_the_graph). This function returns a vector of all supported_clusters by nGraph
|
||||
*/
|
||||
std::vector<std::vector<NodeIndex>>
|
||||
GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes) {
|
||||
GetPartitionedClusters(const std::vector<NodeIndex>& topological_order,
|
||||
const std::vector<NodeIndex>& unsupported_nodes) {
|
||||
std::vector<std::vector<NodeIndex>> ng_clusters;
|
||||
|
||||
auto prev = topological_order.begin();
|
||||
|
|
@ -140,7 +142,10 @@ GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const st
|
|||
return ng_clusters;
|
||||
}
|
||||
|
||||
void IdentifyConnectedNodes(const GraphViewer& graph_viewer, NodeIndex curr_node_index, std::vector<NodeIndex>& cluster, std::vector<NodeIndex>& sub_cluster) {
|
||||
void IdentifyConnectedNodes(const GraphViewer& graph_viewer,
|
||||
NodeIndex curr_node_index,
|
||||
std::vector<NodeIndex>& cluster,
|
||||
std::vector<NodeIndex>& sub_cluster) {
|
||||
if (std::find(cluster.begin(), cluster.end(), curr_node_index) == cluster.end())
|
||||
return;
|
||||
|
||||
|
|
@ -205,7 +210,8 @@ void GetInputsOutputsOfCluster(const GraphViewer& graph_viewer,
|
|||
const auto& ext_node = graph_viewer.GetNode((*it).Index());
|
||||
|
||||
if (std::find(cluster.begin(), cluster.end(), ext_node->Index()) == cluster.end()) {
|
||||
// Node is external to this_cluster. Search through its inputs to find the output that is generated by this_cluster.
|
||||
// Node is external to this_cluster. Search through its inputs to
|
||||
// find the output that is generated by this_cluster.
|
||||
std::set<std::string> ext_node_inputs;
|
||||
ext_node->ForEachDef(
|
||||
[&ext_node_inputs](const NodeArg& arg, bool is_input) {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,15 @@
|
|||
// Copyright (C) 2019-2022 Intel Corporation
|
||||
// Licensed under the MIT License
|
||||
#pragma once
|
||||
|
||||
#include <memory>
|
||||
#include <map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <unordered_set>
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace openvino_ep {
|
||||
|
|
@ -18,9 +28,14 @@ int GetOnnxOpSet(const GraphViewer& graph_viewer);
|
|||
std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_opset);
|
||||
|
||||
std::vector<std::vector<NodeIndex>>
|
||||
GetPartitionedClusters(const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes);
|
||||
GetPartitionedClusters(
|
||||
const std::vector<NodeIndex>& topological_order, const std::vector<NodeIndex>& unsupported_nodes);
|
||||
|
||||
void IdentifyConnectedNodes(const GraphViewer& graph_viewer, NodeIndex curr_node_index, std::vector<NodeIndex>& cluster, std::vector<NodeIndex>& sub_cluster);
|
||||
void IdentifyConnectedNodes(
|
||||
const GraphViewer& graph_viewer,
|
||||
NodeIndex curr_node_index,
|
||||
std::vector<NodeIndex>& cluster,
|
||||
std::vector<NodeIndex>& sub_cluster);
|
||||
|
||||
std::vector<std::vector<NodeIndex>>
|
||||
GetConnectedClusters(const GraphViewer& graph_viewer, const std::vector<std::vector<NodeIndex>>& clusters);
|
||||
|
|
|
|||
|
|
@ -1432,7 +1432,7 @@ ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const O
|
|||
if (legacy_ov_options->device_type != nullptr)
|
||||
ov_options_converted_map["device_type"] = legacy_ov_options->device_type;
|
||||
|
||||
ov_options_converted_map["enable_vpu_fast_compile"] = legacy_ov_options->enable_vpu_fast_compile;
|
||||
ov_options_converted_map["enable_npu_fast_compile"] = legacy_ov_options->enable_npu_fast_compile;
|
||||
|
||||
if (legacy_ov_options->device_id != nullptr)
|
||||
ov_options_converted_map["device_id"] = legacy_ov_options->device_id;
|
||||
|
|
|
|||
|
|
@ -813,10 +813,10 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
|
|||
if (option.first == "device_type") {
|
||||
OV_provider_options_map[option.first] = option.second;
|
||||
continue;
|
||||
} else if (option.first == "enable_vpu_fast_compile") {
|
||||
} else if (option.first == "enable_npu_fast_compile") {
|
||||
if (!(option.second == "True" || option.second == "true" ||
|
||||
option.second == "False" || option.second == "false")) {
|
||||
ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
|
||||
ORT_THROW("Invalid value passed for enable_npu_fast_compile: ", option.second);
|
||||
}
|
||||
OV_provider_options_map[option.first] = option.second;
|
||||
} else if (option.first == "enable_opencl_throttling") {
|
||||
|
|
|
|||
|
|
@ -60,11 +60,11 @@ struct OrtStatus {
|
|||
#elif OPENVINO_CONFIG_GPU_FP16
|
||||
#define BACKEND_OPENVINO "-OPENVINO_GPU_FP16"
|
||||
|
||||
#elif OPENVINO_CONFIG_VPUX_FP16
|
||||
#define BACKEND_OPENVINO "-OPENVINO_VPUX_FP16"
|
||||
#elif OPENVINO_CONFIG_NPU_FP16
|
||||
#define BACKEND_OPENVINO "-OPENVINO_NPU_FP16"
|
||||
|
||||
#elif OPENVINO_CONFIG_VPUX_U8
|
||||
#define BACKEND_OPENVINO "-OPENVINO_VPUX_U8"
|
||||
#elif OPENVINO_CONFIG_NPU_U8
|
||||
#define BACKEND_OPENVINO "-OPENVINO_NPU_U8"
|
||||
|
||||
#elif OPENVINO_CONFIG_MULTI
|
||||
#define BACKEND_OPENVINO "-OPENVINO_MULTI"
|
||||
|
|
|
|||
|
|
@ -60,7 +60,7 @@ namespace perftest {
|
|||
"\t-i: Specify EP specific runtime options as key value pairs. Different runtime options available are: \n"
|
||||
"\t [OpenVINO only] [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
|
||||
"\t [OpenVINO only] [device_id]: Selects a particular hardware device for inference.\n"
|
||||
"\t [OpenVINO only] [enable_vpu_fast_compile]: Optionally enabled to speeds up the model's compilation on VPU device targets.\n"
|
||||
"\t [OpenVINO only] [enable_npu_fast_compile]: Optionally enabled to speeds up the model's compilation on NPU device targets.\n"
|
||||
"\t [OpenVINO only] [num_of_threads]: Overrides the accelerator hardware type and precision with these values at runtime.\n"
|
||||
"\t [OpenVINO only] [cache_dir]: Explicitly specify the path to dump and load the blobs(Model caching) or cl_cache (Kernel Caching) files feature. If blob files are already present, it will be directly loaded.\n"
|
||||
"\t [OpenVINO only] [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device(Reduces the CPU Utilization while using GPU) \n"
|
||||
|
|
@ -72,7 +72,7 @@ namespace perftest {
|
|||
"\t [QNN only] [htp_performance_mode]: QNN performance mode, options: 'burst', 'balanced', 'default', 'high_performance', \n"
|
||||
"\t 'high_power_saver', 'low_balanced', 'low_power_saver', 'power_saver', 'sustained_high_performance'. Default to 'default'. \n"
|
||||
"\t [Usage]: -e <provider_name> -i '<key1>|<value1> <key2>|<value2>'\n\n"
|
||||
"\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_vpu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"<path>\"\"\n"
|
||||
"\t [Example] [For OpenVINO EP] -e openvino -i \"device_type|CPU_FP32 enable_npu_fast_compile|true num_of_threads|5 enable_opencl_throttling|true cache_dir|\"<path>\"\"\n"
|
||||
"\t [Example] [For QNN EP] -e qnn -i \"backend_path|/folderpath/libQnnCpu.so\" \n\n"
|
||||
"\t [TensorRT only] [trt_max_partition_iterations]: Maximum iterations for TensorRT parser to get capability.\n"
|
||||
"\t [TensorRT only] [trt_min_subgraph_size]: Minimum size of TensorRT subgraphs.\n"
|
||||
|
|
|
|||
|
|
@ -240,8 +240,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
if (key == "device_type") {
|
||||
std::set<std::string> ov_supported_device_types = {"CPU_FP32", "CPU_FP16", "GPU_FP32",
|
||||
"GPU.0_FP32", "GPU.1_FP32", "GPU_FP16",
|
||||
"GPU.0_FP16", "GPU.1_FP16",
|
||||
"VPUX_FP16", "VPUX_U8"};
|
||||
"GPU.0_FP16", "GPU.1_FP16"};
|
||||
if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) {
|
||||
ov_options[key] = value;
|
||||
} else if (value.find("HETERO:") == 0) {
|
||||
|
|
@ -254,17 +253,17 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
ORT_THROW(
|
||||
"[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
|
||||
"Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
|
||||
"'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from"
|
||||
"'GPU.0_FP16', 'GPU.1_FP16' or from"
|
||||
" HETERO/MULTI/AUTO options available. \n");
|
||||
}
|
||||
} else if (key == "device_id") {
|
||||
ov_options[key] = value;
|
||||
} else if (key == "enable_vpu_fast_compile") {
|
||||
} else if (key == "enable_npu_fast_compile") {
|
||||
if (value == "true" || value == "True" ||
|
||||
value == "false" || value == "False") {
|
||||
ov_options[key] = value;
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_npu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
|
||||
}
|
||||
} else if (key == "enable_opencl_throttling") {
|
||||
if (value == "true" || value == "True" ||
|
||||
|
|
@ -299,7 +298,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
|
|||
ov_options[key] = value;
|
||||
}
|
||||
} else {
|
||||
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling|true'] \n");
|
||||
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_npu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling|true'] \n");
|
||||
}
|
||||
}
|
||||
session_options.AppendExecutionProvider("OpenVINO", ov_options);
|
||||
|
|
|
|||
|
|
@ -143,7 +143,7 @@ void L1NormalizationWithZeroNorm() {
|
|||
|
||||
vector<T> expected_output = {0.5f, 0.5f, 0.f, 0.f};
|
||||
test.AddOutput<T>("Y", input_dims, expected_output);
|
||||
test.Run();
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
|
||||
}
|
||||
|
||||
TEST(LpNormalizationTest, L1NormalizationWithZeroNorm) {
|
||||
|
|
@ -163,7 +163,7 @@ void L2NormalizationWithZeroNorm() {
|
|||
|
||||
vector<T> expected_output = {1.f, 0.f, 0.f, 0.f};
|
||||
test.AddOutput<T>("Y", input_dims, expected_output);
|
||||
test.Run();
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
|
||||
}
|
||||
|
||||
TEST(LpNormalizationTest, L2NormalizationWithZeroNorm) {
|
||||
|
|
|
|||
|
|
@ -762,7 +762,7 @@ TEST(RNNTest, RNN_invalid_sequence_lens) {
|
|||
test.AddOutput<float>("Y_h", Y_h_dims, Y_h_data);
|
||||
|
||||
// the CUDA RNN version allows the invalid sequence lengths, so disable testing on CUDA and TensorRT
|
||||
test.Run(OpTester::ExpectResult::kExpectFailure, error_msg, {kCudaExecutionProvider, kTensorrtExecutionProvider});
|
||||
test.Run(OpTester::ExpectResult::kExpectFailure, error_msg, {kCudaExecutionProvider, kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
|
||||
};
|
||||
|
||||
// should batch batch_size to be valid
|
||||
|
|
@ -860,7 +860,7 @@ TEST(RNNTest, RNN_bidirectional_with_sequence_lens) {
|
|||
|
||||
test.AddOutput<float>("Y_h", Y_h_dims, Y_h_data);
|
||||
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider});
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kCudaExecutionProvider, kTensorrtExecutionProvider, kOpenVINOExecutionProvider});
|
||||
}
|
||||
|
||||
TEST(RNNTest, RNN_with_invalid_activation_load_failure) {
|
||||
|
|
|
|||
|
|
@ -66,7 +66,7 @@ TEST(CompressTest, Compress_3dims_has_extra_condition) {
|
|||
// has condition length = 3 > input_dim[axis] = 2
|
||||
test.AddInput<bool>("condition", {3}, {0, 1, 1});
|
||||
test.AddOutput<float>("output", {2, 1, 3}, {4.0f, 5.0f, 6.0f, 10.0f, 11.0f, 12.0f});
|
||||
test.Run();
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
|
||||
}
|
||||
|
||||
TEST(CompressTest, Compress_3dims_has_extra_input) {
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ TEST(TensorOpTest, Unsqueeze_scalar_2) {
|
|||
test.AddInput<float>("input", {}, std::vector<float>{1.0f});
|
||||
test.AddInput<int64_t>("axes", {2}, std::vector<int64_t>{0, -1}, axes_is_initializer);
|
||||
test.AddOutput<float>("output", {1, 1}, std::vector<float>{1.0f});
|
||||
test.Run();
|
||||
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
|
||||
};
|
||||
run_test(false);
|
||||
run_test(true);
|
||||
|
|
|
|||
|
|
@ -140,6 +140,9 @@ def create_backend_test(test_name=None):
|
|||
if backend.supports_device("OPENVINO_CPU_FP16"):
|
||||
current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP16")
|
||||
|
||||
if backend.supports_device("OPENVINO_NPU_FP16"):
|
||||
current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_NPU_FP16")
|
||||
|
||||
if backend.supports_device("OPENVINO"):
|
||||
current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_opset18")
|
||||
|
||||
|
|
|
|||
|
|
@ -521,6 +521,10 @@
|
|||
"test_scan_sum_cpu", // Disabled due to output mismatch with tolerance.
|
||||
"test_scan9_sum_cpu" // Disabled due to output mismatch with tolerance.
|
||||
],
|
||||
"current_failing_tests_OPENVINO_NPU_FP16": [
|
||||
"^test_prelu_broadcast",
|
||||
"test_loop11_cpu"
|
||||
],
|
||||
"current_failing_tests_OPENVINO_opset18": [
|
||||
// pending opset 18 support, RUNTIME_EXCEPTION : Encountered unknown exception in Initialize()
|
||||
"^test_center_crop_pad_crop_axes_chw",
|
||||
|
|
|
|||
|
|
@ -66,15 +66,13 @@ _check_python_version()
|
|||
|
||||
|
||||
def _openvino_verify_device_type(device_read):
|
||||
choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16", "VPUX_FP16", "VPUX_U8"]
|
||||
choices = ["CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"]
|
||||
|
||||
choices1 = [
|
||||
"CPU_FP32_NO_PARTITION",
|
||||
"CPU_FP16_NO_PARTITION",
|
||||
"GPU_FP32_NO_PARTITION",
|
||||
"GPU_FP16_NO_PARTITION",
|
||||
"VPUX_FP16_NO_PARTITION",
|
||||
"VPUX_U8_NO_PARTITION",
|
||||
]
|
||||
status_hetero = True
|
||||
res = False
|
||||
|
|
@ -89,7 +87,7 @@ def _openvino_verify_device_type(device_read):
|
|||
if len(comma_separated_devices) < 2:
|
||||
print("At least two devices required in Hetero/Multi/Auto Mode")
|
||||
status_hetero = False
|
||||
dev_options = ["CPU", "GPU", "VPUX"]
|
||||
dev_options = ["CPU", "GPU"]
|
||||
for dev in comma_separated_devices:
|
||||
if dev not in dev_options:
|
||||
status_hetero = False
|
||||
|
|
@ -100,7 +98,7 @@ def _openvino_verify_device_type(device_read):
|
|||
print("specify the keyword HETERO or MULTI or AUTO followed by the devices ")
|
||||
print("in the order of priority you want to build\n")
|
||||
print("The different hardware devices that can be added in HETERO or MULTI or AUTO")
|
||||
print("are ['CPU','GPU', 'VPUX'] \n")
|
||||
print("are ['CPU','GPU'] \n")
|
||||
print("An example of how to specify the hetero build type. Ex: HETERO:GPU,CPU \n")
|
||||
print("An example of how to specify the MULTI build type. Ex: MULTI:GPU,CPU \n")
|
||||
print("An example of how to specify the AUTO build type. Ex: AUTO:GPU,CPU \n")
|
||||
|
|
@ -1158,8 +1156,6 @@ def generate_build_tree(
|
|||
"-Donnxruntime_USE_OPENVINO_GPU_FP16=" + ("ON" if args.use_openvino == "GPU_FP16" else "OFF"),
|
||||
"-Donnxruntime_USE_OPENVINO_CPU_FP32=" + ("ON" if args.use_openvino == "CPU_FP32" else "OFF"),
|
||||
"-Donnxruntime_USE_OPENVINO_CPU_FP16=" + ("ON" if args.use_openvino == "CPU_FP16" else "OFF"),
|
||||
"-Donnxruntime_USE_OPENVINO_VPUX_FP16=" + ("ON" if args.use_openvino == "VPUX_FP16" else "OFF"),
|
||||
"-Donnxruntime_USE_OPENVINO_VPUX_U8=" + ("ON" if args.use_openvino == "VPUX_U8" else "OFF"),
|
||||
"-Donnxruntime_USE_OPENVINO_GPU_FP32_NP="
|
||||
+ ("ON" if args.use_openvino == "GPU_FP32_NO_PARTITION" else "OFF"),
|
||||
"-Donnxruntime_USE_OPENVINO_GPU_FP16_NP="
|
||||
|
|
@ -1168,9 +1164,6 @@ def generate_build_tree(
|
|||
+ ("ON" if args.use_openvino == "CPU_FP32_NO_PARTITION" else "OFF"),
|
||||
"-Donnxruntime_USE_OPENVINO_CPU_FP16_NP="
|
||||
+ ("ON" if args.use_openvino == "CPU_FP16_NO_PARTITION" else "OFF"),
|
||||
"-Donnxruntime_USE_OPENVINO_VPUX_FP16_NP="
|
||||
+ ("ON" if args.use_openvino == "VPUX_FP16_NP_PARTITION" else "OFF"),
|
||||
"-Donnxruntime_USE_OPENVINO_VPUX_U8_NP=" + ("ON" if args.use_openvino == "VPUX_U8_NP_PARTITION" else "OFF"),
|
||||
"-Donnxruntime_USE_OPENVINO_HETERO=" + ("ON" if args.use_openvino.startswith("HETERO") else "OFF"),
|
||||
"-Donnxruntime_USE_OPENVINO_DEVICE=" + (args.use_openvino),
|
||||
"-Donnxruntime_USE_OPENVINO_MULTI=" + ("ON" if args.use_openvino.startswith("MULTI") else "OFF"),
|
||||
|
|
|
|||
|
|
@ -552,6 +552,7 @@ def generate_files(line_list, args):
|
|||
files_list.append(
|
||||
"<file src=" + '"' + os.path.join(args.native_build_path, "onnxruntime.pdb") + runtimes + " />"
|
||||
)
|
||||
|
||||
else:
|
||||
files_list.append(
|
||||
"<file src="
|
||||
|
|
@ -706,25 +707,9 @@ def generate_files(line_list, args):
|
|||
)
|
||||
|
||||
if is_windows():
|
||||
if "2022" in openvino_path:
|
||||
dll_list_path = os.path.join(openvino_path, "runtime\\bin\\intel64\\Release\\")
|
||||
tbb_list_path = os.path.join(openvino_path, "runtime\\3rdparty\\tbb\\bin\\")
|
||||
else:
|
||||
dll_list_path = os.path.join(
|
||||
openvino_path, "deployment_tools\\inference_engine\\bin\\intel64\\Release\\"
|
||||
)
|
||||
tbb_list_path = os.path.join(openvino_path, "deployment_tools\\inference_engine\\external\\tbb\\bin\\")
|
||||
ngraph_list_path = os.path.join(openvino_path, "deployment_tools\\ngraph\\lib\\")
|
||||
for ngraph_element in os.listdir(ngraph_list_path):
|
||||
if ngraph_element.endswith("dll"):
|
||||
files_list.append(
|
||||
"<file src="
|
||||
+ '"'
|
||||
+ os.path.join(ngraph_list_path, ngraph_element)
|
||||
+ runtimes_target
|
||||
+ args.target_architecture
|
||||
+ '\\native" />'
|
||||
)
|
||||
dll_list_path = os.path.join(openvino_path, "runtime\\bin\\intel64\\Release\\")
|
||||
tbb_list_path = os.path.join(openvino_path, "runtime\\3rdparty\\tbb\\bin\\")
|
||||
|
||||
for dll_element in os.listdir(dll_list_path):
|
||||
if dll_element.endswith("dll"):
|
||||
files_list.append(
|
||||
|
|
@ -735,26 +720,7 @@ def generate_files(line_list, args):
|
|||
+ args.target_architecture
|
||||
+ '\\native" />'
|
||||
)
|
||||
# plugins.xml
|
||||
files_list.append(
|
||||
"<file src="
|
||||
+ '"'
|
||||
+ os.path.join(dll_list_path, "plugins.xml")
|
||||
+ runtimes_target
|
||||
+ args.target_architecture
|
||||
+ '\\native" />'
|
||||
)
|
||||
# usb-ma2x8x.mvcmd
|
||||
# OpenVINO 2022.3 doesn't have usb-ma2x8x.mvcmd
|
||||
if "2022.3" not in openvino_path:
|
||||
files_list.append(
|
||||
"<file src="
|
||||
+ '"'
|
||||
+ os.path.join(dll_list_path, "usb-ma2x8x.mvcmd")
|
||||
+ runtimes_target
|
||||
+ args.target_architecture
|
||||
+ '\\native" />'
|
||||
)
|
||||
|
||||
for tbb_element in os.listdir(tbb_list_path):
|
||||
if tbb_element.endswith("dll"):
|
||||
files_list.append(
|
||||
|
|
|
|||
Loading…
Reference in a new issue