Openvino ep ort 5.1 (#17042)

OpenVINO EP ORT 5.1 Branch
Changes for the new API to take in OpenVINO Provider Options
and compatibility with OV 2023.1


### Motivation and Context
This change is required so that the new API can accept OpenVINO provider
options seamlessly.

---------

Signed-off-by: MaajidKhan <n.maajid.khan@intel.com>
Co-authored-by: saurabhintel0 <saurabh1.kale@intel.com>
Co-authored-by: MaajidKhan <n.maajid.khan@intel.com>
Co-authored-by: Suryaprakash Shanmugam <suryaprakash.shanmugam@intel.com>
Co-authored-by: Preetha Veeramalai <preetha.veeramalai@intel.com>
This commit is contained in:
sfatimar 2023-08-10 00:20:10 +05:30 committed by GitHub
parent 03c3e91b0d
commit 2c5d4dce77
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
27 changed files with 333 additions and 211 deletions

View file

@ -1236,9 +1236,12 @@ if (onnxruntime_USE_OPENVINO)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0")
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.1")
set(OPENVINO_VERSION "2023.1")
add_definitions(-DOPENVINO_2023_1=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino")
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
set(OPENVINO_VERSION "2023.1")
add_definitions(-DOPENVINO_2023_1=1)
else()
message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}")
endif()

View file

@ -7,6 +7,7 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated VPUs
Installation
------------
@ -15,12 +16,13 @@ Requirements
^^^^^^^^^^^^
- Ubuntu 18.04, 20.04, RHEL(CPU only) or Windows 10 - 64 bit
- Python 3.8, 3.9 or 3.10 for Linux and only Python3.10 for Windows
- Python 3.8 or 3.9 or 3.10 for Linux and only Python3.10 for Windows
This package supports:
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated VPUs
``pip3 install onnxruntime-openvino``
@ -34,7 +36,7 @@ For more details on build and installation please refer to `Build <https://onnxr
Usage
^^^^^
By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated or discrete GPU.
By default, Intel® CPU is used to run inference. However, you can change the default option to either Intel® integrated or discrete GPU.
Invoke `the provider config device type argument <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options>`_ to change the hardware on which inferencing is done.
For more API calls and environment variables, see `Usage <https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#configuration-options>`_.

View file

@ -7,9 +7,6 @@
#include <memory>
#include "core/providers/shared_library/provider_api.h"
#include <inference_engine.hpp>
#include "contexts.h"
#include "backend_manager.h"
#include "ibackend.h"
@ -36,11 +33,11 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
const logging::Logger& logger) {
auto prec_str = GetGlobalContext().precision_str;
if (prec_str == "FP32") {
subgraph_context_.precision = InferenceEngine::Precision::FP32;
subgraph_context_.precision = "FP32";
} else if (prec_str == "FP16") {
subgraph_context_.precision = InferenceEngine::Precision::FP16;
subgraph_context_.precision = "FP16";
} else if (prec_str == "U8") {
subgraph_context_.precision = InferenceEngine::Precision::U8;
subgraph_context_.precision = "U8";
} else {
throw std::string("Invalid OpenVINO Precision type: " + prec_str);
}
@ -78,19 +75,17 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
if (GetGlobalContext().device_type.find("CPU") != std::string::npos ||
GetGlobalContext().device_type.find("GPU") != std::string::npos) {
if (GetGlobalContext().enable_dynamic_shapes) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
<< "Creating backend Dynamic Shapes";
try {
concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
GetGlobalContext(),
subgraph_context_);
} catch (std::string const& msg) {
throw msg;
}
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
<< "Backend created for graph " << subgraph_context_.subgraph_name;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
<< "Creating backend Dynamic Shapes";
try {
concrete_backend_ = BackendFactory::MakeBackend(*model_proto_,
GetGlobalContext(),
subgraph_context_);
} catch (std::string const& msg) {
throw msg;
}
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
<< "Backend created for graph " << subgraph_context_.subgraph_name;
}
} else {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has concrete input dims. Initializing backend for graph " << subgraph_context_.subgraph_name;
@ -257,7 +252,7 @@ void BackendManager::Compute(OrtKernelContext* context) {
}
#endif
bool use_dynamic_backend = true;
if (GetGlobalContext().enable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape &&
if (subgraph_context_.has_dynamic_input_shape &&
(GetGlobalContext().device_type.find("CPU") != std::string::npos ||
GetGlobalContext().device_type.find("GPU") != std::string::npos)) {
concrete_backend_->Infer(context);

View file

@ -8,8 +8,8 @@
#include <fstream>
#include "ov_interface.h"
#include <ngraph/pass/convert_fp32_to_fp16.hpp>
#include <ngraph/pass/constant_folding.hpp>
#include "openvino/pass/convert_fp32_to_fp16.hpp"
#include "openvino/pass/constant_folding.hpp"
#include "core/providers/shared_library/provider_api.h"
#include "backend_utils.h"
@ -50,14 +50,14 @@ struct static_cast_int64 {
std::shared_ptr<OVNetwork>
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context,
const SubGraphContext& subgraph_context,
std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
if (IsCILogEnabled()) {
std::cout << "CreateNgraphFunc" << std::endl;
}
const std::string model = model_proto.SerializeAsString();
try {
auto cnn_network = global_context.ie_core.ReadModel(model);
if ((subgraph_context.precision == InferenceEngine::Precision::FP16) &&
if ((subgraph_context.precision == "FP16") &&
(global_context.device_type.find("VPUX") == std::string::npos)) {
// FP16 transformations
ov::pass::ConvertFP32ToFP16 pass_obj;
@ -88,7 +88,7 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
size_t index = results.size() - 1;
for (auto it = results.rbegin(); it != results.rend(); ++it) {
if (auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
if (auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>((*it)->input_value(0).get_node_shared_ptr())) {
const_outputs_map[(*it)->get_friendly_name()] = const_node;
results.erase(results.begin() + index);
}
@ -96,12 +96,11 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
}
}
#ifndef NDEBUG
#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0)
#if defined(OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1)
if (IsDebugEnabled()) {
std::string name = cnn_network->get_friendly_name();
ov::pass::Serialize serializer(name + ".xml", name + ".bin");
serializer.run_on_model(cnn_network);
ngraph::plot_graph(cnn_network, name + "_executable" + ".dot");
}
#endif
#endif
@ -111,31 +110,6 @@ CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext
}
}
InferenceEngine::Precision ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type) {
ONNX_NAMESPACE::DataType type_string = ONNX_NAMESPACE::Utils::DataTypeUtils::ToType(onnx_type);
if (*type_string == "float" || *type_string == "tensor(float)") {
return InferenceEngine::Precision::FP32;
} else if (*type_string == "float16" || *type_string == "tensor(float16)") {
return InferenceEngine::Precision::FP16;
} else if (*type_string == "int32" || *type_string == "tensor(int32)") {
return InferenceEngine::Precision::I32;
} else if (*type_string == "int16" || *type_string == "tensor(int16)") {
return InferenceEngine::Precision::I16;
} else if (*type_string == "int8" || *type_string == "tensor(int8)") {
return InferenceEngine::Precision::I8;
} else if (*type_string == "uint16" || *type_string == "tensor(uint16)") {
return InferenceEngine::Precision::U16;
} else if (*type_string == "uint8" || *type_string == "tensor(uint8)") {
return InferenceEngine::Precision::U8;
} else if (*type_string == "bool" || *type_string == "tensor(bool)") {
return InferenceEngine::Precision::U8;
} else if (*type_string == "int64" || *type_string == "tensor(int64)") {
return InferenceEngine::Precision::I32;
} else {
throw std::string(log_tag + "Unsupported Data type");
}
}
Ort::UnownedValue
GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
OVInferRequestPtr infer_request,
@ -166,7 +140,7 @@ Ort::UnownedValue
GetOutputTensor(Ort::KernelContext& context,
std::string output_name,
std::unordered_map<std::string, int> output_names,
std::shared_ptr<ngraph::Node> node) {
std::shared_ptr<ov::Node> node) {
// Find position of '/' in the output_name
int pos = output_name.find("/");
// Copy the substring from start to pos
@ -210,25 +184,25 @@ int GetFirstAvailableDevice(GlobalContext& global_context) {
return i;
}
void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::UnownedValue& out_tensor) {
void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor) {
switch (node->get_element_type()) {
case ngraph::element::Type_t::f32: {
case ov::element::Type_t::f32: {
FillOutputHelper<float>(out_tensor, node);
break;
}
case ngraph::element::Type_t::boolean: {
case ov::element::Type_t::boolean: {
FillOutputHelper<char>(out_tensor, node);
break;
}
case ngraph::element::Type_t::i32: {
case ov::element::Type_t::i32: {
FillOutputHelper<int32_t>(out_tensor, node);
break;
}
case ngraph::element::Type_t::i64: {
case ov::element::Type_t::i64: {
FillOutputHelper<int64_t>(out_tensor, node);
break;
}
case ngraph::element::Type_t::f16: {
case ov::element::Type_t::f16: {
FillOutputHelper<float>(out_tensor, node);
break;
}
@ -237,14 +211,22 @@ void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::Unowne
}
}
#if defined(_MSC_VER)
#pragma warning(disable : 4127)
#endif
template <typename T>
void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ngraph::Node> node) {
auto const_node = std::dynamic_pointer_cast<ngraph::op::Constant>(node);
void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ov::Node> node) {
auto const_node = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
auto res = const_node->cast_vector<T>();
T* tensor_data = out_tensor.GetTensorMutableData<T>();
std::copy(res.begin(), res.end(), tensor_data);
}
#if defined(_MSC_VER)
#pragma warning(default : 4127)
#endif
void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx,
std::string input_name, Ort::KernelContext& context,
const SubGraphContext& subgraph_context) {

View file

@ -32,19 +32,16 @@ bool IsCILogEnabled();
int GetFirstAvailableDevice(GlobalContext& global_context);
void FillOutputsWithConstantData(std::shared_ptr<ngraph::Node> node, Ort::UnownedValue& out_tensor);
void FillOutputsWithConstantData(std::shared_ptr<ov::Node> node, Ort::UnownedValue& out_tensor);
template <typename T>
void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ngraph::Node> node);
void FillOutputHelper(Ort::UnownedValue& out_tensor, std::shared_ptr<ov::Node> node);
Ort::UnownedValue
GetOutputTensor(Ort::KernelContext& context,
std::string output_name,
std::unordered_map<std::string, int> output_names,
std::shared_ptr<ngraph::Node> node);
InferenceEngine::Precision
ConvertPrecisionONNXToOpenVINO(const ONNX_NAMESPACE::TypeProto& onnx_type);
std::shared_ptr<ov::Node> node);
Ort::UnownedValue
GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
@ -61,7 +58,7 @@ void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
std::shared_ptr<OVNetwork>
CreateOVModel(const ONNX_NAMESPACE::ModelProto& model_proto, const GlobalContext& global_context, const SubGraphContext& subgraph_context,
std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map);
std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
std::ostream& stream, std::string deviceName);

View file

@ -9,7 +9,7 @@
#include "core/providers/shared_library/provider_api.h"
#include "../backend_utils.h"
#include <ngraph/pass/constant_folding.hpp>
// #include <ngraph/pass/constant_folding.hpp>
#include "basic_backend.h"
#include "../backend_manager.h"
@ -37,6 +37,9 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
// Setting OpenCL queue throttling for GPU
EnableGPUThrottling(device_config);
// Enable streams; default=1 unless overridden by user config
EnableStreams();
#ifndef NDEBUG
if (IsDebugEnabled()) {
std::string file_name = subgraph_context.subgraph_name + "_static.onnx";
@ -45,6 +48,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
}
#endif
try {
std::string dev_prec = global_context.device_type + "_" + global_context_.precision_str;
if (global_context.is_wholly_supported_graph) {
#if defined(IO_BUFFER_ENABLED)
if ((global_context.device_type.find("GPU") != std::string::npos) &&
@ -61,8 +65,8 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
}
#else
#if defined(OPENVINO_2023_0)
if (subgraph_context.precision != InferenceEngine::Precision::FP16 && global_context_.enable_dynamic_shapes == false) {
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
if (!subgraph_context_.has_dynamic_input_shape && dev_prec != "CPU_FP16") {
const std::string model = model_proto.SerializeAsString();
exe_network_ = global_context_.ie_core.LoadNetwork(model, hw_target, device_config, subgraph_context_.subgraph_name);
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
@ -98,7 +102,7 @@ BasicBackend::BasicBackend(const ONNX_NAMESPACE::ModelProto& model_proto,
inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, nireq));
}
bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map) {
bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
if (const_outputs_map.size() == subgraph_context_.output_names.size())
subgraph_context_.is_constant = true;
if (subgraph_context_.is_constant) {
@ -109,20 +113,23 @@ bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ngraph
}
void BasicBackend::PopulateConfigValue(ov::AnyMap& device_config) {
// Set inference precision if device_type != AUTO
// if (global_context_.device_type.find("GPU_FP16")!= std::string::npos){
// device_config.emplace(ov::hint::inference_precision(global_context_.precision_str));
// }
device_config = {};
// Set inference precision based on device precision for OV backend
if (global_context_.precision_str.find("FP16") != std::string::npos && global_context_.device_type == "GPU") {
device_config.emplace(ov::hint::inference_precision("f16"));
}
if (global_context_.precision_str.find("FP32") != std::string::npos) {
device_config.emplace(ov::hint::inference_precision("f32"));
}
#ifndef NDEBUG
if (openvino_ep::backend_utils::IsDebugEnabled()) {
device_config.emplace(ov::enable_profiling(true));
}
#endif
#if defined(OPENVINO_2023_0)
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
if (global_context_.device_type.find("VPUX") != std::string::npos) {
std::pair<std::string, ov::Any> device_property;
device_property = std::make_pair("VPUX_COMPILER_TYPE", "MLIR");
device_property = std::make_pair("VPU_COMPILER_TYPE", "MLIR");
device_config.emplace(ov::device::properties("VPUX", device_property));
}
#endif
@ -147,10 +154,17 @@ void BasicBackend::EnableCaching() {
// Adds the OpenCL queue-throttling setting to `device_config`.
// Acts only when the global context has enable_opencl_throttling set and its
// device_type string contains "GPU"; otherwise `device_config` is left untouched.
void BasicBackend::EnableGPUThrottling(ov::AnyMap& device_config) {
if (global_context_.enable_opencl_throttling == true && global_context_.device_type.find("GPU") != std::string::npos) {
LOGS_DEFAULT(INFO) << log_tag << "Enabled OpenCL queue throttling for GPU device";
// NOTE(review): this direct assignment and the emplace below both add a
// throttle entry, and the same statement also appears commented out at the
// end of this block — confirm which form is meant to remain.
device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
std::pair<std::string, ov::Any> device_property;
device_property = std::make_pair("PLUGIN_THROTTLE", "1");
// NOTE(review): ov::device::properties receives "GPU_CONFIG_KEY" as its first
// argument, whereas PopulateConfigValue passes a device name ("VPUX") in that
// position — confirm that "GPU_CONFIG_KEY" is intended here rather than "GPU".
device_config.emplace(ov::device::properties("GPU_CONFIG_KEY", device_property));
// device_config[GPU_CONFIG_KEY(PLUGIN_THROTTLE)] = "1";
}
}
// Forwards the stream count stored in the global context to the OpenVINO core
// wrapper, keyed by the global context's device type.
void BasicBackend::EnableStreams() {
  const auto& target_device = global_context_.device_type;
  const auto stream_count = global_context_.num_streams;
  global_context_.ie_core.SetStreams(target_device, stream_count);
}
// Starts an asynchronous inference request for data in slice indexed by batch_slice_idx on
// an Infer Request indexed by infer_req_idx
void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferRequestPtr infer_request) {
@ -177,7 +191,6 @@ void BasicBackend::StartAsyncInference(Ort::KernelContext& context, OVInferReque
}
size_t batch_slice_idx = 0;
if (subgraph_context_.has_dynamic_input_shape &&
global_context_.enable_dynamic_shapes == true &&
(global_context_.device_type.find("CPU") != std::string::npos ||
global_context_.device_type.find("GPU") != std::string::npos)) {
auto tensor = context.GetInput(subgraph_context_.input_names.at(input_name));

View file

@ -31,10 +31,11 @@ class BasicBackend : public IBackend {
private:
bool ImportBlob(std::string hw_target, bool vpu_status);
void PopulateCompiledDirectory(std::string, std::string&, std::string&, bool&);
bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ngraph::Node>>& const_outputs_map);
bool ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map);
void PopulateConfigValue(ov::AnyMap& device_config);
void EnableCaching();
void EnableGPUThrottling(ov::AnyMap& device_config);
void EnableStreams();
void StartAsyncInference(Ort::KernelContext& context, std::shared_ptr<OVInferRequest> infer_request);
#ifdef IO_BUFFER_ENABLED
@ -48,7 +49,7 @@ class BasicBackend : public IBackend {
mutable std::mutex compute_lock_;
std::shared_ptr<OVNetwork> ie_cnn_network_;
OVExeNetwork exe_network_;
std::map<std::string, std::shared_ptr<ngraph::Node>> const_outputs_map_;
std::map<std::string, std::shared_ptr<ov::Node>> const_outputs_map_;
std::unique_ptr<InferRequestsQueue> inferRequestsQueue_;
#if defined IO_BUFFER_ENABLED
OVRemoteContextPtr remote_context_;

View file

@ -20,6 +20,7 @@ struct GlobalContext {
std::string precision_str;
std::string device_id;
std::string cache_dir;
int num_streams;
std::vector<bool> deviceAvailableList = {true, true, true, true, true, true, true, true};
std::vector<std::string> deviceTags = {"0", "1", "2", "3", "4", "5", "6", "7"};
std::string onnx_model_name;
@ -40,7 +41,7 @@ struct SubGraphContext {
std::vector<int> input_indexes;
std::unordered_map<std::string, int> input_names;
std::unordered_map<std::string, int> output_names;
OVPrecision precision;
std::string precision;
};
} // namespace openvino_ep

View file

@ -19,6 +19,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
openvino_ep::BackendManager::GetGlobalContext().precision_str = info.precision_;
openvino_ep::BackendManager::GetGlobalContext().enable_vpu_fast_compile = info.enable_vpu_fast_compile_;
openvino_ep::BackendManager::GetGlobalContext().cache_dir = info.cache_dir_;
openvino_ep::BackendManager::GetGlobalContext().num_streams = info.num_streams_;
openvino_ep::BackendManager::GetGlobalContext().context = info.context_;
openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_;
openvino_ep::BackendManager::GetGlobalContext().enable_dynamic_shapes = info.enable_dynamic_shapes_;
@ -130,6 +131,10 @@ OpenVINOExecutionProvider::GetCapability(const GraphViewer& graph_viewer,
openvino_ep::GetCapability obj(graph_viewer,
openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_0");
result = obj.Execute();
#elif defined(OPENVINO_2023_1)
openvino_ep::GetCapability obj(graph_viewer,
openvino_ep::BackendManager::GetGlobalContext().device_type, "V_2023_1");
result = obj.Execute();
#endif
return result;

View file

@ -57,15 +57,16 @@ struct OpenVINOExecutionProviderInfo {
std::string device_id_;
size_t num_of_threads_;
std::string cache_dir_;
int num_streams_;
void* context_;
bool enable_opencl_throttling_;
bool enable_dynamic_shapes_;
explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id,
size_t num_of_threads, std::string cache_dir,
size_t num_of_threads, std::string cache_dir, int num_streams,
void* context, bool enable_opencl_throttling,
bool enable_dynamic_shapes)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), cache_dir_(cache_dir), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), cache_dir_(cache_dir), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
if (dev_type == "") {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
<< "No runtime device selection option provided.";
@ -149,7 +150,7 @@ struct OpenVINOExecutionProviderInfo {
<< "Choosing Device: " << device_type_ << " , Precision: " << precision_;
}
OpenVINOExecutionProviderInfo() {
OpenVINOExecutionProviderInfo("", false, "", 0, "", NULL, false, false);
OpenVINOExecutionProviderInfo("", false, "", 0, "", 1, NULL, false, false);
}
};

View file

@ -3,16 +3,16 @@
#include "core/providers/shared_library/provider_api.h"
#include "core/providers/openvino/openvino_provider_factory.h"
#include "openvino_execution_provider.h"
#include "openvino_provider_factory_creator.h"
#include "core/providers/openvino/openvino_execution_provider.h"
#include "core/providers/openvino/openvino_provider_factory_creator.h"
namespace onnxruntime {
struct OpenVINOProviderFactory : IExecutionProviderFactory {
OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
const char* device_id, size_t num_of_threads,
const char* cache_dir, void* context,
const char* cache_dir, int num_streams, void* context,
bool enable_opencl_throttling, bool enable_dynamic_shapes)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), num_streams_(num_streams), context_(context), enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
device_type_ = (device_type == nullptr) ? "" : device_type;
device_id_ = (device_id == nullptr) ? "" : device_id;
cache_dir_ = (cache_dir == nullptr) ? "" : cache_dir;
@ -28,6 +28,7 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
std::string device_id_;
size_t num_of_threads_;
std::string cache_dir_;
int num_streams_;
void* context_;
bool enable_opencl_throttling_;
bool enable_dynamic_shapes_;
@ -35,20 +36,11 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_,
cache_dir_, context_, enable_opencl_throttling_,
cache_dir_, num_streams_, context_, enable_opencl_throttling_,
enable_dynamic_shapes_);
return std::make_unique<OpenVINOExecutionProvider>(info);
}
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads,
const char* cache_dir, void* context, bool enable_opencl_throttling,
bool enable_dynamic_shapes) {
return std::make_shared<onnxruntime::OpenVINOProviderFactory>(device_type, enable_vpu_fast_compile,
device_id, num_of_threads, cache_dir, context, enable_opencl_throttling,
enable_dynamic_shapes);
}
} // namespace onnxruntime
namespace onnxruntime {
@ -63,12 +55,81 @@ struct OpenVINO_Provider : Provider {
void* GetInfo() override { return &g_info; }
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const void* void_params) override {
auto& params = *reinterpret_cast<const OrtOpenVINOProviderOptions*>(void_params);
return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile,
params.device_id, params.num_of_threads,
params.cache_dir,
params.context, params.enable_opencl_throttling,
params.enable_dynamic_shapes);
auto& provider_options_map = *reinterpret_cast<const ProviderOptions*>(void_params);
const char* device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision
// with these values at runtime.
bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to
// speeds up the model's compilation to VPU device specific format.
const char* device_id = ""; // [device_id]: Selects a particular hardware device for inference.
size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of
// threads with this value at runtime.
const char* cache_dir = ""; // [cache_dir]: specify the path to
// dump and load the blobs for the model caching/kernel caching (GPU)
// feature. If blob files are already present, it will be directly loaded.
int num_streams = 1; // [num_streams]: Option that specifies the number of parallel inference
// requests to be processed on a given `device_type`. Overrides the
// accelerator default value of number of streams with this value at runtime.
bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
// device (Reduces CPU Utilization when using GPU)
bool enable_dynamic_shapes = false; // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device)
void* context = nullptr;
if (provider_options_map.find("device_type") != provider_options_map.end()) {
device_type = provider_options_map.at("device_type").c_str();
}
if (provider_options_map.find("device_id") != provider_options_map.end()) {
device_id = provider_options_map.at("device_id").c_str();
}
if (provider_options_map.find("cache_dir") != provider_options_map.end()) {
cache_dir = provider_options_map.at("cache_dir").c_str();
}
if (provider_options_map.find("context") != provider_options_map.end()) {
context = (void*)provider_options_map.at("context").c_str();
}
if (provider_options_map.find("num_of_threads") != provider_options_map.end()) {
num_of_threads = std::stoi(provider_options_map.at("num_of_threads"));
}
if (provider_options_map.find("num_streams") != provider_options_map.end()) {
num_streams = std::stoi(provider_options_map.at("num_streams"));
}
std::string bool_flag = "";
if (provider_options_map.find("enable_vpu_fast_compile") != provider_options_map.end()) {
bool_flag = provider_options_map.at("enable_vpu_fast_compile");
if (bool_flag == "true" || bool_flag == "True")
enable_vpu_fast_compile = true;
else if (bool_flag == "false" || bool_flag == "False")
enable_vpu_fast_compile = false;
bool_flag = "";
}
if (provider_options_map.find("enable_opencl_throttling") != provider_options_map.end()) {
bool_flag = provider_options_map.at("enable_opencl_throttling");
if (bool_flag == "true" || bool_flag == "True")
enable_opencl_throttling = true;
else if (bool_flag == "false" || bool_flag == "False")
enable_opencl_throttling = false;
bool_flag = "";
}
if (provider_options_map.find("enable_dynamic_shapes") != provider_options_map.end()) {
bool_flag = provider_options_map.at("enable_dynamic_shapes");
if (bool_flag == "true" || bool_flag == "True")
enable_dynamic_shapes = true;
else if (bool_flag == "false" || bool_flag == "False")
enable_dynamic_shapes = false;
}
return std::make_shared<OpenVINOProviderFactory>(device_type,
enable_vpu_fast_compile,
device_id,
num_of_threads,
cache_dir,
num_streams,
context,
enable_opencl_throttling,
enable_dynamic_shapes);
}
void Initialize() override {

View file

@ -6,12 +6,14 @@
#include <memory>
#include "core/providers/providers.h"
#include "core/framework/provider_options.h"
struct OrtOpenVINOProviderOptions;
namespace onnxruntime {
// defined in provider_bridge_ort.cc
// Entry points that return an execution-provider factory for OpenVINO.
struct OpenVINOProviderFactoryCreator {
// Overload taking a pointer to a ProviderOptions map.
static std::shared_ptr<IExecutionProviderFactory> Create(const ProviderOptions* provider_options_map);
// Overload taking a pointer to an OrtOpenVINOProviderOptions struct.
static std::shared_ptr<IExecutionProviderFactory> Create(const OrtOpenVINOProviderOptions* provider_options);
};
} // namespace onnxruntime

View file

@ -42,7 +42,7 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std
}
}
#if defined(OPENVINO_2023_0)
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
OVExeNetwork OVCore::LoadNetwork(const std::string& model, std::string& hw_target, ov::AnyMap& device_config, std::string name) {
ov::CompiledModel obj;
try {
@ -75,8 +75,12 @@ OVExeNetwork OVCore::LoadNetwork(std::shared_ptr<OVNetwork>& model, OVRemoteCont
#endif
// Returns the device list reported by the wrapped ov::Core instance.
std::vector<std::string> OVCore::GetAvailableDevices() {
  return oe.get_available_devices();
}
void OVCore::SetStreams(const std::string& device_type, int num_streams) {
oe.set_property(device_type, {ov::num_streams(num_streams)});
}
OVInferRequest OVExeNetwork::CreateInferRequest() {

View file

@ -5,11 +5,12 @@
#include <vector>
#include <inference_engine.hpp>
#if defined(OPENVINO_2022_1) || (OPENVINO_2022_2) || (OPENVINO_2022_3) || (OPENVINO_2023_0)
#if defined(OPENVINO_2022_1) || (OPENVINO_2022_2) || (OPENVINO_2022_3) || (OPENVINO_2023_0) || (OPENVINO_2023_1)
#define OV_API_20
#include "openvino/openvino.hpp"
#include "openvino/pass/convert_fp32_to_fp16.hpp"
#else
#include <inference_engine.hpp>
#endif
#ifdef IO_BUFFER_ENABLED
@ -26,10 +27,8 @@ class OVCore;
class OVInferRequest;
class OVExeNetwork;
typedef InferenceEngine::Precision OVPrecision;
typedef ov::Tensor OVTensor;
typedef ov::ProfilingInfo OVProfilingInfo;
typedef ov::AnyMap OVConfig;
typedef ov::Model OVNetwork;
typedef std::shared_ptr<OVInferRequest> OVInferRequestPtr;
typedef std::shared_ptr<OVTensor> OVTensorPtr;
@ -45,7 +44,7 @@ class OVCore {
public:
std::shared_ptr<OVNetwork> ReadModel(const std::string& model_stream) const;
OVExeNetwork LoadNetwork(std::shared_ptr<OVNetwork>& ie_cnn_network, std::string& hw_target, ov::AnyMap& device_config, std::string name);
#if defined(OPENVINO_2023_0)
#if defined(OPENVINO_2023_0) || (OPENVINO_2023_1)
OVExeNetwork LoadNetwork(const std::string& model_stream, std::string& hw_target, ov::AnyMap& device_config, std::string name);
#endif
void SetCache(std::string cache_dir_path);
@ -56,6 +55,7 @@ class OVCore {
ov::Core& Get() {
return oe;
}
void SetStreams(const std::string& device_type, int num_streams);
};
class OVExeNetwork {

View file

@ -33,8 +33,10 @@ GetCapability::GetCapability(const GraphViewer& graph_viewer_param, std::string
data_ops_ = new DataOps(graph_viewer_, V_2022_3, device_type_);
} else if (version_param == "V_2023_0") {
data_ops_ = new DataOps(graph_viewer_, V_2023_0, device_type_);
} else if (version_param == "V_2023_1") {
data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_);
} else {
data_ops_ = new DataOps(graph_viewer_, V_2023_0, device_type_);
data_ops_ = new DataOps(graph_viewer_, V_2023_1, device_type_);
}
}
@ -46,6 +48,11 @@ std::vector<std::unique_ptr<ComputeCapability>> GetCapability::Execute() {
return result;
}
// Check if it is a subgraph
if (graph_viewer_.IsSubgraph() && graph_viewer_.Name() == "tf2onnx") {
return result;
}
// This is a list of initializers that nGraph considers as constants. Example weights, reshape shape etc.
std::unordered_set<std::string> ng_required_initializers;

View file

@ -17,8 +17,8 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
#include <ngraph/ngraph.hpp>
#include <ngraph/frontend/onnx_import/onnx.hpp>
// #include <ngraph/ngraph.hpp>
// #include <ngraph/frontend/onnx_import/onnx.hpp>
#if defined(_MSC_VER)
#pragma warning(default : 4244 4245)
#elif __GNUC__
@ -36,6 +36,7 @@ std::set<std::string> ops_supported_only_in_model = {
"ConstantOfShape",
"DequantizeLinear",
"Dropout",
"Einsum",
"Exp",
"Expand",
"EyeLike",
@ -127,6 +128,7 @@ std::vector<SupportedOp> supported_op_mode = {
{"Dropout", V_2023_0, {"VPUX"}},
{"Elu", V_2020_4, {"CPU", "GPU"}},
{"Elu", V_2023_0, {"VPUX"}},
// {"Einsum", V_2023_0, {"CPU", "GPU"}},
{"Equal", V_2020_4, {"CPU", "GPU"}},
{"Equal", V_2023_0, {"VPUX"}}, // Added for whisper decoder model.
{"Erf", V_2020_4, {"CPU", "GPU"}},
@ -155,6 +157,7 @@ std::vector<SupportedOp> supported_op_mode = {
{"GreaterOrEqual", V_2022_1, {"CPU", "GPU"}},
{"GreaterOrEqual", V_2023_0, {"VPUX"}},
{"GridSample", V_2022_3, {"CPU"}},
{"GridSample", V_2023_0, {"GPU"}},
{"Identity", V_2020_4, {"CPU", "GPU"}},
{"Identity", V_2023_0, {"VPUX"}}, // NoOP
{"If", V_2022_3, {"CPU", "GPU"}},
@ -196,6 +199,7 @@ std::vector<SupportedOp> supported_op_mode = {
{"Neg", V_2023_0, {"VPUX"}},
{"NonMaxSuppression", V_2021_1, {"CPU", "GPU"}},
{"NonZero", V_2021_1, {"CPU"}},
{"NonZero", V_2023_0, {"GPU"}},
{"Not", V_2021_1, {"CPU", "GPU"}},
{"Not", V_2020_4, {"CPU", "GPU"}},
{"OneHot", V_2020_4, {"CPU", "GPU"}},
@ -210,6 +214,7 @@ std::vector<SupportedOp> supported_op_mode = {
{"QuantizeLinear", V_2021_4, {"CPU", "GPU"}},
{"QuantizeLinear", V_2023_0, {"VPUX"}},
{"RandomNormalLike", V_2023_0, {"CPU", "GPU"}},
{"RandomNormal", V_2023_0, {"CPU", "GPU"}},
{"Range", V_2022_1, {"CPU", "GPU"}},
{"Range", V_2023_0, {"VPUX"}},
{"Reciprocal", V_2020_4, {"CPU", "GPU"}},
@ -341,6 +346,7 @@ void DataOps::populate_op_mode_supported() {
no_dimension_supported_.push_back({"Div", V_2020_4, {"All"}});
no_dimension_supported_.push_back({"DequantizeLinear", V_2021_4, {"All"}});
no_dimension_supported_.push_back({"Equal", V_2022_1, {"CPU"}});
no_dimension_supported_.push_back({"Equal", V_2023_0, {"GPU"}});
no_dimension_supported_.push_back({"Floor", V_2020_4, {"All"}});
no_dimension_supported_.push_back({"Gather", V_2020_4, {"All"}});
no_dimension_supported_.push_back({"Greater", V_2023_0, {"VPUX"}});
@ -356,6 +362,7 @@ void DataOps::populate_op_mode_supported() {
no_dimension_supported_.push_back({"ReduceProd", V_2022_1, {"CPU", "GPU"}});
no_dimension_supported_.push_back({"Reshape", V_2022_1, {"All"}});
no_dimension_supported_.push_back({"Shape", V_2022_1, {"GPU"}});
no_dimension_supported_.push_back({"Shape", V_2023_0, {"CPU"}});
no_dimension_supported_.push_back({"Squeeze", V_2020_4, {"All"}});
no_dimension_supported_.push_back({"Sub", V_2020_4, {"All"}});
no_dimension_supported_.push_back({"Unsqueeze", V_2020_4, {"All"}});
@ -1022,8 +1029,10 @@ bool DataOps::node_is_supported(const std::map<std::string, std::set<std::string
// Zero dimension check
for (const auto& dim : shape->dim()) {
if (utils::HasDimValue(dim) && dim.dim_value() == 0) {
if ((device_id_.find("GPU") != std::string::npos) && ((optype == "Expand") ||
(optype == "Slice") || (optype == "Concat") || (optype == "Shape"))) {
if (((device_id_.find("CPU") != std::string::npos) || (device_id_.find("GPU") != std::string::npos)) &&
((optype == "Expand") || (optype == "Equal") ||
(optype == "Slice") || (optype == "Concat") ||
(optype == "Shape"))) {
return;
}
has_unsupported_dimension = true;

View file

@ -18,7 +18,8 @@ enum versionNum {
V_2022_1,
V_2022_2,
V_2022_3,
V_2023_0
V_2023_0,
V_2023_1,
};
using VersionNum = enum versionNum;

View file

@ -9,8 +9,15 @@
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-parameter"
#endif
#include <ngraph/ngraph.hpp>
#include "openvino/core/deprecated.hpp"
#define IN_OV_COMPONENT
#define NGRAPH_LEGACY_HEADER_INCLUDED
#include <ngraph/frontend/onnx_import/onnx.hpp>
#undef NGRAPH_LEGACY_HEADER_INCLUDED
#undef IN_OV_COMPONENT
#if defined(_MSC_VER)
#pragma warning(default : 4244 4245)
#elif __GNUC__
@ -40,6 +47,7 @@ bool IsOpSupportedOnlyInModel(std::string name) {
"Concat",
"ConstantOfShape",
"Dropout",
"Einsum",
"Expand",
"EyeLike",
"Exp",
@ -88,6 +96,7 @@ int GetOnnxOpSet(const GraphViewer& graph_viewer) {
std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_opset) {
std::map<std::string, std::set<std::string>> ng_supported_ops;
OPENVINO_SUPPRESS_DEPRECATED_START
ng_supported_ops.emplace(kOnnxDomain, ngraph::onnx_import::get_supported_operators(onnx_opset, kOnnxDomain));
const std::set<std::string> ng_disabled_ops = {"LSTM"}; // Place-holder for ops not supported.
@ -95,7 +104,7 @@ std::map<std::string, std::set<std::string>> GetNgSupportedOps(const int onnx_op
for (const auto& disabled_op : ng_disabled_ops) {
ng_supported_ops.at(kOnnxDomain).erase(disabled_op);
}
OPENVINO_SUPPRESS_DEPRECATED_END
return ng_supported_ops;
}

View file

@ -1426,8 +1426,44 @@ std::shared_ptr<IExecutionProviderFactory> MIGraphXProviderFactoryCreator::Creat
return s_library_migraphx.Get().CreateExecutionProviderFactory(provider_options);
}
// Adapter that converts the legacy OrtOpenVINOProviderOptions struct into the string-keyed
// ProviderOptions map.
//
// Keys written:
//   "device_type", "device_id", "cache_dir"  - only when the corresponding legacy pointer is non-null
//   "enable_vpu_fast_compile", "enable_opencl_throttling", "enable_dynamic_shapes" - "true" / "false"
//   "num_of_threads"                          - decimal text of the legacy value
//   "context"                                 - streamed text of the context pointer, "" when null
//   "num_streams"                             - always "1"
//
// `legacy_ov_options` is dereferenced unconditionally and must not be null.
ProviderOptions OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(const OrtOpenVINOProviderOptions* legacy_ov_options) {
  // Flags have to be spelled out as text. Assigning the raw flag straight to a std::string goes
  // through the single-character assignment and stores one 0x00 / 0x01 character instead of
  // "true" / "false", so an enabled legacy flag would not be recognisable downstream.
  const auto flag_to_string = [](bool flag) { return std::string(flag ? "true" : "false"); };

  ProviderOptions ov_options_converted_map;

  if (legacy_ov_options->device_type != nullptr) {
    ov_options_converted_map["device_type"] = legacy_ov_options->device_type;
  }

  ov_options_converted_map["enable_vpu_fast_compile"] =
      flag_to_string(legacy_ov_options->enable_vpu_fast_compile != 0);

  if (legacy_ov_options->device_id != nullptr) {
    ov_options_converted_map["device_id"] = legacy_ov_options->device_id;
  }

  ov_options_converted_map["num_of_threads"] = std::to_string(legacy_ov_options->num_of_threads);

  if (legacy_ov_options->cache_dir != nullptr) {
    ov_options_converted_map["cache_dir"] = legacy_ov_options->cache_dir;
  }

  // The context pointer is carried across in its streamed textual form; the entry is always
  // present and is the empty string when no context was supplied.
  std::stringstream context_string;
  if (legacy_ov_options->context != nullptr) {
    context_string << legacy_ov_options->context;
  }
  ov_options_converted_map["context"] = context_string.str();

  ov_options_converted_map["enable_opencl_throttling"] =
      flag_to_string(legacy_ov_options->enable_opencl_throttling != 0);
  ov_options_converted_map["enable_dynamic_shapes"] =
      flag_to_string(legacy_ov_options->enable_dynamic_shapes != 0);

  // Add new provider option below.
  // The legacy struct is not consulted for num_streams; it is always set to "1".
  ov_options_converted_map["num_streams"] = "1";

  return ov_options_converted_map;
}
std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Create(const OrtOpenVINOProviderOptions* provider_options) {
return s_library_openvino.Get().CreateExecutionProviderFactory(provider_options);
ProviderOptions ov_options_converted_map = onnxruntime::OrtOpenVINOProviderOptionsToOrtOpenVINOProviderOptionsV2(provider_options);
return s_library_openvino.Get().CreateExecutionProviderFactory(&ov_options_converted_map);
}
// Creates the OpenVINO execution-provider factory from a string-keyed ProviderOptions map.
// The pointer is handed, unchanged, to the OpenVINO provider library's factory entry point.
std::shared_ptr<IExecutionProviderFactory> OpenVINOProviderFactoryCreator::Create(const ProviderOptions* provider_options_map) {
  auto&& openvino_provider = s_library_openvino.Get();
  return openvino_provider.CreateExecutionProviderFactory(provider_options_map);
}
std::shared_ptr<IExecutionProviderFactory> DnnlProviderFactoryCreator::Create(const OrtDnnlProviderOptions* dnnl_options) {

View file

@ -10,6 +10,7 @@
#include "core/session/abi_session_options_impl.h"
#include "core/session/onnxruntime_c_api.h"
#include "core/session/ort_apis.h"
#include "core/providers/openvino/openvino_provider_factory_creator.h"
using namespace onnxruntime;
@ -71,6 +72,12 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider,
options->provider_factories.push_back(QNNProviderFactoryCreator::Create(provider_options, &(options->value)));
#else
status = create_not_supported_status();
#endif
} else if (strcmp(provider_name, "OpenVINO") == 0) {
#if defined(USE_OPENVINO)
options->provider_factories.push_back(OpenVINOProviderFactoryCreator::Create(&provider_options));
#else
status = create_not_supported_status();
#endif
} else if (strcmp(provider_name, "SNPE") == 0) {
#if defined(USE_SNPE)
@ -115,7 +122,7 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider,
} else {
ORT_UNUSED_PARAMETER(options);
status = OrtApis::CreateStatus(ORT_INVALID_ARGUMENT,
"Unknown provider name. Currently supported values are 'SNPE', 'XNNPACK', and 'AZURE'");
"Unknown provider name. Currently supported values are 'OPENVINO', 'SNPE', 'XNNPACK', 'QNN', 'WEBNN' and 'AZURE'");
}
return status;

View file

@ -39,8 +39,8 @@ void addGlobalSchemaFunctions(pybind11::module& m) {
#endif
#ifdef USE_OPENVINO
[]() {
OrtOpenVINOProviderOptions provider_options;
return onnxruntime::OpenVINOProviderFactoryCreator::Create(&provider_options);
ProviderOptions provider_options_map;
return onnxruntime::OpenVINOProviderFactoryCreator::Create(&provider_options_map);
}(),
#endif
#ifdef USE_TENSORRT

View file

@ -780,56 +780,53 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(
#endif
} else if (type == kOpenVINOExecutionProvider) {
#ifdef USE_OPENVINO
OrtOpenVINOProviderOptions params;
params.device_type = openvino_device_type.c_str();
std::string cache_dir;
ProviderOptions OV_provider_options_map;
auto it = provider_options_map.find(type);
if (it != provider_options_map.end()) {
for (auto option : it->second) {
if (option.first == "device_type") {
openvino_device_type = option.second;
params.device_type = openvino_device_type.c_str();
OV_provider_options_map[option.first] = option.second;
continue;
} else if (option.first == "enable_vpu_fast_compile") {
if (option.second == "True") {
params.enable_vpu_fast_compile = true;
} else if (option.second == "False") {
params.enable_vpu_fast_compile = false;
} else {
if (!(option.second == "True" || option.second == "true" ||
option.second == "False" || option.second == "false")) {
ORT_THROW("Invalid value passed for enable_vpu_fast_compile: ", option.second);
}
OV_provider_options_map[option.first] = option.second;
} else if (option.first == "enable_opencl_throttling") {
if (option.second == "True") {
params.enable_opencl_throttling = true;
} else if (option.second == "False") {
params.enable_opencl_throttling = false;
} else {
if (!(option.second == "True" || option.second == "true" ||
option.second == "False" || option.second == "false")) {
ORT_THROW("Invalid value passed for enable_opencl_throttling: ", option.second);
}
OV_provider_options_map[option.first] = option.second;
} else if (option.first == "enable_dynamic_shapes") {
if (option.second == "True") {
params.enable_dynamic_shapes = true;
} else if (option.second == "False") {
params.enable_dynamic_shapes = false;
} else {
if (!(option.second == "True" || option.second == "true" ||
option.second == "False" || option.second == "false")) {
ORT_THROW("Invalid value passed for enable_dynamic_shapes: ", option.second);
}
OV_provider_options_map[option.first] = option.second;
} else if (option.first == "device_id") {
params.device_id = option.second.c_str();
OV_provider_options_map[option.first] = option.second;
continue;
} else if (option.first == "num_of_threads") {
params.num_of_threads = std::stoi(option.second);
OV_provider_options_map[option.first] = option.second;
continue;
} else if (option.first == "num_streams") {
OV_provider_options_map[option.first] = option.second;
continue;
} else if (option.first == "cache_dir") {
cache_dir = option.second;
params.cache_dir = cache_dir.c_str();
OV_provider_options_map[option.first] = option.second;
continue;
} else if (option.first == "context") {
params.context = (void*)(option.second.c_str());
OV_provider_options_map[option.first] = option.second;
continue;
} else {
ORT_THROW("Invalid OpenVINO EP option: ", option.first);
}
}
}
if (std::shared_ptr<IExecutionProviderFactory> openvino_provider_factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(&params)) {
if (std::shared_ptr<IExecutionProviderFactory> openvino_provider_factory = onnxruntime::OpenVINOProviderFactoryCreator::Create(
&OV_provider_options_map)) {
auto p = openvino_provider_factory->CreateProvider();
// Reset global variables config to avoid it being accidentally passed on to the next session
openvino_device_type.clear();

View file

@ -440,7 +440,6 @@ std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGrap
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGraphX(int device_id);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Cuda(const OrtCUDAProviderOptions* params);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Dnnl(const OrtDnnlProviderOptions* params);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
#ifdef USE_TVM
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tvm(const tvm::TvmEPOptions& info);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tvm(const char* params);

View file

@ -423,24 +423,12 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
#endif
} else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) {
#ifdef USE_OPENVINO
std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision
// with these values at runtime.
bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to
// speeds up the model's compilation to VPU device specific format.
std::string device_id = ""; // [device_id]: Selects a particular hardware device for inference.
size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of
// threads with this value at runtime.
std::string cache_dir = ""; // [cache_dir]: specify the path to
// dump and load the blobs for the model caching/kernel caching (GPU)
// feature. If blob files are already present, it will be directly loaded.
bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
// device (Reduces CPU Utilization when using GPU)
bool enable_dynamic_shapes = false; // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device)
#ifdef _MSC_VER
std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
#else
std::string ov_string = performance_test_config.run_config.ep_runtime_config_string;
#endif
std::unordered_map<std::string, std::string> ov_options;
std::istringstream ss(ov_string);
std::string token;
while (ss >> token) {
@ -461,69 +449,64 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
"GPU.0_FP16", "GPU.1_FP16",
"VPUX_FP16", "VPUX_U8"};
if (ov_supported_device_types.find(value) != ov_supported_device_types.end()) {
device_type = value;
ov_options[key] = value;
} else if (value.find("HETERO:") == 0) {
device_type = value;
ov_options[key] = value;
} else if (value.find("MULTI:") == 0) {
device_type = value;
ov_options[key] = value;
} else if (value.find("AUTO:") == 0) {
device_type = value;
ov_options[key] = value;
} else {
ORT_THROW(
"[ERROR] [OpenVINO] You have selcted wrong configuration value for the key 'device_type'. "
"Select from 'CPU_FP32', 'CPU_FP16', 'GPU_FP32', 'GPU.0_FP32', 'GPU.1_FP32', 'GPU_FP16', "
"'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8', or from"
"'GPU.0_FP16', 'GPU.1_FP16', 'VPUX_FP16', 'VPUX_U8' or from"
" HETERO/MULTI/AUTO options available. \n");
}
} else if (key == "device_id") {
device_id = value;
ov_options[key] = value;
} else if (key == "enable_vpu_fast_compile") {
if (value == "true" || value == "True") {
enable_vpu_fast_compile = true;
} else if (value == "false" || value == "False") {
enable_vpu_fast_compile = false;
if (value == "true" || value == "True" ||
value == "false" || value == "False") {
ov_options[key] = value;
} else {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_vpu_fast_compile' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "enable_opencl_throttling") {
if (value == "true" || value == "True") {
enable_opencl_throttling = true;
} else if (value == "false" || value == "False") {
enable_opencl_throttling = false;
if (value == "true" || value == "True" ||
value == "false" || value == "False") {
ov_options[key] = value;
} else {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_opencl_throttling' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "enable_dynamic_shapes") {
if (value == "true" || value == "True") {
enable_dynamic_shapes = true;
} else if (value == "false" || value == "False") {
enable_dynamic_shapes = false;
if (value == "true" || value == "True" ||
value == "false" || value == "False") {
ov_options[key] = value;
} else {
ORT_THROW(
"[ERROR] [OpenVINO] The value for the key 'enable_dynamic_shapes' "
"should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "num_of_threads") {
std::stringstream sstream(value);
sstream >> num_of_threads;
if ((int)num_of_threads <= 0) {
if (std::stoi(value) <= 0) {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_of_threads' should be greater than 0\n");
} else {
ov_options[key] = value;
}
} else if (key == "cache_dir") {
cache_dir = value;
ov_options[key] = value;
} else if (key == "num_streams") {
if (std::stoi(value) <= 0 && std::stoi(value) > 8) {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'num_streams' should be in the range of 1-8 \n");
} else {
ov_options[key] = value;
}
} else {
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'cache_dir', 'enable_opencl_throttling|true'] \n");
ORT_THROW("[ERROR] [OpenVINO] wrong key type entered. Choose from the following runtime key options that are available for OpenVINO. ['device_type', 'device_id', 'enable_vpu_fast_compile', 'num_of_threads', 'cache_dir', 'num_streams', 'enable_opencl_throttling|true'] \n");
}
}
OrtOpenVINOProviderOptions options;
options.device_type = device_type.c_str(); // To set the device_type
options.device_id = device_id.c_str(); // To set the device_id
options.enable_vpu_fast_compile = enable_vpu_fast_compile; // To enable_vpu_fast_compile, default is false
options.num_of_threads = num_of_threads; // To set number of free InferRequests, default is 8
options.cache_dir = cache_dir.c_str(); // sets the cache_dir, default is ""
options.enable_opencl_throttling = enable_opencl_throttling; // Enables GPU Throttling (Reduces CPU Utilization)
options.enable_dynamic_shapes = enable_dynamic_shapes; // Enables Dynamic Shapes feature
session_options.AppendExecutionProvider_OpenVINO(options);
session_options.AppendExecutionProvider("OpenVINO", ov_options);
#else
ORT_THROW("OpenVINO is not supported in this build\n");
#endif
@ -579,7 +562,7 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
ORT_THROW("Supported htp_performance_mode: " + str);
}
} else {
ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable',
ORT_THROW(R"(Wrong key type entered. Choose from options: ['backend_path', 'qnn_context_cache_enable',
'qnn_context_cache_path', 'profiling_level', 'rpc_control_latency', 'htp_performance_mode'])");
}

View file

@ -22,7 +22,7 @@ TEST(Einsum, ExplicitEinsumAsIdentity_1D_input) {
test.AddAttribute<std::string>("equation", "i->i");
test.AddInput<float>("x", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
test.AddOutput<float>("y", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
}
// Implicit
@ -31,7 +31,7 @@ TEST(Einsum, ImplicitEinsumAsIdentity_1D_input) {
test.AddAttribute<std::string>("equation", "i");
test.AddInput<float>("x", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
test.AddOutput<float>("y", {5}, {0.9f, 2.5f, 2.3f, 1.5f, -4.5f});
test.Run();
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider});
}
// Theme: Transpose/Permutation

View file

@ -86,10 +86,19 @@ std::unique_ptr<IExecutionProvider> MIGraphXExecutionProviderWithOptions(const O
return nullptr;
}
// Test helper: creates an OpenVINO execution provider from legacy-style provider options.
// When the build does not define USE_OPENVINO, `params` is ignored and nullptr is returned.
std::unique_ptr<IExecutionProvider> OpenVINOExecutionProviderWithOptions(const OrtOpenVINOProviderOptions* params) {
#ifdef USE_OPENVINO
  const auto openvino_factory = OpenVINOProviderFactoryCreator::Create(params);
  return openvino_factory->CreateProvider();
#else
  ORT_UNUSED_PARAMETER(params);
  return nullptr;
#endif
}
std::unique_ptr<IExecutionProvider> DefaultOpenVINOExecutionProvider() {
#ifdef USE_OPENVINO
OrtOpenVINOProviderOptions params;
return OpenVINOProviderFactoryCreator::Create(&params)->CreateProvider();
ProviderOptions provider_options_map;
return OpenVINOProviderFactoryCreator::Create(&provider_options_map)->CreateProvider();
#else
return nullptr;
#endif

View file

@ -18,9 +18,6 @@ std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_MIGrap
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Nnapi(
uint32_t flags, const optional<std::string>& partitioning_stop_ops_list);
// std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tvm(const char*);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, const char* cache_dir);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(const OrtOpenVINOProviderOptions* params);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Rknpu();
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Rocm(const OrtROCMProviderOptions* provider_options);
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_Tensorrt(const OrtTensorRTProviderOptions* params);
@ -45,6 +42,7 @@ std::unique_ptr<IExecutionProvider> TensorrtExecutionProviderWithOptions(const O
std::unique_ptr<IExecutionProvider> TensorrtExecutionProviderWithOptions(const OrtTensorRTProviderOptionsV2* params);
std::unique_ptr<IExecutionProvider> DefaultMIGraphXExecutionProvider();
std::unique_ptr<IExecutionProvider> MIGraphXExecutionProviderWithOptions(const OrtMIGraphXProviderOptions* params);
std::unique_ptr<IExecutionProvider> OpenVINOExecutionProviderWithOptions(const OrtOpenVINOProviderOptions* params);
std::unique_ptr<IExecutionProvider> DefaultOpenVINOExecutionProvider();
std::unique_ptr<IExecutionProvider> DefaultNnapiExecutionProvider();
std::unique_ptr<IExecutionProvider> DefaultRknpuExecutionProvider();