mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
Ovep develop 1.21 (#22824)
### Description: OVEP development changes for the ORT 1.21 release. ### Motivation and Context: Has critical bug fixes; support for concurrent execution of models is enabled; support for OV 2024.5; memory optimizations for the NPU platform. --------- Co-authored-by: jatinwadhwa921 <jatin.wadhwa@intel.com> Co-authored-by: Ankit Maheshkar <ankit.maheshkar@intel.com> Co-authored-by: sfatimar <sahar.fatima@intel.com> Co-authored-by: saurabhkale17 <saurabh1.kale@intel.com> Co-authored-by: TejalKhade28 <tejal.khade@intel.com> Co-authored-by: Javier E. Martinez <javier.e.martinez@intel.com>
This commit is contained in:
parent
632a36a233
commit
ac9c135b95
10 changed files with 76 additions and 39 deletions
|
|
@ -11,22 +11,22 @@
|
|||
"${ONNXRUNTIME_ROOT}/core/providers/shared_library/*.cc"
|
||||
)
|
||||
|
||||
if (WIN32)
|
||||
set(CMAKE_MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release)
|
||||
endif()
|
||||
|
||||
# Header paths
|
||||
find_package(OpenVINO REQUIRED COMPONENTS Runtime ONNX)
|
||||
if(OpenVINO_VERSION VERSION_LESS 2024.0)
|
||||
message(FATAL_ERROR "OpenVINO 2024.0 and newer are supported. Please, use latest OpenVINO release")
|
||||
if(OpenVINO_VERSION VERSION_LESS 2024.3)
|
||||
message(FATAL_ERROR "OpenVINO 2024.3 and newer are supported. Please, use latest OpenVINO release")
|
||||
endif()
|
||||
|
||||
if(OpenVINO_VERSION VERSION_GREATER_EQUAL 2024.4)
|
||||
add_definitions(-DUSE_OVEP_NPU_MEMORY=1)
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
unset(CMAKE_MAP_IMPORTED_CONFIG_RELWITHDEBINFO)
|
||||
# If building RelWithDebInfo and OV package does not have that configuration map to Release
|
||||
get_target_property(ov_rt_implib_rwdi openvino::runtime IMPORTED_IMPLIB_RELWITHDEBINFO)
|
||||
if ((CMAKE_BUILD_TYPE STREQUAL RelWithDebInfo) AND NOT ov_rt_implib_rwdi)
|
||||
set_target_properties(openvino::runtime PROPERTIES
|
||||
MAP_IMPORTED_CONFIG_RELWITHDEBINFO Release
|
||||
)
|
||||
endif()
|
||||
|
||||
list(APPEND OPENVINO_LIB_LIST openvino::frontend::onnx openvino::runtime ${PYTHON_LIBRARIES})
|
||||
|
|
@ -82,3 +82,8 @@
|
|||
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
|
||||
endif()
|
||||
|
||||
set_target_properties(onnxruntime_providers_openvino PROPERTIES
|
||||
MAP_IMPORTED_CONFIG_RELEASE RelWithDebInfo
|
||||
MAP_IMPORTED_CONFIG_DEBUG RelWithDebInfo
|
||||
)
|
||||
|
|
@ -626,8 +626,13 @@ typedef struct OrtMIGraphXProviderOptions {
|
|||
} OrtMIGraphXProviderOptions;
|
||||
|
||||
/** \brief OpenVINO Provider Options
|
||||
*
|
||||
* \see OrtApi::SessionOptionsAppendExecutionProvider_OpenVINO
|
||||
* \brief This struct is frozen since ORT 1.13.0. It is maintained as part of the legacy API for compatibility.
|
||||
* \brief For the latest OpenVINO Provider Options, switch to the ProviderOptions map.
|
||||
* \brief Latest OpenVINO Provider Options are listed in the
|
||||
* \htmlonly
|
||||
* <a href="https://onnxruntime.ai/docs/execution-providers/OpenVINO-ExecutionProvider.html#summary-of-options">onnxruntime document.</a>
|
||||
* \endhtmlonly
|
||||
* \see OrtApi::SessionOptionsAppendExecutionProvider()
|
||||
*/
|
||||
typedef struct OrtOpenVINOProviderOptions {
|
||||
#ifdef __cplusplus
|
||||
|
|
@ -645,7 +650,7 @@ typedef struct OrtOpenVINOProviderOptions {
|
|||
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
|
||||
*/
|
||||
const char* device_type;
|
||||
unsigned char enable_npu_fast_compile;
|
||||
unsigned char enable_npu_fast_compile; ///< 0 = disabled, nonzero = enabled
|
||||
const char* device_id;
|
||||
size_t num_of_threads; ///< 0 = Use default number of threads
|
||||
const char* cache_dir; // path is set to empty by default
|
||||
|
|
|
|||
|
|
@ -120,8 +120,8 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
|
|||
} catch (const char* msg) {
|
||||
ORT_THROW(msg);
|
||||
}
|
||||
|
||||
inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, 1));
|
||||
int num_infer_req = (global_context_.num_of_threads > 0) ? global_context_.num_of_threads : 1;
|
||||
inferRequestsQueue_ = std::unique_ptr<InferRequestsQueue>(new InferRequestsQueue(exe_network_, num_infer_req));
|
||||
}
|
||||
|
||||
bool BasicBackend::ValidateSubgraph(std::map<std::string, std::shared_ptr<ov::Node>>& const_outputs_map) {
|
||||
|
|
@ -663,7 +663,6 @@ void BasicBackend::Infer(OrtKernelContext* ctx) {
|
|||
// Requesting for an idle infer_request from a pool of infer_requests_
|
||||
OVInferRequestPtr infer_request;
|
||||
infer_request = inferRequestsQueue_->getIdleRequest();
|
||||
|
||||
#ifdef IO_BUFFER_ENABLED
|
||||
if ((global_context_.device_type.find("GPU") != std::string::npos) &&
|
||||
(global_context_.context != nullptr) && global_context_.is_wholly_supported_graph) {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@
|
|||
#include <filesystem>
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include "core/providers/shared_library/provider_api.h"
|
||||
#include "core/providers/openvino/openvino_execution_provider.h"
|
||||
#include "core/providers/openvino/contexts.h"
|
||||
|
|
@ -187,15 +189,23 @@ common::Status OpenVINOExecutionProvider::Compile(
|
|||
|
||||
#ifdef USE_OVEP_NPU_MEMORY
|
||||
std::vector<AllocatorPtr> OpenVINOExecutionProvider::CreatePreferredAllocators() {
|
||||
AllocatorCreationInfo npu_allocator_info{
|
||||
[this](OrtDevice::DeviceId device_id) {
|
||||
return std::make_unique<OVRTAllocator>(global_context_->ie_core.Get(), OrtDevice::NPU, device_id, OpenVINO_RT_NPU);
|
||||
},
|
||||
0,
|
||||
};
|
||||
if (global_context_->device_type.find("NPU") != std::string::npos) {
|
||||
AllocatorCreationInfo npu_allocator_info{
|
||||
[this](OrtDevice::DeviceId device_id) {
|
||||
return std::make_unique<OVRTAllocator>(
|
||||
global_context_->ie_core.Get(),
|
||||
OrtDevice::NPU,
|
||||
device_id,
|
||||
OpenVINO_RT_NPU);
|
||||
},
|
||||
0,
|
||||
};
|
||||
|
||||
// fill in allocator
|
||||
return std::vector<AllocatorPtr>{CreateAllocator(npu_allocator_info)};
|
||||
// fill in allocator
|
||||
return std::vector<AllocatorPtr>{CreateAllocator(npu_allocator_info)};
|
||||
} else {
|
||||
return std::vector<AllocatorPtr>{};
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
|||
|
|
@ -199,8 +199,8 @@ class OpenVINOExecutionProvider : public IExecutionProvider {
|
|||
#endif
|
||||
private:
|
||||
std::unique_ptr<openvino_ep::GlobalContext> global_context_;
|
||||
openvino_ep::EPCtxHandler ep_ctx_handle_{};
|
||||
std::shared_ptr<openvino_ep::BackendManager> backend_manager_;
|
||||
openvino_ep::EPCtxHandler ep_ctx_handle_{};
|
||||
};
|
||||
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -35,16 +35,14 @@ GetCapability::GetCapability(const GraphViewer& graph_viewer_param,
|
|||
device_type_ = "CPU";
|
||||
if (enable_qdq_optimizer) npu_qdq_optimizer_enabled = true;
|
||||
}
|
||||
#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 1
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2024_1, device_type_, npu_qdq_optimizer_enabled);
|
||||
#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 2
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2024_2, device_type_, npu_qdq_optimizer_enabled);
|
||||
#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 3
|
||||
#if OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 3
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2024_3, device_type_, npu_qdq_optimizer_enabled);
|
||||
#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 4
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2024_4, device_type_, npu_qdq_optimizer_enabled);
|
||||
#elif OPENVINO_VERSION_MAJOR == 2024 && OPENVINO_VERSION_MINOR == 5
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled);
|
||||
#else
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2024_4, device_type_, npu_qdq_optimizer_enabled);
|
||||
data_ops_ = new DataOps(graph_viewer_, V_2024_5, device_type_, npu_qdq_optimizer_enabled);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -118,6 +118,7 @@ std::vector<SupportedOp> supported_op_mode = {
|
|||
{"CumSum", V_2022_1, {"CPU", "GPU"}},
|
||||
{"DepthToSpace", V_2020_4, {"CPU", "GPU"}},
|
||||
{"DequantizeLinear", V_2021_4, {"CPU", "GPU"}},
|
||||
{"DequantizeLinear", V_2024_4, {"NPU"}},
|
||||
{"Div", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Dropout", V_2020_4, {"CPU", "GPU"}},
|
||||
{"Elu", V_2020_4, {"CPU", "GPU"}},
|
||||
|
|
@ -254,6 +255,8 @@ void DataOps::populate_types_supported() {
|
|||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32));
|
||||
supported_types_initializer_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
|
||||
supported_types_initializer_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT16));
|
||||
supported_types_initializer_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT16));
|
||||
supported_types_initializer_.insert(
|
||||
|
|
@ -262,6 +265,10 @@ void DataOps::populate_types_supported() {
|
|||
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT8));
|
||||
supported_types_initializer_.insert(
|
||||
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
|
||||
supported_types_initializer_.insert(
|
||||
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
|
||||
supported_types_initializer_.insert(
|
||||
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
|
||||
|
||||
supported_types_npu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
|
||||
|
|
@ -285,6 +292,10 @@ void DataOps::populate_types_supported() {
|
|||
std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FN));
|
||||
supported_types_npu_.insert(
|
||||
std::make_pair(V_2024_3, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT8E4M3FNUZ));
|
||||
supported_types_npu_.insert(
|
||||
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
|
||||
supported_types_npu_.insert(
|
||||
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
|
||||
|
||||
supported_types_cpu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
|
||||
|
|
@ -304,6 +315,10 @@ void DataOps::populate_types_supported() {
|
|||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT64));
|
||||
supported_types_cpu_.insert(
|
||||
std::make_pair(V_2022_2, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT16));
|
||||
supported_types_cpu_.insert(
|
||||
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
|
||||
supported_types_cpu_.insert(
|
||||
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
|
||||
|
||||
supported_types_gpu_.insert(
|
||||
std::make_pair(V_2020_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT));
|
||||
|
|
@ -319,6 +334,10 @@ void DataOps::populate_types_supported() {
|
|||
std::make_pair(V_2021_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT8));
|
||||
supported_types_gpu_.insert(
|
||||
std::make_pair(V_2022_1, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_BOOL));
|
||||
supported_types_gpu_.insert(
|
||||
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT4));
|
||||
supported_types_gpu_.insert(
|
||||
std::make_pair(V_2024_4, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_UINT4));
|
||||
}
|
||||
|
||||
void DataOps::populate_op_mode_supported() {
|
||||
|
|
@ -368,7 +387,7 @@ void DataOps::populate_op_mode_supported() {
|
|||
|
||||
// populate unsupportedmode_t
|
||||
{
|
||||
UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4},
|
||||
UnsupportedOpMode obj = {{V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
|
||||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
// If the Input of ReduceMax op is UINT8, it is rejected (Due to output mismatch)
|
||||
for (size_t i = 0; i < node->InputDefs().size(); i++) {
|
||||
|
|
@ -383,7 +402,7 @@ void DataOps::populate_op_mode_supported() {
|
|||
op_list_.insert({"ReduceMax", obj});
|
||||
}
|
||||
{
|
||||
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4},
|
||||
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
|
||||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
const auto& input_arg = node->InputDefs()[1];
|
||||
auto shape = input_arg->Shape();
|
||||
|
|
@ -400,7 +419,7 @@ void DataOps::populate_op_mode_supported() {
|
|||
op_list_.insert({"Reshape", obj});
|
||||
}
|
||||
{
|
||||
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4},
|
||||
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
|
||||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
// If the operator is unsqueeze
|
||||
// If axes is an input, then we cannot produce a static graph.
|
||||
|
|
@ -415,7 +434,7 @@ void DataOps::populate_op_mode_supported() {
|
|||
op_list_.insert({"Unsqueeze", obj});
|
||||
}
|
||||
{
|
||||
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4},
|
||||
UnsupportedOpMode obj = {{V_2023_1, V_2023_2, V_2023_3, V_2024_0, V_2024_1, V_2024_2, V_2024_3, V_2024_4, V_2024_5},
|
||||
[this](const Node* node, const InitializedTensorSet&) {
|
||||
// check for attributes
|
||||
auto& upsample_attr = node->GetAttributes();
|
||||
|
|
|
|||
|
|
@ -31,7 +31,8 @@ enum versionNum {
|
|||
V_2024_1,
|
||||
V_2024_2,
|
||||
V_2024_3,
|
||||
V_2024_4
|
||||
V_2024_4,
|
||||
V_2024_5
|
||||
};
|
||||
|
||||
using VersionNum = enum versionNum;
|
||||
|
|
|
|||
|
|
@ -33,5 +33,5 @@ jobs:
|
|||
parameters:
|
||||
AgentPool : 'Linux-CPU-2019'
|
||||
JobName: 'Linux_CI_Dev'
|
||||
RunDockerBuildArgs: '-o ubuntu22.04 -p 3.10 -d openvino -v 2024.3.0 -x "--use_openvino CPU --build_wheel"'
|
||||
RunDockerBuildArgs: '-o ubuntu22.04 -p 3.10 -d openvino -v 2024.4.0 -x "--use_openvino CPU --build_wheel"'
|
||||
TimeoutInMinutes: 120
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
ARG UBUNTU_VERSION=22.04
|
||||
FROM ubuntu:${UBUNTU_VERSION}
|
||||
|
||||
ARG OPENVINO_VERSION=2024.3.0
|
||||
ARG OPENVINO_VERSION=2024.4.0
|
||||
ARG PYTHON_VERSION=3.10
|
||||
|
||||
ADD scripts /tmp/scripts
|
||||
|
|
@ -19,9 +19,9 @@ ENV IE_PLUGINS_PATH=$INTEL_OPENVINO_DIR/runtime/lib/intel64
|
|||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
RUN cd /opt && mkdir -p intel && cd intel && \
|
||||
wget https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.3/linux/l_openvino_toolkit_ubuntu22_2024.3.0.16041.1e3b88e4e3f_x86_64.tgz && \
|
||||
tar xzf l_openvino_toolkit_ubuntu22_2024.3.0.16041.1e3b88e4e3f_x86_64.tgz && rm -rf l_openvino_toolkit_ubuntu22_2024.3.0.16041.1e3b88e4e3f_x86_64.tgz && \
|
||||
mv l_openvino_toolkit_ubuntu22_2024.3.0.16041.1e3b88e4e3f_x86_64 openvino_2024.3.0 && \
|
||||
wget https://storage.openvinotoolkit.org/repositories/openvino/packages/2024.4/linux/l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64.tgz && \
|
||||
tar xzf l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64.tgz && rm -rf l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64.tgz && \
|
||||
mv l_openvino_toolkit_ubuntu22_2024.4.0.16579.c3152d32c9c_x86_64 openvino_2024.4.0 && \
|
||||
cd $INTEL_OPENVINO_DIR/install_dependencies && ./install_openvino_dependencies.sh -y
|
||||
|
||||
WORKDIR /root
|
||||
|
|
|
|||
Loading…
Reference in a new issue