mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-18 21:21:17 +00:00
* dnnl ep rework
rework DnnlTensor,DnnlNode,DnnlSubgraph to support arbitrary graph topology and tensor data types
rework GetCapability to claim nodes in graph greedily from node topological ordering and delay creation of DnnlSubgraph until Compile
rework compile to have DnnlSubgraphPrimitive as the object to handle primitive creation and execution
instead of thread local primitive pool which duplicates intermediate memory allocated by the EP across threads
DnnlSubgraphPrimitive provides helpers to handle many common functions for each dnnl primitive builder and becomes the centralized place to store input, output, intermediate, and initializer memories, etc.
it provides functions to obtain input memories with automatic reordering/reshaping and moving between engines
it provides interfaces to add a primitive, set the output memory for a single node, etc.
add CONCURRENT_EXEC compile flag for dnnl library as without it, convolution primitive cannot be created and executed on different threads
enable unit tests to run on dnnl ep as well if built with dnnl ep
add dnnl ep support for MatMulInteger
* Add Relu to the DNNL refactor
Signed-off-by: George Nash <george.nash@intel.com>
* Add Convolution op to the DNNL rework
Signed-off-by: George Nash <george.nash@intel.com>
* Add Pooling ops to the DNNL rework
This adds the following ops:
- AveragePool
- GlobalAveragePool
- GlobalMaxPool
- MaxPool
Note: Pooling with dilation is not yet supported.
Note: GlobalLpPool, LpPool, MaxRoiPool, and MaxUnpool are not supported yet.
Signed-off-by: George Nash <george.nash@intel.com>
* Add Sum op to the DNNL rework
Signed-off-by: George Nash <george.nash@intel.com>
* Add ConvGrad op to the DNNL rework
Signed-off-by: George Nash <george.nash@intel.com>
* Add MaxPoolGrad and AveragePoolGrad ops to DNNL rework
Signed-off-by: George Nash <george.nash@intel.com>
* Added lrn operator to the refactored code
Signed-off by chethan.palangoutu.keshava@intel.com
* Added ReduceMean DNNL op to the refactor code
Signed-off-by: Chethan Palangotu Keshava <chethan.palangotu.keshava@intel.com>
* Added Softmax DNNL op for the refactored code
Signed-off-by: Chethan Palangotu Keshava <chethan.palangotu.keshava@intel.com>
* Added BatchNorm DNNL op inference-only for refactored code
Signed-off-by: Chethan Palangotu Keshava <chethan.palangotu.keshava@intel.com>
* Added Binary Ops to DNNL rework
Signed-off-by: Wang <zhaoyang.wang@intel.com>
* Added ReluGrad to DNNL Rework
Signed-off-by: Wang <zhaoyang.wang@intel.com>
* Update OneDNN tag to v2.3
Signed-off-by: Wang <zhaoyang.wang@intel.com>
* Added support for memory up to dim size 12
this is to fix the CI test cases that contain binary ops of input dim
size > 5
Signed-off-by: Wang <zhaoyang.wang@intel.com>
* Prevent claiming support for float16 and bfloat16 when only float is supported
The string.find call that was used caused the code to claim support
for float16 and bfloat16 when we only supported float. We now explicitly
check for the data type itself, or for the data type with a 7-character prefix,
i.e. prefixed with "tensor("
Signed-off-by: George Nash <george.nash@intel.com>
* Disable uint8 mul and div, improve type conversion
Disable mul_uint8 and div_uint8 test cases as they use modulo for
overflow handling while onednn uses saturation
improve type conversion using enum instead of string comparison as well
as adding more types
Signed-off-by: Wang <zhaoyang.wang@intel.com>
Co-authored-by: Wang <zhaoyang.wang@intel.com>
Co-authored-by: Chethan Palangotu Keshava <chethan.palangotu.keshava@intel.com>
57 lines
2.5 KiB
CMake
57 lines
2.5 KiB
CMake
include (ExternalProject)
|
|
|
|
# Git repository and release tag that the oneDNN external project is fetched from.
set(DNNL_URL "https://github.com/oneapi-src/onednn")
# When bumping DNNL_TAG, check whether MKLML_VERSION and platform.cmake.patch
# also need to be updated.
set(DNNL_TAG "v2.3")
|
|
|
|
# Platform-specific file names of the oneDNN libraries.
# DNNL_IMPORT_LIB is only set on Windows.
if(WIN32)
  set(DNNL_SHARED_LIB dnnl.dll)
  set(DNNL_IMPORT_LIB dnnl.lib)
elseif(APPLE)
  set(DNNL_SHARED_LIB libdnnl.2.dylib)
else()
  set(DNNL_SHARED_LIB libdnnl.so.2)
endif()
|
|
|
|
# Validate the GPU-runtime options of the DNNL execution provider and, when the
# OpenCL runtime is requested, derive the extra arguments for the oneDNN build.
#
# Inputs (read, not defined here):
#   onnxruntime_USE_DNNL          - whether the DNNL execution provider is enabled
#   onnxruntime_DNNL_GPU_RUNTIME  - "" or "ocl"
#   onnxruntime_DNNL_OPENCL_ROOT  - "" or the root directory of an OpenCL SDK
# Outputs (only set when the runtime is "ocl" and an OpenCL root is given):
#   onnxruntime_DNNL_OPENCL_ROOT  - rewritten in CMake path form
#   DNNL_OCL_INCLUDE_DIR          - <root>/include
#   DNNL_GPU_CMAKE_ARGS           - list of -D arguments for the oneDNN configure step
# The two options must be supplied together; giving only one is a fatal error.
if(onnxruntime_USE_DNNL AND onnxruntime_DNNL_GPU_RUNTIME STREQUAL "ocl" AND onnxruntime_DNNL_OPENCL_ROOT STREQUAL "")
  message(FATAL_ERROR "--dnnl_opencl_root required")
elseif(onnxruntime_USE_DNNL AND onnxruntime_DNNL_GPU_RUNTIME STREQUAL "" AND NOT (onnxruntime_DNNL_OPENCL_ROOT STREQUAL ""))
  message(FATAL_ERROR "--dnnl_gpu_runtime required")
elseif(onnxruntime_USE_DNNL AND onnxruntime_DNNL_GPU_RUNTIME STREQUAL "ocl" AND NOT (onnxruntime_DNNL_OPENCL_ROOT STREQUAL ""))
  # Quote the user-supplied path so it is passed as a single argument even if
  # it contains ';'.
  file(TO_CMAKE_PATH "${onnxruntime_DNNL_OPENCL_ROOT}" onnxruntime_DNNL_OPENCL_ROOT)
  set(DNNL_OCL_INCLUDE_DIR "${onnxruntime_DNNL_OPENCL_ROOT}/include")
  # Each list element becomes one command-line argument of the oneDNN configure
  # step, so the option value must not carry stray whitespace.
  set(DNNL_GPU_CMAKE_ARGS
    "-DDNNL_GPU_RUNTIME=OCL"
    "-DOPENCLROOT=${onnxruntime_DNNL_OPENCL_ROOT}"
  )
endif()
|
|
|
|
# Fetch, build and install oneDNN as an external project when the DNNL
# execution provider is enabled, and publish the resulting paths in DNNL_*
# variables.
if(onnxruntime_USE_DNNL)
  # Locations of the external project under the current binary directory.
  set(DNNL_INSTALL "${CMAKE_CURRENT_BINARY_DIR}/dnnl/install")
  set(DNNL_SOURCE "${CMAKE_CURRENT_BINARY_DIR}/dnnl/src/dnnl/src")
  set(DNNL_INCLUDE_DIR "${DNNL_INSTALL}/include")
  # NOTE(review): CMAKE_INSTALL_LIBDIR and CMAKE_INSTALL_BINDIR are not set in
  # this file; presumably the including project loads GNUInstallDirs - confirm.
  set(DNNL_LIB_DIR "${DNNL_INSTALL}/${CMAKE_INSTALL_LIBDIR}")

  # Full path of the shared library: the binary directory on Windows, the
  # library directory everywhere else.
  if(NOT WIN32)
    set(DNNL_DLL_PATH "${DNNL_LIB_DIR}/${DNNL_SHARED_LIB}")
  else()
    set(DNNL_DLL_PATH "${DNNL_INSTALL}/${CMAKE_INSTALL_BINDIR}/${DNNL_SHARED_LIB}")
  endif()

  # Reset here; it is not forwarded to ExternalProject_Add below, and no patch
  # step is applied to the oneDNN sources.
  set(DNNL_CMAKE_EXTRA_ARGS)

  ExternalProject_Add(project_dnnl
    PREFIX dnnl
    GIT_REPOSITORY "${DNNL_URL}"
    GIT_TAG "${DNNL_TAG}"
    SOURCE_DIR "${DNNL_SOURCE}"
    CMAKE_ARGS
      -DDNNL_BUILD_TESTS=OFF
      # Without concurrent exec, convolution primitives cannot be created and
      # executed on different threads.
      -DDNNL_ENABLE_CONCURRENT_EXEC=ON
      -DDNNL_BUILD_EXAMPLES=OFF
      -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
      -DCMAKE_INSTALL_PREFIX=${DNNL_INSTALL}
      # Empty unless the OpenCL GPU runtime was requested.
      ${DNNL_GPU_CMAKE_ARGS}
  )

  # Let targets in this directory scope find the installed oneDNN library.
  link_directories("${DNNL_LIB_DIR}")
endif()
|