2022-07-13 07:43:32 +00:00
|
|
|
# Copyright (c) Microsoft Corporation. All rights reserved.
|
|
|
|
|
# Licensed under the MIT License.
|
|
|
|
|
|
2022-11-11 05:56:44 +00:00
|
|
|
include(CheckLanguage)
|
2022-07-13 07:43:32 +00:00
|
|
|
|
|
|
|
|
# This file needs Python support: stop configuration with a fatal error
# when onnxruntime_ENABLE_PYTHON is not set to a true value.
if(NOT onnxruntime_ENABLE_PYTHON)
  message(FATAL_ERROR "python is required but is not enabled")
endif()
|
|
|
|
|
|
|
|
|
|
# Directory holding the kernel explorer sources; every glob below is rooted here.
set(KERNEL_EXPLORER_ROOT "${ONNXRUNTIME_ROOT}/python/tools/kernel_explorer")
|
|
|
|
|
|
2022-11-11 05:56:44 +00:00
|
|
|
# Select the GPU language and the matching BERT contrib-op directory for the
# enabled backend. CUDA takes precedence when both backends are switched on.
# When neither is enabled, LANGUAGE and BERT_DIR are left untouched.
if(onnxruntime_USE_CUDA)
  check_language(CUDA)
  set(LANGUAGE CUDA)
  set(BERT_DIR "${ONNXRUNTIME_ROOT}/contrib_ops/cuda/bert")
elseif(onnxruntime_USE_ROCM)
  check_language(HIP)
  set(LANGUAGE HIP)

  # Optional ROCm-only dependencies.
  if(onnxruntime_USE_COMPOSABLE_KERNEL)
    include(composable_kernel)
  endif()

  if(onnxruntime_USE_HIPBLASLT)
    # REQUIRED: configuration fails if the hipblaslt package cannot be found.
    find_package(hipblaslt REQUIRED)
  endif()

  set(BERT_DIR "${ONNXRUNTIME_ROOT}/contrib_ops/rocm/bert")
endif()
|
|
|
|
|
|
|
|
|
|
# Top-level kernel explorer sources and headers.
# CONFIGURE_DEPENDS makes the build re-check the glob so added or removed
# files trigger a reconfigure.
file(GLOB kernel_explorer_srcs
  CONFIGURE_DEPENDS
    "${KERNEL_EXPLORER_ROOT}/*.cc"
    "${KERNEL_EXPLORER_ROOT}/*.h"
)
|
2022-10-05 08:15:16 +00:00
|
|
|
|
2022-11-11 05:56:44 +00:00
|
|
|
# Backend-independent kernel sources directly under kernels/ (host and device
# files). Backend-specific subdirectories are globbed separately.
file(GLOB kernel_explorer_kernel_srcs
  CONFIGURE_DEPENDS
    "${KERNEL_EXPLORER_ROOT}/kernels/*.cc"
    "${KERNEL_EXPLORER_ROOT}/kernels/*.h"
    "${KERNEL_EXPLORER_ROOT}/kernels/*.cu"
    "${KERNEL_EXPLORER_ROOT}/kernels/*.cuh"
)
|
2022-07-13 07:43:32 +00:00
|
|
|
|
2022-11-11 05:56:44 +00:00
|
|
|
# Create the kernel_explorer target from the common and kernel sources via the
# project helper (presumably a MODULE library -- confirm against the helper).
onnxruntime_add_shared_library_module(kernel_explorer
  ${kernel_explorer_srcs}
  ${kernel_explorer_kernel_srcs}
)

# Use "_" as the output file name prefix instead of the platform default.
set_property(TARGET kernel_explorer PROPERTY PREFIX "_")

# Mirror the include paths, link libraries and compile definitions of
# onnxruntime_pybind11_state so the sources build against the same setup.
target_include_directories(kernel_explorer
  PUBLIC
    $<TARGET_PROPERTY:onnxruntime_pybind11_state,INCLUDE_DIRECTORIES>
    ${KERNEL_EXPLORER_ROOT}
)
target_link_libraries(kernel_explorer
  PRIVATE
    $<TARGET_PROPERTY:onnxruntime_pybind11_state,LINK_LIBRARIES>
)
target_compile_definitions(kernel_explorer
  PRIVATE
    $<TARGET_PROPERTY:onnxruntime_pybind11_state,COMPILE_DEFINITIONS>
)

# Silence signed/unsigned comparison warnings for this target only.
target_compile_options(kernel_explorer
  PRIVATE
    -Wno-sign-compare
)
|
|
|
|
|
|
|
|
|
|
if (onnxruntime_USE_CUDA)
|
|
|
|
|
# CUDA-only kernels: collect everything under kernels/cuda and add it to the target.
file(GLOB kernel_explorer_cuda_kernel_srcs
  CONFIGURE_DEPENDS
    "${KERNEL_EXPLORER_ROOT}/kernels/cuda/*.cc"
    "${KERNEL_EXPLORER_ROOT}/kernels/cuda/*.h"
    "${KERNEL_EXPLORER_ROOT}/kernels/cuda/*.cu"
    "${KERNEL_EXPLORER_ROOT}/kernels/cuda/*.cuh"
)
target_sources(kernel_explorer
  PRIVATE
    ${kernel_explorer_cuda_kernel_srcs}
)
# Make the CUDA toolkit headers visible to the target's sources.
target_include_directories(kernel_explorer
  PUBLIC
    ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}
)
|
2022-11-11 05:56:44 +00:00
|
|
|
elseif (onnxruntime_USE_ROCM)
|
|
|
|
|
# ROCm-only kernels: collect everything under kernels/rocm.
file(GLOB kernel_explorer_rocm_kernel_srcs
  CONFIGURE_DEPENDS
    "${KERNEL_EXPLORER_ROOT}/kernels/rocm/*.cc"
    "${KERNEL_EXPLORER_ROOT}/kernels/rocm/*.h"
    "${KERNEL_EXPLORER_ROOT}/kernels/rocm/*.cu"
    "${KERNEL_EXPLORER_ROOT}/kernels/rocm/*.cuh"
)
# Project helper applied to both the shared and the ROCm kernel sources
# (presumably tags them as HIP-language files -- confirm against its definition).
auto_set_source_files_hip_language(
  ${kernel_explorer_kernel_srcs}
  ${kernel_explorer_rocm_kernel_srcs}
)
target_sources(kernel_explorer
  PRIVATE
    ${kernel_explorer_rocm_kernel_srcs}
)
# HIP platform selection macros plus the hipBLAS v2 API switch.
target_compile_definitions(kernel_explorer
  PRIVATE
    __HIP_PLATFORM_AMD__=1
    __HIP_PLATFORM_HCC__=1
    HIPBLAS_V2
)
|
2023-01-12 09:09:40 +00:00
|
|
|
# Composable Kernel support: define USE_COMPOSABLE_KERNEL, additionally define
# USE_COMPOSABLE_KERNEL_CK_TILE when the CK_TILE option is on, and link the
# onnxruntime_composable_kernel_includes target.
if(onnxruntime_USE_COMPOSABLE_KERNEL)
  target_compile_definitions(kernel_explorer
    PRIVATE
      USE_COMPOSABLE_KERNEL
      $<$<BOOL:${onnxruntime_USE_COMPOSABLE_KERNEL_CK_TILE}>:USE_COMPOSABLE_KERNEL_CK_TILE>
  )
  target_link_libraries(kernel_explorer
    PRIVATE
      onnxruntime_composable_kernel_includes
  )
endif()
|
integrate triton into ort (#15862)
### Description
In some scenarios, Triton-written kernels are more performant than
CK or other handwritten kernels, so we implement a framework that lets
onnxruntime use these Triton-written kernels.
This PR integrates Triton into ORT so that ORT can use kernels
that are written and compiled by Triton.
The main changes focus on two parts:
1. a build part that compiles Triton-written kernels and combines these
kernels into libonnxruntime_providers_rocm.so
2. a loader and launcher in C++, for loading and launching Triton-written
kernels.
#### Build
To compile Triton-written kernels, this PR adds a script,
`tools/ci_build/compile_triton.py`. This script dynamically loads all
kernel files, compiles them, and generates `triton_kernel_infos.a` and
`triton_kernel_infos.h`.
`triton_kernel_infos.a` contains all compiled kernel instructions; this
file is combined into libonnxruntime_providers_rocm.so using the
`--whole-archive` flag.
`triton_kernel_infos.h` defines a const array that contains all the
metadata for each compiled kernel. This metadata is used for loading
and launching, so this header file is included by `triton_kernel.cu`, which
defines the load and launch functions.
A build flag is added to build.py and CMakeLists.txt; when building the ROCm
provider, it calls the triton_kernel build command and generates all
necessary files.
#### C++ Load and Launch
On the C++ side, we implement the load and launch functions in triton_kernel.cu
and triton_kernel.h.
These two files are located in `providers/cuda`, and when compiling for ROCm
they are hipified, so this part supports both CUDA and ROCm. However,
currently we only call Triton kernels in ROCm.
We also implement a softmax Triton op as an example. Because many kernels
are generated for the different input shapes of softmax, we use
TunableOp to select the best one.
### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
2023-05-17 01:35:28 +00:00
|
|
|
# Define USE_TRITON_KERNEL for kernel_explorer only when
# onnxruntime_USE_TRITON_KERNEL holds a true value.
target_compile_definitions(kernel_explorer
  PRIVATE
    $<$<BOOL:${onnxruntime_USE_TRITON_KERNEL}>:USE_TRITON_KERNEL>
)
|
2023-05-23 03:07:09 +00:00
|
|
|
# Define USE_HIPBLASLT for kernel_explorer only when
# onnxruntime_USE_HIPBLASLT holds a true value.
target_compile_definitions(kernel_explorer
  PRIVATE
    $<$<BOOL:${onnxruntime_USE_HIPBLASLT}>:USE_HIPBLASLT>
)
|
2023-07-13 03:20:26 +00:00
|
|
|
# rocBLAS extension API: add the feature macro together with the two rocBLAS
# macros that accompany it, in a single call.
if(onnxruntime_USE_ROCBLAS_EXTENSION_API)
  target_compile_definitions(kernel_explorer
    PRIVATE
      USE_ROCBLAS_EXTENSION_API
      ROCBLAS_NO_DEPRECATED_WARNINGS
      ROCBLAS_BETA_FEATURES_API
  )
endif()
|
2022-11-11 05:56:44 +00:00
|
|
|
endif()
|
2022-07-20 05:49:26 +00:00
|
|
|
|
2022-07-13 07:43:32 +00:00
|
|
|
# Build-order dependency only: onnxruntime_pybind11_state is built before kernel_explorer.
add_dependencies(kernel_explorer onnxruntime_pybind11_state)
|
|
|
|
|
|
|
|
|
|
# Enable CTest for this directory and locate the Python interpreter
# (configuration fails if none is found).
enable_testing()
find_package(Python REQUIRED COMPONENTS Interpreter)

# NOTE(review): the pytest registration below is disabled in the source; the
# reason is not visible here -- confirm before re-enabling or deleting it.
# add_test(NAME test_kernels COMMAND ${Python_EXECUTABLE} -m pytest ..)
|