mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-16 21:00:14 +00:00
### Description <!-- Describe your changes. --> Add a GemmFastGelu CK implementation. TODO: 1. The performance of CK GemmFastGelu in ORT is not as good as using CK directly; we still need to investigate the reason and improve CK in ORT. `GemmFastGeluUnfused float16 NN m=49152 n=3072 k=768 2298.8064 us 100.89 tflops` `withbias DeviceGemmMultipleD_Xdl_CShuffle<256, 256, 128, 32, 8, 8, Default> LoopScheduler: Default, PipelineVersion: v1 float16 NN m=49152 n=3072 k=768 2401.9799 us 96.56 tflops` ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> Co-authored-by: peixuanzuo <peixuanzuo@linmif39a000004.zvflicr54joexhdgnhvmxrxygg.phxx.internal.cloudapp.net>
23 lines
1.1 KiB
CMake
23 lines
1.1 KiB
CMake
# Fetches the composable_kernel (CK) sources at a pinned commit, applies a local
# patch to them, adds CK to the build as an EXCLUDE_FROM_ALL subdirectory, and
# publishes the CK header directories through the INTERFACE target
# `onnxruntime_composable_kernel_includes`.
include(FetchContent)

# Upstream repository and the exact commit that is checked out.
set(composable_kernel_URL https://github.com/ROCmSoftwarePlatform/composable_kernel.git)
set(composable_kernel_TAG 0345963eef4f92e9c5eab608bb8557b5463a1dcb) # 2022-12-15 15:07:24 -0600

# Patch file handed to `git apply` in the PATCH_COMMAND below.
set(PATCH "${PROJECT_SOURCE_DIR}/patches/composable_kernel/Fix_Clang_Build.patch")

# PATCH_COMMAND: `git apply --reverse --check` exits successfully only when the
# patch could be cleanly reverted, i.e. it is already present in the checkout.
# Only when that check fails does the command after `||` apply the patch, so
# re-running the patch step does not try to apply it a second time.
FetchContent_Declare(
  composable_kernel
  GIT_REPOSITORY ${composable_kernel_URL}
  GIT_TAG        ${composable_kernel_TAG}
  PATCH_COMMAND  git apply --reverse --check ${PATCH} || git apply --ignore-space-change --ignore-whitespace ${PATCH}
)

# Populate and configure CK only once; later passes see composable_kernel_POPULATED set.
FetchContent_GetProperties(composable_kernel)
if(NOT composable_kernel_POPULATED)
  FetchContent_Populate(composable_kernel)

  # Force the BUILD_DEV cache entry OFF before CK's own CMakeLists is processed;
  # the cache description records the compiler error this avoids.
  set(BUILD_DEV OFF CACHE BOOL "Disable -Weverything, otherwise, error: 'constexpr' specifier is incompatible with C++98 [-Werror,-Wc++98-compat]" FORCE)

  # EXCLUDE_FROM_ALL keeps CK's targets out of the default build unless something depends on them.
  add_subdirectory(
    "${composable_kernel_SOURCE_DIR}"
    "${composable_kernel_BINARY_DIR}"
    EXCLUDE_FROM_ALL
  )

  # Header-only view of CK: consumers that link this target inherit both include roots.
  add_library(onnxruntime_composable_kernel_includes INTERFACE)
  target_include_directories(
    onnxruntime_composable_kernel_includes
    INTERFACE
      "${composable_kernel_SOURCE_DIR}/include"
      "${composable_kernel_SOURCE_DIR}/library/include"
  )
endif()