onnxruntime/cmake/patches/composable_kernel/Fix_Clang_Build.patch
cloudhan f39354d7cb
Add composable kernel GEMM baseline for kernel explorer (#12364)
* Split GemmBase RocBlasGemm

* Add composable kernel GEMM baseline

* Make linter happy

* Address review comment

* Update bert cases with batchsize

* Adjust includes to fix IWYU lint

* Only builds and links used ck kernels to improve building time

* Remove warmup run on SelectImpl

* Add comment to utility function

* Mute cpplint

* Make RocBlasGemm<T>::SelectImpl semantically correct

* Add reduced basic test cases for ck gemm

* More robust gemm testing

* Fix warnings

* Fix grammar
2022-08-04 17:32:20 -07:00

53 lines
1.6 KiB
Diff

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 9f706207..fb444227 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -8,7 +8,7 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
enable_testing()
set(ROCM_SYMLINK_LIBS OFF)
-find_package(ROCM 0.8 REQUIRED PATHS /opt/rocm)
+find_package(ROCM 0.7 REQUIRED PATHS /opt/rocm)
include(ROCMInstallTargets)
include(ROCMPackageConfigHelpers)
@@ -28,6 +28,17 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS OFF)
message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")
+add_compile_options(
+ -xhip
+ --offload-arch=gfx908
+ --offload-arch=gfx90a
+ -O3 # otherwise, "Illegal instruction detected" for gfx908
+ "SHELL:-mllvm -amdgpu-early-inline-all=true"
+ "SHELL:-mllvm -amdgpu-function-calls=false" # otherwise, "local memory (65920) exceeds limit (65536) in function"
+ -fhip-new-launch-api
+)
+
+
## OpenMP
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
# workaround issue hipcc in rocm3.5 cannot find openmp
@@ -237,9 +248,6 @@ rocm_package_setup_component(tests
)
add_subdirectory(library)
-add_subdirectory(example)
-add_subdirectory(test)
-add_subdirectory(profiler)
#Create an interface target for the include only files and call it "composablekernels"
include(CMakePackageConfigHelpers)
@@ -265,11 +273,3 @@ rocm_install(FILES
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
-
-rocm_create_package(
- NAME composablekernel
- DESCRIPTION "High Performance Composable Kernel for AMD GPUs"
- MAINTAINER "MIOpen Kernels Dev Team <dl.MIOpen@amd.com>"
- LDCONFIG
- HEADER_ONLY
-)