mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-17 21:10:43 +00:00
92 lines
3.8 KiB
Diff
92 lines
3.8 KiB
Diff
diff --git a/CMakeLists.txt b/CMakeLists.txt
|
|
index 04674124c..12e8b8b00 100644
|
|
--- a/CMakeLists.txt
|
|
+++ b/CMakeLists.txt
|
|
@@ -19,7 +19,7 @@ endif()
|
|
|
|
set(version 1.1.0)
|
|
# Check support for CUDA/HIP in Cmake
|
|
-project(composable_kernel VERSION ${version})
|
|
+project(composable_kernel VERSION ${version} LANGUAGES CXX HIP)
|
|
|
|
list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
|
|
|
|
@@ -173,27 +173,6 @@ set(CMAKE_CXX_STANDARD_REQUIRED ON)
|
|
set(CMAKE_CXX_EXTENSIONS OFF)
|
|
message("CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")
|
|
|
|
-## OpenMP
|
|
-if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
|
|
- # workaround issue hipcc in rocm3.5 cannot find openmp
|
|
- set(OpenMP_CXX "${CMAKE_CXX_COMPILER}")
|
|
- set(OpenMP_CXX_FLAGS "-fopenmp=libomp -Wno-unused-command-line-argument")
|
|
- set(OpenMP_CXX_LIB_NAMES "libomp" "libgomp" "libiomp5")
|
|
- set(OpenMP_libomp_LIBRARY ${OpenMP_CXX_LIB_NAMES})
|
|
- set(OpenMP_libgomp_LIBRARY ${OpenMP_CXX_LIB_NAMES})
|
|
- set(OpenMP_libiomp5_LIBRARY ${OpenMP_CXX_LIB_NAMES})
|
|
-else()
|
|
- find_package(OpenMP REQUIRED)
|
|
-endif()
|
|
-
|
|
-message("OpenMP_CXX_LIB_NAMES: ${OpenMP_CXX_LIB_NAMES}")
|
|
-message("OpenMP_gomp_LIBRARY: ${OpenMP_gomp_LIBRARY}")
|
|
-message("OpenMP_pthread_LIBRARY: ${OpenMP_pthread_LIBRARY}")
|
|
-message("OpenMP_CXX_FLAGS: ${OpenMP_CXX_FLAGS}")
|
|
-
|
|
-link_libraries(${OpenMP_gomp_LIBRARY})
|
|
-link_libraries(${OpenMP_pthread_LIBRARY})
|
|
-
|
|
## HIP
|
|
find_package(HIP REQUIRED)
|
|
# Override HIP version in config.h, if necessary.
|
|
@@ -215,8 +194,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
|
|
message(STATUS "CK_HIP_VERSION_PATCH overriden with ${CK_OVERRIDE_HIP_VERSION_PATCH}")
|
|
endif()
|
|
message(STATUS "Build with HIP ${HIP_VERSION}")
|
|
-link_libraries(hip::device)
|
|
-add_compile_definitions(__HIP_PLATFORM_HCC__=1)
|
|
|
|
## tidy
|
|
include(EnableCompilerWarnings)
|
|
@@ -376,7 +353,9 @@ if(BUILD_DEV)
|
|
add_compile_options(-Werror -Weverything)
|
|
endif()
|
|
#add flags to reduce the size of binaries
|
|
-add_compile_options(-Oz -flto=thin)
|
|
+# -flto requires ORT to use a linker that supports LTO, and the -flto flag should also be passed to the linker.
|
|
+# add_compile_options(-Oz -flto=thin)
|
|
+add_compile_options(-Oz)
|
|
message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
|
|
|
|
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR})
|
|
@@ -482,11 +461,3 @@ rocm_install(FILES
|
|
|
|
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
|
|
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
|
|
-
|
|
-rocm_create_package(
|
|
- NAME composablekernel
|
|
- DESCRIPTION "High Performance Composable Kernel for AMD GPUs"
|
|
- MAINTAINER "MIOpen Kernels Dev Team <dl.MIOpen@amd.com>"
|
|
- LDCONFIG
|
|
- HEADER_ONLY
|
|
-)
|
|
diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
|
|
index 9cb5d0e9a..141a46f3d 100644
|
|
--- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt
|
|
+++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt
|
|
@@ -44,8 +44,14 @@ function(add_instance_library INSTANCE_NAME)
|
|
endforeach()
|
|
#only continue if there are some source files left on the list
|
|
if(ARGN)
|
|
+ set_source_files_properties(${ARGN} PROPERTIES LANGUAGE HIP)
|
|
add_library(${INSTANCE_NAME} OBJECT ${ARGN})
|
|
+ # Always disable debug symbols and C debug asserts due to:
|
|
+ # - Linker error: ... relocation truncated to fit ..., caused by the object files to be linked being too large.
|
|
+ # - https://github.com/ROCmSoftwarePlatform/composable_kernel/issues/622
|
|
+ target_compile_options(${INSTANCE_NAME} PRIVATE -g0 -DNDEBUG)
|
|
target_compile_features(${INSTANCE_NAME} PUBLIC)
|
|
+ target_compile_definitions(${INSTANCE_NAME} PRIVATE "__HIP_PLATFORM_AMD__=1" "__HIP_PLATFORM_HCC__=1")
|
|
set_target_properties(${INSTANCE_NAME} PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
clang_tidy_check(${INSTANCE_NAME})
|
|
set(result 0)
|