mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
[CUDA] Update nvcc flags (#23572)
### Description (1) Remove `if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11)` since the build requires CUDA >= 11.4. (2) Add sm_86 and sm_89, since we generate SASS code only for the specified CUDA architectures. This change lets the default build support popular consumer GPUs (like RTX 30X0 and RTX 40X0). (3) Add sm_120 to support Blackwell GPUs (like RTX 50X0 etc.). (4) Add `-Xfatbin=-compress-all` to reduce wheel size. When CMAKE_CUDA_ARCHITECTURES is not specified, the size of the Linux wheel built with CUDA 12.8 is reduced by 8% (from 324MB to 299MB). ### Motivation and Context To support popular consumer GPUs (RTX 30x0, 40x0, 50x0) in the default setting, and to reduce binary size. Note that the default sm settings do not impact the officially released binaries. ORT's officially released binaries are built with arguments like CMAKE_CUDA_ARCHITECTURES=75;80;90, which generate both SASS (real) and PTX (virtual) code by default. See https://cmake.org/cmake/help/latest/prop_tgt/CUDA_ARCHITECTURES.html for more info.
This commit is contained in:
parent
b47e1e64d7
commit
9e18b6a0f3
1 changed files with 11 additions and 13 deletions
|
|
@ -1517,35 +1517,33 @@ if (onnxruntime_USE_CUDA)
|
|||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_53,code=sm_53") # TX1, Nano
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_62,code=sm_62") # TX2
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_72,code=sm_72") # AGX Xavier, NX Xavier
|
||||
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_87,code=sm_87") # AGX Orin, NX Orin
|
||||
endif()
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_87,code=sm_87") # AGX Orin, NX Orin
|
||||
else()
|
||||
# the following compute capabilities are removed in CUDA 11 Toolkit
|
||||
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_30,code=sm_30") # K series
|
||||
endif()
|
||||
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12)
|
||||
# 37, 50 still work in CUDA 11 but are marked deprecated and will be removed in future CUDA version.
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_37,code=sm_37") # K80
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_50,code=sm_50") # M series
|
||||
endif()
|
||||
# Note that we generate SASS code for specified cuda architectures. It does not support forward compatibility.
|
||||
# To add PTX for future GPU architectures >= XX, append -gencode=arch=compute_XX,code=compute_XX.
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_52,code=sm_52") # M60
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_60,code=sm_60") # P series
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_70,code=sm_70") # V series
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_75,code=sm_75") # T series
|
||||
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_80,code=sm_80") # A series
|
||||
endif()
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_80,code=sm_80") # A series
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_86,code=sm_86")
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_89,code=sm_89")
|
||||
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90,code=sm_90") # H series
|
||||
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_120,code=sm_120") # B series
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=-compress-all")
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr")
|
||||
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11)
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --Werror default-stream-launch")
|
||||
endif()
|
||||
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --Werror default-stream-launch")
|
||||
if (NOT WIN32)
|
||||
list(APPEND CUDA_NVCC_FLAGS --compiler-options -fPIC)
|
||||
endif()
|
||||
|
|
|
|||
Loading…
Reference in a new issue