diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 962b42c190..c1a171fba0 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -1517,35 +1517,33 @@ if (onnxruntime_USE_CUDA) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_53,code=sm_53") # TX1, Nano set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_62,code=sm_62") # TX2 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_72,code=sm_72") # AGX Xavier, NX Xavier - if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_87,code=sm_87") # AGX Orin, NX Orin - endif() + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_87,code=sm_87") # AGX Orin, NX Orin else() - # the following compute capabilities are removed in CUDA 11 Toolkit - if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_30,code=sm_30") # K series - endif() if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12) # 37, 50 still work in CUDA 11 but are marked deprecated and will be removed in future CUDA version. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_37,code=sm_37") # K80 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_50,code=sm_50") # M series endif() + # Note that we generate SASS code for specified cuda architectures. It does not support forward compatibility. + # To add PTX for future GPU architectures >= XX, append -gencode=arch=compute_XX,code=compute_XX. set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_52,code=sm_52") # M60 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_60,code=sm_60") # P series set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_70,code=sm_70") # V series set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_75,code=sm_75") # T series - if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_80,code=sm_80") # A series - endif() + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_80,code=sm_80") # A series + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_86,code=sm_86") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_89,code=sm_89") if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_90,code=sm_90") # H series + if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_120,code=sm_120") # B series + endif() endif() endif() endif() + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=-compress-all") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-relaxed-constexpr") - if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 11) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --Werror default-stream-launch") - endif() + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --Werror default-stream-launch") if (NOT WIN32) list(APPEND CUDA_NVCC_FLAGS --compiler-options -fPIC) endif()