onnxruntime/cmake/external/cutlass.cmake
Ye Wang f9af94009b
onboard MoE (#18279)
### Description
<!-- Describe your changes. -->
1. Introduce MoE CUDA op to ORT based on FT implementation.
2. Upgrade cutlass to 3.1.0 to avoid some build failures on Windows.
Remove patch file for cutlass 3.0.0.
3. Sharded MoE implementation will come with another PR

limitation: __CUDA_ARCH__ >= 700


### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
2023-11-14 16:48:51 -08:00

13 lines
332 B
CMake

if (onnxruntime_USE_FLASH_ATTENTION OR onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION)
include(FetchContent)
FetchContent_Declare(
cutlass
URL ${DEP_URL_cutlass}
URL_HASH SHA1=${DEP_SHA1_cutlass}
)
FetchContent_GetProperties(cutlass)
if(NOT cutlass_POPULATED)
FetchContent_Populate(cutlass)
endif()
endif()