mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-27 22:45:57 +00:00
### Description Add sequence length threshold for triggering cutlass FMHA in FP32. See performance test results in https://github.com/microsoft/onnxruntime/pull/14343 to see how this threshold is selected. Upgrade cutlass to v2.11 and update deps.txt and cgmanifest for nuget pipeline build (test build: https://aiinfra.visualstudio.com/Lotus/_build/results?buildId=268574&view=results)
13 lines
286 B
CMake
13 lines
286 B
CMake
# Fetch the CUTLASS sources when flash attention support is enabled.
#
# Inputs (defined outside this file; presumably generated from deps.txt -- confirm):
#   DEP_URL_cutlass   - location of the CUTLASS archive
#   DEP_SHA1_cutlass  - expected SHA1 of that archive
#
# The content is populated manually instead of via FetchContent_MakeAvailable():
# FetchContent_Populate() only downloads and unpacks the archive, it does not
# add_subdirectory() the dependency's own CMakeLists.txt.
# After population, FetchContent sets cutlass_SOURCE_DIR for consumers.
#
# NOTE(review): the single-argument FetchContent_Populate() form is deprecated
# as of CMake 3.30 (policy CMP0169); revisit when the minimum CMake is raised.
if(onnxruntime_USE_FLASH_ATTENTION)
  include(FetchContent)

  FetchContent_Declare(
    cutlass
    URL ${DEP_URL_cutlass}
    URL_HASH SHA1=${DEP_SHA1_cutlass}
  )

  # Populate only once per configure tree.
  FetchContent_GetProperties(cutlass)
  if(NOT cutlass_POPULATED)
    FetchContent_Populate(cutlass)
  endif()
endif()