Set CUDA12 as default in GPU packages (#21438)

### Description
* Swap cuda version 11.8/12.2 in GPU CIs
* Set CUDA 12 as the default version in the YAML pipelines that publish the
NuGet/Python/Java GPU packages
* Suppress the warnings that are treated as errors in flash_api.cc during the ORT Windows build
This commit is contained in:
Yifan Li 2024-07-25 10:17:16 -07:00 committed by GitHub
parent f3a6e58ae3
commit ebcb7075eb
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
17 changed files with 99 additions and 54 deletions

View file

@ -92,6 +92,11 @@ void set_params_fprop(Flash_fwd_params& params,
params.softmax_lse_ptr = softmax_lse_d;
// Set the dimensions.
#if defined(_MSC_VER)
#pragma warning(push)
#pragma warning(disable : 4267) // Ignore conversion from 'size_t' to 'int', possible loss of data
#pragma warning(disable : 4244) // Ignore conversion from 'double' to 'float', possible loss of data
#endif
params.b = batch_size;
params.h = num_heads;
params.h_k = num_heads_k;
@ -119,6 +124,9 @@ void set_params_fprop(Flash_fwd_params& params,
if (window_size_left >= 0 && window_size_right < 0) {
window_size_right = seqlen_k;
}
#if defined(_MSC_VER)
#pragma warning(pop)
#endif
params.window_size_left = window_size_left;
params.window_size_right = window_size_right;

View file

@ -30,7 +30,7 @@ parameters:
- name: CudaVersion
displayName: CUDA version
type: string
default: '11.8'
default: '12.2'
values:
- 11.8
- 12.2

View file

@ -30,7 +30,7 @@ parameters:
- name: CudaVersion
displayName: CUDA version
type: string
default: '11.8'
default: '12.2'
values:
- 11.8
- 12.2

View file

@ -8,14 +8,12 @@ parameters:
- name: TrtVersion
displayName: TensorRT Version
type: string
default: 10.0.cuda_11_8_cudnn_8
default: 10.2.cuda_12_5_cudnn_9
values:
- 8.4.cuda_11_6_cudnn_8
- 8.5.cuda_11_8_cudnn_8
- 8.6.cuda_11_8_cudnn_8
- 8.6.cuda_12_3_cudnn_9
- 10.0.cuda_11_8_cudnn_8
- 10.0.cuda_12_4_cudnn_9
- 10.2.cuda_11_8_cudnn_8
- 10.2.cuda_12_5_cudnn_9
- BIN
- name: UseTensorrtOssParser

View file

@ -6,6 +6,7 @@ resources:
branches:
include:
- main
- rel-*
branch: main
parameters:
@ -16,15 +17,15 @@ parameters:
variables:
- name: ArtifactFeed
${{ if eq(parameters.isReleaseBuild, false) }}:
value: ort-cuda-12-nightly
value: ORT-Nightly
${{ else }}:
value: onnxruntime-cuda-12
stages:
- template: stages/nuget-cuda-publishing-stage.yml
parameters:
artifact_feed: $(ArtifactFeed)
- template: stages/nuget-cuda-publishing-stage.yml
parameters:
artifact_feed: $(ArtifactFeed)
- template: stages/java-cuda-publishing-stage.yml
parameters:
artifact_feed: $(ArtifactFeed)
- template: stages/java-cuda-publishing-stage.yml
parameters:
artifact_feed: $(ArtifactFeed)

View file

@ -9,10 +9,22 @@ resources:
- rel-*
branch: main
parameters:
- name: isReleaseBuild
type: boolean
default: false
variables:
- name: ArtifactFeed
${{ if eq(parameters.isReleaseBuild, false) }}:
value: ort-cuda-11-nightly
${{ else }}:
value: onnxruntime-cuda-11
stages:
- template: templates/publish-nuget-steps.yml
parameters:
stage_name: 'Publish_NuGet_Packag_And_Report'
stage_name: 'Publish_NuGet_Package_And_Report'
include_cpu_ep: true
download_artifacts_steps:
- download: build
@ -25,7 +37,11 @@ stages:
artifact: 'drop-signed-nuget-Training-CPU'
- script: move "$(Pipeline.Workspace)\build\drop-signed-nuget-Training-CPU\*" $(Build.BinariesDirectory)\nuget-artifact\final-package
- download: build
displayName: 'Download Pipeline Artifact - Signed NuGet Package'
artifact: 'drop-signed-nuget-GPU'
- script: move "$(Pipeline.Workspace)\build\drop-signed-nuget-GPU\*" $(Build.BinariesDirectory)\nuget-artifact\final-package
# Publish CUDA 11 Nuget/Java pkgs to ADO feed
- template: stages/nuget-cuda-publishing-stage.yml
parameters:
artifact_feed: $(ArtifactFeed)
- template: stages/java-cuda-publishing-stage.yml
parameters:
artifact_feed: $(ArtifactFeed)

View file

@ -16,7 +16,7 @@ parameters:
variables:
- name: ArtifactFeed
${{ if eq(parameters.isReleaseBuild, false) }}:
value: ort-cuda-12-nightly
value: ORT-Nightly
${{ else }}:
value: onnxruntime-cuda-12

View file

@ -8,7 +8,7 @@ stages:
jobs:
- job: JAR_Publishing_GPU
#TODO: figure out a way to package the nightly jar. Currently the Java version is set from the VERSION_NUMBER file
condition: ${{ eq(parameters.artifact_feed, 'onnxruntime-cuda-12') }}
condition: ${{ or(eq(parameters.artifact_feed, 'onnxruntime-cuda-11'), eq(parameters.artifact_feed, 'onnxruntime-cuda-12')) }}
workspace:
clean: all
pool: 'onnxruntime-Win-CPU-2022'

View file

@ -7,7 +7,7 @@ parameters:
default: false
- name: CudaVersion
type: string
default: '11.8'
default: '12.2'
values:
- 11.8
- 12.2

View file

@ -9,10 +9,10 @@ parameters:
default: false
- name: PrimaryCUDAVersion
type: string
default: '11.8'
default: '12.2'
- name: SecondaryCUDAVersion
type: string
default: '12.2'
default: '11.8'
steps:
- ${{ if eq(parameters.DownloadCUDA, 'true') }}:

View file

@ -28,6 +28,13 @@ pr:
#### end trigger ####
parameters:
- name: CudaVersion
displayName: CUDA version
type: string
default: '12.2'
values:
- 11.8
- 12.2
- name: RunOnnxRuntimeTests
displayName: Run Tests?
type: boolean
@ -43,7 +50,7 @@ stages:
EnvSetupScript: setup_env_cuda.bat
buildArch: x64
additionalBuildFlags: >-
--enable_pybind --build_java --build_nodejs --use_cuda --cuda_home="$(Agent.TempDirectory)\v11.8"
--enable_pybind --build_java --build_nodejs --use_cuda --cuda_home="$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}"
--enable_cuda_profiling --enable_transformers_tool_test
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
--cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=ON
@ -65,7 +72,7 @@ stages:
EnvSetupScript: setup_env_cuda.bat
buildArch: x64
additionalBuildFlags: >-
--enable_pybind --enable_training --use_cuda --cuda_home="$(Agent.TempDirectory)\v11.8"
--enable_pybind --enable_training --use_cuda --cuda_home="$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}"
--skip_onnx_tests
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
msbuildPlatform: x64
@ -105,7 +112,7 @@ stages:
# note: need to specify `--gen_doc` when creating the build config so it has to be in additionalBuildFlags
additionalBuildFlags: >-
--gen_doc validate --skip_tests --enable_pybind --use_dml --use_cuda
--cuda_home="$(Agent.TempDirectory)\v11.8"
--cuda_home="$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}"
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
--cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=OFF
msbuildPlatform: x64

View file

@ -26,6 +26,21 @@ pr:
- 'js/web'
- 'onnxruntime/core/providers/js'
#### end trigger ####
parameters:
- name: CudaVersion
displayName: CUDA version
type: string
default: '12.2'
values:
- 11.8
- 12.2
variables:
- name: win_trt_folder
${{ if eq(parameters.CudaVersion, '11.8') }}:
value: TensorRT-10.2.0.19.Windows10.x86_64.cuda-11.8
${{ if eq(parameters.CudaVersion, '12.2') }}:
value: TensorRT-10.2.0.19.Windows10.x86_64.cuda-12.5
jobs:
- job: 'build'
@ -55,7 +70,7 @@ jobs:
WithCache: True
Today: $(TODAY)
AdditionalKey: "gpu-tensorrt | RelWithDebInfo"
BuildPyArguments: '--config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\TensorRT-10.2.0.19.Windows10.x86_64.cuda-11.8" --cuda_home="$(Agent.TempDirectory)\v11.8" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86'
BuildPyArguments: '--config RelWithDebInfo --parallel --use_binskim_compliant_compile_flags --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --update --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\${{ variables.win_trt_folder }}" --cuda_home="$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86'
MsbuildArguments: $(MsbuildArguments)
BuildArch: 'x64'
Platform: 'x64'
@ -75,7 +90,7 @@ jobs:
del wheel_filename_file
python.exe -m pip install -q --upgrade %WHEEL_FILENAME%
set PATH=$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo;%PATH%
python $(Build.SourcesDirectory)\tools\ci_build\build.py --config RelWithDebInfo --use_binskim_compliant_compile_flags --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\TensorRT-10.2.0.19.Windows10.x86_64.cuda-11.8" --cuda_home="$(Agent.TempDirectory)\v11.8" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75
python $(Build.SourcesDirectory)\tools\ci_build\build.py --config RelWithDebInfo --use_binskim_compliant_compile_flags --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --build_wheel --enable_onnx_tests --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\${{ variables.win_trt_folder }}" --cuda_home="$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}" --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
workingDirectory: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo'
displayName: 'Run tests'

View file

@ -2,7 +2,7 @@
# Please overwrite BASEIMAGE, TRT_VERSION and other arguments with
# --docker-build-args ' --build-arg BASEIMAGE=other_base_image --build-arg TRT_VERSION=other_trt_version etc...'
# for other cuda version and TRT version
ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8
ARG BASEIMAGE=nvidia/cuda:12.5.1-cudnn-devel-ubi8
FROM $BASEIMAGE
ARG TRT_VERSION

View file

@ -2,11 +2,11 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------
# Dockerfile to Test ONNX Runtime on UBI8 with TensorRT 10.0 and CUDA 11.8 by default
# Dockerfile to Test ONNX Runtime on UBI8 with TensorRT 10 and CUDA 12 by default
# Build base image with required system packages
ARG BASEIMAGE=nvidia/cuda:11.8.0-cudnn8-devel-ubi8
ARG TRT_VERSION=10.2.0.19-1.cuda11.8
ARG BASEIMAGE=nvidia/cuda:12.5.1-cudnn-devel-ubi8
ARG TRT_VERSION=10.2.0.19-1.cuda12.4
FROM $BASEIMAGE AS base
ARG TRT_VERSION
ENV PATH /opt/python/cp38-cp38/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/src/tensorrt/bin:${PATH}

View file

@ -1,17 +1,17 @@
REM Copyright (c) Microsoft Corporation. All rights reserved.
REM Licensed under the MIT License.
if exist PATH=%AGENT_TEMPDIRECTORY%\v11.8\ (
set PATH=%AGENT_TEMPDIRECTORY%\v11.8\bin;%AGENT_TEMPDIRECTORY%\v11.8\extras\CUPTI\lib64;%PATH%
if exist PATH=%AGENT_TEMPDIRECTORY%\v12.2\ (
set PATH=%AGENT_TEMPDIRECTORY%\v12.2\bin;%AGENT_TEMPDIRECTORY%\v12.2\extras\CUPTI\lib64;%PATH%
) else (
set PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\extras\CUPTI\lib64;%PATH%
set PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\extras\CUPTI\lib64;%PATH%
)
@REM The default version is still cuda v11.8, because set cuda v12.2 after it
if exist PATH=%AGENT_TEMPDIRECTORY%\v12.2\ (
set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\v12.2\bin;%AGENT_TEMPDIRECTORY%\v12.2\extras\CUPTI\lib64
@REM CUDA v12.2 remains the default version because the CUDA v11.8 paths are appended to PATH after it
if exist PATH=%AGENT_TEMPDIRECTORY%\v11.8\ (
set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\v11.8\bin;%AGENT_TEMPDIRECTORY%\v11.8\extras\CUPTI\lib64
) else (
set PATH=%PATH%;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\extras\CUPTI\lib64
set PATH=%PATH%;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\extras\CUPTI\lib64
)
set GRADLE_OPTS=-Dorg.gradle.daemon=false

View file

@ -1,17 +1,17 @@
REM Copyright (c) Microsoft Corporation. All rights reserved.
REM Licensed under the MIT License.
if exist PATH=%AGENT_TEMPDIRECTORY%\v11.8\ (
set PATH=%AGENT_TEMPDIRECTORY%\v11.8\bin;%AGENT_TEMPDIRECTORY%\v11.8\extras\CUPTI\lib64;%PATH%
) else (
set PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\extras\CUPTI\lib64;%PATH%
)
set PATH=%AGENT_TEMPDIRECTORY%\TensorRT-10.2.0.19.Windows10.x86_64.cuda-11.8\lib;%PATH%
@REM The default version is still cuda v11.8, because set cuda v12.2 after it
set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\TensorRT-10.2.0.19.Windows10.x86_64.cuda-12.5\lib
if exist PATH=%AGENT_TEMPDIRECTORY%\v12.2\ (
set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\v12.2\bin;%AGENT_TEMPDIRECTORY%\v12.2\extras\CUPTI\lib64
set PATH=%AGENT_TEMPDIRECTORY%\v12.2\bin;%AGENT_TEMPDIRECTORY%\v12.2\extras\CUPTI\lib64;%PATH%
) else (
set PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\extras\CUPTI\lib64;%PATH%
)
set PATH=%AGENT_TEMPDIRECTORY%\TensorRT-10.2.0.19.Windows10.x86_64.cuda-12.5\lib;%PATH%
@REM CUDA v12.2 remains the default version because the CUDA v11.8 paths are appended to PATH after it
set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\TensorRT-10.2.0.19.Windows10.x86_64.cuda-11.8\lib
@REM Append the CUDA v11.8 bin and CUPTI lib64 directories to the end of PATH.
@REM The agent temp directory copy is used when the check below succeeds;
@REM otherwise the default toolkit install location under Program Files is used.
@REM NOTE(review): `if exist PATH=...` looks unintended; confirm the condition really tests the v11.8 directory.
if exist PATH=%AGENT_TEMPDIRECTORY%\v11.8\ (
set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\v11.8\bin;%AGENT_TEMPDIRECTORY%\v11.8\extras\CUPTI\lib64
) else (
set PATH=%PATH%;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\extras\CUPTI\lib64
)

View file

@ -1,11 +1,11 @@
REM Copyright (c) Microsoft Corporation. All rights reserved.
REM Licensed under the MIT License.
if exist PATH=%AGENT_TEMPDIRECTORY%\v11.8\ (
set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\v11.8\bin;%AGENT_TEMPDIRECTORY%\v11.8\extras\CUPTI\lib64
if exist PATH=%AGENT_TEMPDIRECTORY%\v12.2\ (
set PATH=%PATH%;%AGENT_TEMPDIRECTORY%\v12.2\bin;%AGENT_TEMPDIRECTORY%\v12.2\extras\CUPTI\lib64
) else (
set PATH=%PATH%;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.8\extras\CUPTI\lib64
set PATH=%PATH%;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\bin;C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.2\extras\CUPTI\lib64
)
set PATH=%AGENT_TEMPDIRECTORY%\TensorRT-10.2.0.19.Windows10.x86_64.cuda-11.8\lib;%PATH%
set PATH=%AGENT_TEMPDIRECTORY%\TensorRT-10.2.0.19.Windows10.x86_64.cuda-12.5\lib;%PATH%
set GRADLE_OPTS=-Dorg.gradle.daemon=false
set CUDA_MODULE_LOADING=LAZY