mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
Update range of gpu arch (#23309)
### Description

- Remove deprecated GPU archs to control nuget/python package size (the latest TRT supports sm75 Turing and newer archs).
- Add 90 to support the Blackwell series in the next release (86;89 were not considered, as adding them would rapidly increase package size).

| arch_range | Python-cuda12 | Nuget-cuda12 |
| -------------- | ------------------------------------------------------------ | ---------------------------------- |
| 60;61;70;75;80 | Linux: 279MB Win: 267MB | Linux: 247MB Win: 235MB |
| 75;80 | Linux: 174MB Win: 162MB | Linux: 168MB Win: 156MB |
| **75;80;90** | **Linux: 299MB Win: 277MB** | **Linux: 294MB Win: 271MB** |
| 75;80;86;89 | [Linux: MB Win: 390MB](https://aiinfra.visualstudio.com/Lotus/_build/results?buildId=647457&view=results) | Linux: 416MB Win: 383MB |
| 75;80;86;89;90 | [Linux: MB Win: 505MB](https://aiinfra.visualstudio.com/Lotus/_build/results?buildId=646536&view=results) | Linux: 541MB Win: 498MB |

### Motivation and Context

Callout: While adding sm90 support, the cuda11.8+cudnn8 build will be dropped in the coming ORT release, as that build has an issue with Blackwell (mentioned in comments) and demand for CUDA 11 is minor, according to the internal ort-cuda11 repo.
This commit is contained in:
parent
39db20f3ff
commit
5c3c7643db
8 changed files with 15 additions and 11 deletions
|
|
@ -12,7 +12,9 @@ ARG OS=ubuntu24.04
|
|||
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-${OS}
|
||||
ARG CUDA_VERSION
|
||||
ARG CUDNN_VERSION
|
||||
ARG CMAKE_CUDA_ARCHITECTURES="61;70;75;80;86;90"
|
||||
# Adjust as needed
|
||||
# Check your CUDA arch: https://developer.nvidia.com/cuda-gpus
|
||||
ARG CMAKE_CUDA_ARCHITECTURES="75;80;90"
|
||||
|
||||
ENV DEBIAN_FRONTEND=noninteractive
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,9 @@ FROM nvcr.io/nvidia/tensorrt:${TRT_CONTAINER_VERSION}-py3
|
|||
|
||||
ARG ONNXRUNTIME_REPO=https://github.com/Microsoft/onnxruntime
|
||||
ARG ONNXRUNTIME_BRANCH=main
|
||||
ARG CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80
|
||||
# Adjust as needed
|
||||
# Check your CUDA arch: https://developer.nvidia.com/cuda-gpus
|
||||
ARG CMAKE_CUDA_ARCHITECTURES=75;80;90
|
||||
|
||||
RUN apt-get update &&\
|
||||
apt-get install -y sudo git bash unattended-upgrades
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ stages:
|
|||
msbuildPlatform: x64
|
||||
packageName: x64-cuda
|
||||
CudaVersion: ${{ parameters.CudaVersion }}
|
||||
buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80"
|
||||
buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;90"
|
||||
runTests: ${{ parameters.RunOnnxRuntimeTests }}
|
||||
buildJava: ${{ parameters.buildJava }}
|
||||
java_artifact_id: onnxruntime_gpu
|
||||
|
|
@ -68,7 +68,7 @@ stages:
|
|||
msbuildPlatform: x64
|
||||
CudaVersion: ${{ parameters.CudaVersion }}
|
||||
packageName: x64-tensorrt
|
||||
buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80"
|
||||
buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;90"
|
||||
runTests: ${{ parameters.RunOnnxRuntimeTests }}
|
||||
buildJava: ${{ parameters.buildJava }}
|
||||
java_artifact_id: onnxruntime_gpu
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ stages:
|
|||
PYTHON_VERSION: ${{ python_version }}
|
||||
EP_NAME: gpu
|
||||
CudaVersion: ${{ parameters.cuda_version }}
|
||||
EP_BUILD_FLAGS: --enable_lto --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80"
|
||||
EP_BUILD_FLAGS: --enable_lto --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;90"
|
||||
use_tensorrt: True
|
||||
|
||||
- ${{ if eq(parameters.enable_linux_cuda, true) }}:
|
||||
|
|
|
|||
|
|
@ -319,7 +319,7 @@ stages:
|
|||
--build_wheel \
|
||||
--enable_onnx_tests --use_tensorrt --cuda_version=11.8 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8 \
|
||||
${{ parameters.gpu_build_py_parameters }} \
|
||||
--cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-11/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
|
||||
--cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-11/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=75;80;90'
|
||||
workingDirectory: $(Build.SourcesDirectory)
|
||||
|
||||
- task: CmdLine@2
|
||||
|
|
@ -349,7 +349,7 @@ stages:
|
|||
--build_wheel \
|
||||
--enable_onnx_tests --use_tensorrt --cuda_version=11.8 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8 \
|
||||
${{ parameters.gpu_build_py_parameters }} --ctest_path '' \
|
||||
--cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-11/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80'
|
||||
--cmake_extra_defines CMAKE_CUDA_HOST_COMPILER=/opt/rh/devtoolset-11/root/usr/bin/cc 'CMAKE_CUDA_ARCHITECTURES=75;80;90'
|
||||
|
||||
- task: CopyFiles@2
|
||||
displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)'
|
||||
|
|
@ -379,7 +379,7 @@ stages:
|
|||
- template: common-variables.yml
|
||||
CUDA_VERSION: '11.8'
|
||||
buildArch: x64
|
||||
EpBuildFlags: --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda11 }}" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=37;50;52;60;61;70;75;80"
|
||||
EpBuildFlags: --use_tensorrt --tensorrt_home="$(Agent.TempDirectory)\${{ variables.win_trt_folder_cuda11 }}" --cuda_version=$(CUDA_VERSION) --cuda_home="C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$(CUDA_VERSION)" --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=75;80;90"
|
||||
EnvSetupScript: setup_env_gpu.bat
|
||||
EP_NAME: gpu
|
||||
VSGenerator: 'Visual Studio 17 2022'
|
||||
|
|
|
|||
|
|
@ -2,4 +2,4 @@
|
|||
set -e -x
|
||||
docker run --rm --volume \
|
||||
$BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}build \
|
||||
/bin/bash -c "/usr/bin/python3.12 /onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --use_cuda --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr/local/cuda-$CUDA_VERSION --skip_tests --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80' && cd /build/Release && make install DESTDIR=/build/installed"
|
||||
/bin/bash -c "/usr/bin/python3.12 /onnxruntime_src/tools/ci_build/build.py --enable_lto --build_java --build_nodejs --build_dir /build --config Release --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --use_cuda --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr/local/cuda-$CUDA_VERSION --skip_tests --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=75;80;90' && cd /build/Release && make install DESTDIR=/build/installed"
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ fi
|
|||
if [ "$BUILD_DEVICE" == "GPU" ]; then
|
||||
SHORT_CUDA_VERSION=$(echo $CUDA_VERSION | sed 's/\([[:digit:]]\+\.[[:digit:]]\+\)\.[[:digit:]]\+/\1/')
|
||||
#Enable CUDA and TRT EPs.
|
||||
BUILD_ARGS+=("--use_cuda" "--use_tensorrt" "--cuda_version=$SHORT_CUDA_VERSION" "--tensorrt_home=/usr" "--cuda_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cudnn_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cmake_extra_defines" "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80")
|
||||
BUILD_ARGS+=("--use_cuda" "--use_tensorrt" "--cuda_version=$SHORT_CUDA_VERSION" "--tensorrt_home=/usr" "--cuda_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cudnn_home=/usr/local/cuda-$SHORT_CUDA_VERSION" "--cmake_extra_defines" "CMAKE_CUDA_ARCHITECTURES=75;80;90")
|
||||
fi
|
||||
|
||||
if [ "$BUILD_DEVICE" == "NPU" ]; then
|
||||
|
|
|
|||
|
|
@ -3,4 +3,4 @@ set -e -x
|
|||
mkdir -p $HOME/.onnx
|
||||
docker run --rm --volume /data/onnx:/data/onnx:ro --volume $BUILD_SOURCESDIRECTORY:/onnxruntime_src --volume $BUILD_BINARIESDIRECTORY:/build \
|
||||
--volume /data/models:/build/models:ro --volume $HOME/.onnx:/home/onnxruntimedev/.onnx -e NIGHTLY_BUILD onnxruntimecuda${CUDA_VERSION_MAJOR}xtrt86build \
|
||||
/bin/bash -c "/usr/bin/python3.12 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --skip_tests --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --build_java --build_nodejs --use_tensorrt --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr --tensorrt_home=/usr --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80' && cd /build/Release && make install DESTDIR=/build/installed"
|
||||
/bin/bash -c "/usr/bin/python3.12 /onnxruntime_src/tools/ci_build/build.py --build_dir /build --config Release --skip_tests --skip_submodule_sync --parallel --use_binskim_compliant_compile_flags --build_shared_lib --build_java --build_nodejs --use_tensorrt --cuda_version=$CUDA_VERSION --cuda_home=/usr/local/cuda-$CUDA_VERSION --cudnn_home=/usr --tensorrt_home=/usr --cmake_extra_defines 'CMAKE_CUDA_ARCHITECTURES=75;80;90' && cd /build/Release && make install DESTDIR=/build/installed"
|
||||
|
|
|
|||
Loading…
Reference in a new issue