onnxruntime/dockerfiles/Dockerfile.cuda
Yifan Li 5c3c7643db
Update range of gpu arch (#23309)
### Description
<!-- Describe your changes. -->
* Remove deprecated GPU architectures to control NuGet/Python package size
(the latest TRT supports sm75 Turing and newer architectures)
* Add 90 to support the Blackwell series in the next release (86;89 not
considered, as adding them would rapidly increase package size)

| arch_range | Python-cuda12 | Nuget-cuda12 |
| -------------- | ------------------------------------------------------------ | ---------------------------------- |
| 60;61;70;75;80 | Linux: 279MB Win: 267MB | Linux: 247MB Win: 235MB |
| 75;80 | Linux: 174MB Win: 162MB | Linux: 168MB Win: 156MB |
| **75;80;90** | **Linux: 299MB Win: 277MB** | **Linux: 294MB Win: 271MB** |
| 75;80;86;89 | [Linux: MB Win: 390MB](https://aiinfra.visualstudio.com/Lotus/_build/results?buildId=647457&view=results) | Linux: 416MB Win: 383MB |
| 75;80;86;89;90 | [Linux: MB Win: 505MB](https://aiinfra.visualstudio.com/Lotus/_build/results?buildId=646536&view=results) | Linux: 541MB Win: 498MB |

### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->

Callout: While adding sm90 support, the build of cuda11.8+cudnn8 will be
dropped in the coming ORT release,
as that build has issues with Blackwell (mentioned in the comments) and demand
for CUDA 11 is minor, according to the internal ort-cuda11 repo.
2025-01-14 14:27:34 -08:00

112 lines
4.1 KiB
Text

# --------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------
# Build onnxruntime-gpu python package with CUDA 12.x & CUDNN 9.x for python 3.12 in Ubuntu 24.04.
# If memory is less than 64GB, you may change "--parallel" to "--parallel 4" to avoid out-of-memory error.
# Global build arguments. They are declared before the first FROM so that they can be
# used in the FROM lines of both stages to select the base image tags.
ARG CUDA_VERSION=12.6.1
ARG CUDNN_VERSION=9.5.0.50
ARG OS=ubuntu24.04

# Build stage, based on the CUDA "-devel" image. The stage is named so that it can be
# selected with `docker build --target build`; it can still be referenced by index 0.
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-devel-${OS} AS build

# An ARG declared before FROM is only visible in FROM lines; re-declare the ones
# that the RUN instructions of this stage read.
ARG CUDA_VERSION
ARG CUDNN_VERSION

# Adjust as needed
# Check your CUDA arch: https://developer.nvidia.com/cuda-gpus
ARG CMAKE_CUDA_ARCHITECTURES="75;80;90"

ENV DEBIAN_FRONTEND=noninteractive

# Add source code to /code.
# COPY is used rather than ADD: this is a plain copy of the build context, and none of
# ADD's extra behaviors (remote URLs, automatic extraction of tar archives) is wanted.
COPY . /code

ENV PATH=/usr/local/cuda/bin:${PATH}
# Install the OS packages required by the build (compilers, build tools, git, wget and
# Python 3.12 with pip/venv support). The apt package lists are removed in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        cmake \
        g++ \
        gcc \
        git \
        make \
        ninja-build \
        python3-pip \
        python3.12-dev \
        python3.12-venv \
        wget \
    && rm -rf /var/lib/apt/lists/*
# Install cuDNN (the version selected by CUDNN_VERSION) for building ONNX Runtime with CUDA.
# The archive name contains the CUDA major version only: ${CUDA_VERSION%%.*} removes
# everything from the first dot onwards (e.g. 12.6.1 -> 12).
# The archive is unpacked into /code/build/cudnn without its top-level directory and the
# downloaded file is deleted in the same layer.
RUN cudnn_tar="cudnn-linux-x86_64-${CUDNN_VERSION}_cuda${CUDA_VERSION%%.*}-archive.tar.xz" \
    && wget "https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/${cudnn_tar}" \
    && mkdir -p /code/build/cudnn \
    && tar -Jxvf "${cudnn_tar}" -C /code/build/cudnn --strip-components=1 \
    && rm -f "${cudnn_tar}"
# Create a virtual environment and install dependencies, then build ONNX Runtime with CUDA support.
# WORKDIR makes /code the working directory, so the relative ./VERSION_NUMBER path below
# is resolved against the source tree.
WORKDIR /code
# The command substitution is quoted so that the version string is always passed to
# build.py as a single ONNXRUNTIME_VERSION=<value> argument.
RUN python3 -m venv /code/env \
    && . /code/env/bin/activate \
    && pip install --upgrade psutil setuptools wheel packaging \
    && pip install -r /code/tools/ci_build/github/linux/python/requirements.txt \
    && python /code/tools/ci_build/build.py --build_dir /code/build/Linux \
        --allow_running_as_root --skip_submodule_sync \
        --use_cuda --cuda_home /usr/local/cuda \
        --cudnn_home /code/build/cudnn \
        --build_shared_lib --skip_tests \
        --config Release --build_wheel --update --build --parallel \
        --cmake_generator Ninja \
        --cmake_extra_defines "ONNXRUNTIME_VERSION=$(cat ./VERSION_NUMBER)" "CMAKE_CUDA_ARCHITECTURES=${CMAKE_CUDA_ARCHITECTURES}" onnxruntime_BUILD_UNIT_TESTS=OFF
# Start second stage to copy the build artifacts
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-runtime-${OS}

# Build arguments read by this stage. CUDA_VERSION and CUDNN_VERSION are re-declared
# here because an ARG declared before FROM is only visible in FROM lines.
ARG CUDA_VERSION
ARG CUDNN_VERSION
ARG GIT_COMMIT
ARG GIT_BRANCH
ARG ONNXRUNTIME_VERSION

# Make sure the required build arguments are set. See README.md for more information.
# "${VAR:?message}" makes the shell stop with the given message when VAR is unset or
# empty, so the build fails in this step and names the missing argument.
RUN : "${GIT_COMMIT:?GIT_COMMIT build argument is required}" \
    && : "${GIT_BRANCH:?GIT_BRANCH build argument is required}" \
    && : "${ONNXRUNTIME_VERSION:?ONNXRUNTIME_VERSION build argument is required}"

# Image metadata: toolkit versions, maintainer and the ONNX Runtime source revision.
LABEL CUDA_VERSION="${CUDA_VERSION}" \
      CUDNN_VERSION="${CUDNN_VERSION}" \
      maintainer="Changming Sun <chasun@microsoft.com>" \
      onnxruntime_version="${ONNXRUNTIME_VERSION}" \
      onnxruntime_git_branch="${GIT_BRANCH}" \
      onnxruntime_git_commit="${GIT_COMMIT}"
# Bring the build output of the first stage (the dist directory holding the built wheel)
# and the image license file into this stage.
COPY --from=0 /code/build/Linux/Release/dist /ort
COPY --from=0 /code/dockerfiles/LICENSE-IMAGE.txt /code/LICENSE-IMAGE.txt

# Environment of the final image.
# LD_LIBRARY_PATH points at the cuDNN libraries shipped by the nvidia-cudnn-cu12 Python
# package, which live in the site-packages directory of the Python 3.12 virtual
# environment at /ort/env, followed by the CUDA libraries.
ENV DEBIAN_FRONTEND=noninteractive \
    CUDNN_VERSION=${CUDNN_VERSION} \
    ONNXRUNTIME_VERSION=${ONNXRUNTIME_VERSION} \
    LD_LIBRARY_PATH="/ort/env/lib/python3.12/site-packages/nvidia/cudnn/lib:/usr/local/cuda/lib64"
# Install runtime dependencies, create the virtual environment at /ort/env, and install
# the ONNX Runtime wheel together with the cuDNN Python package whose CUDA major version
# (${CUDA_VERSION%%.*}, e.g. 12) and version (CUDNN_VERSION) match the build.
# --no-cache-dir keeps pip's download cache out of this layer of the final image.
# The python -c step imports onnxruntime and prints its available providers, so the
# build fails here if the installed package cannot be imported.
# NOTE(review): the wheel removed below was added by an earlier COPY instruction, so it
# presumably still occupies space in that earlier layer - confirm if image size matters.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        libstdc++6 \
        python3-pip \
        python3.12-venv \
    && python3 -m venv /ort/env \
    && . /ort/env/bin/activate \
    && pip install --no-cache-dir /ort/*.whl \
    && pip install --no-cache-dir "nvidia-cudnn-cu${CUDA_VERSION%%.*}==${CUDNN_VERSION}" \
    && python -c 'import onnxruntime; print(onnxruntime.get_available_providers())' \
    && rm -rf /ort/*.whl \
    && rm -rf /var/lib/apt/lists/*
# Ensure the virtual environment is always used when running commands in the container.
# Putting /ort/env/bin first on PATH covers non-interactive commands as well
# (e.g. `docker run <image> python script.py`), which do not read ~/.bashrc.
ENV VIRTUAL_ENV=/ort/env \
    PATH=/ort/env/bin:${PATH}
# Additionally source the activate script from ~/.bashrc for interactive bash sessions.
RUN echo ". /ort/env/bin/activate" >> ~/.bashrc
# Set the default command to start an interactive bash shell
CMD [ "/bin/bash" ]