From d701dcd0279a0ffef0e317426bfc2a0e1337f6b4 Mon Sep 17 00:00:00 2001 From: yf711 <109183385+yf711@users.noreply.github.com> Date: Wed, 26 Apr 2023 10:01:33 -0700 Subject: [PATCH] Fix Linux MultiGPU TensorRT CI (#15697) ### Description * Revert the default TensorRT version to 8.5 as a temporary fix. * In addition, this PR temporarily keeps this CI as a place to validate user scenarios that use TRT 8.5 with the latest ORT. ### Context * This CI pool is equipped with 2x Tesla M60 GPUs, which are no longer supported by TensorRT 8.6. * Currently, other CIs use single-T4 VMs, but there is no VM with 2x T4 or another suitable dual-GPU configuration in the available range. * Once we decide which VM instance to migrate this CI to, TRT 8.6 can be enabled on this CI. * According to [NVIDIA](https://docs.nvidia.com/deeplearning/tensorrt/release-notes/index.html): * TensorRT 8.5.3 was the last release supporting NVIDIA Kepler (SM 3.x) and NVIDIA Maxwell (SM 5.x) devices. *These devices are no longer supported in TensorRT 8.6*. NVIDIA Pascal (SM 6.x) devices are deprecated in TensorRT 8.6. 
--- .../github/linux/docker/Dockerfile.ubuntu_tensorrt | 7 ++++--- tools/ci_build/github/linux/run_build.sh | 1 - tools/ci_build/github/linux/run_dockerbuild.sh | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt index c3b41afecb..d3f242ddd2 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt +++ b/tools/ci_build/github/linux/docker/Dockerfile.ubuntu_tensorrt @@ -11,11 +11,11 @@ ADD scripts /tmp/scripts RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION && /tmp/scripts/install_os_deps.sh && /tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION && rm -rf /tmp/scripts # Install TensorRT -RUN v="8.6.0.12-1+cuda11.8" &&\ +RUN v="8.5.1-1+cuda11.8" &&\ apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub &&\ apt-get update &&\ - sudo apt-get install -y libnvinfer8=${v} libnvonnxparsers8=${v} libnvparsers8=${v} libnvinfer-plugin8=${v} libnvinfer-vc-plugin8=${v}\ - libnvinfer-dev=${v} libnvonnxparsers-dev=${v} libnvparsers-dev=${v} libnvinfer-plugin-dev=${v} libnvinfer-vc-plugin-dev=${v}\ + sudo apt-get install -y libnvinfer8=${v} libnvonnxparsers8=${v} libnvparsers8=${v} libnvinfer-plugin8=${v} \ + libnvinfer-dev=${v} libnvonnxparsers-dev=${v} libnvparsers-dev=${v} libnvinfer-plugin-dev=${v} \ python3-libnvinfer=${v} libnvinfer-samples=${v} WORKDIR /root @@ -34,3 +34,4 @@ ARG BUILD_UID=1000 WORKDIR /home/$BUILD_USER RUN adduser --gecos 'onnxruntime Build User' --disabled-password $BUILD_USER --uid $BUILD_UID USER $BUILD_USER +ENV CUDA_MODULE_LOADING "LAZY" diff --git a/tools/ci_build/github/linux/run_build.sh b/tools/ci_build/github/linux/run_build.sh index 75008ec773..43e1543890 100755 --- a/tools/ci_build/github/linux/run_build.sh +++ b/tools/ci_build/github/linux/run_build.sh @@ -56,7 +56,6 @@ else _CUDNN_VERSION=$(echo $CUDNN_VERSION | cut 
-d. -f1-2) python3 $SCRIPT_DIR/../../build.py --build_dir /build \ --config Release $COMMON_BUILD_ARGS \ - --tensorrt_placeholder_builder \ --use_tensorrt --tensorrt_home /usr/lib/x86_64-linux-gnu/ \ --cuda_home /usr/local/cuda \ --cudnn_home /usr/lib/x86_64-linux-gnu/ $BUILD_EXTR_PAR diff --git a/tools/ci_build/github/linux/run_dockerbuild.sh b/tools/ci_build/github/linux/run_dockerbuild.sh index 401ede6386..0c1850e518 100755 --- a/tools/ci_build/github/linux/run_dockerbuild.sh +++ b/tools/ci_build/github/linux/run_dockerbuild.sh @@ -96,7 +96,7 @@ elif [ $BUILD_DEVICE = "gpu" ]; then --docker-build-args="--build-arg BASEIMAGE=nvcr.io/nvidia/cuda:11.8.0-cudnn8-devel-${BUILD_OS} --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg INSTALL_DEPS_EXTRA_ARGS=\"${INSTALL_DEPS_EXTRA_ARGS}\" --build-arg USE_CONDA=${USE_CONDA} --network=host" \ --dockerfile Dockerfile.ubuntu_gpu_training --context . elif [[ $BUILD_DEVICE = "tensorrt"* ]]; then - IMAGE="$BUILD_OS-cuda11.8-cudnn8.7-tensorrt8.6" + IMAGE="$BUILD_OS-cuda11.8-cudnn8.7-tensorrt8.5" DOCKER_FILE=Dockerfile.ubuntu_tensorrt $GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \