Fix Linux MultiGPU TensorRT CI (#15697)

### Description
* Revert the default TensorRT version to 8.5 as a temporary fix.
  
* In addition, this PR temporarily keeps this CI as a place to
validate the user scenario of running TRT 8.5 with the latest ORT.

### Context
* This CI pool is equipped with 2x Tesla M60 (Maxwell) GPUs, which are
no longer supported by TensorRT 8.6.
* Currently, other CIs use a single-T4 VM, but there is no VM with
2x T4 or another suitable dual-GPU configuration in the available range.
* Once we decide which VM instance to migrate this CI to, TRT 8.6 can
be enabled on this CI.

* According to
[Nvidia](https://docs.nvidia.com/deeplearning/tensorrt/release-notes/index.html):
  * TensorRT 8.5.3 was the last release supporting NVIDIA Kepler (SM 3.x)
    and NVIDIA Maxwell (SM 5.x) devices. *These devices are no longer
    supported in TensorRT 8.6*. NVIDIA Pascal (SM 6.x) devices are
    deprecated in TensorRT 8.6.
This commit is contained in:
yf711 2023-04-26 10:01:33 -07:00 committed by GitHub
parent 0ecfe83932
commit d701dcd027
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 5 additions and 5 deletions

View file

@ -11,11 +11,11 @@ ADD scripts /tmp/scripts
RUN /tmp/scripts/install_ubuntu.sh -p $PYTHON_VERSION && /tmp/scripts/install_os_deps.sh && /tmp/scripts/install_python_deps.sh -p $PYTHON_VERSION && rm -rf /tmp/scripts
# Install TensorRT
RUN v="8.6.0.12-1+cuda11.8" &&\
RUN v="8.5.1-1+cuda11.8" &&\
apt-key adv --fetch-keys https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/7fa2af80.pub &&\
apt-get update &&\
sudo apt-get install -y libnvinfer8=${v} libnvonnxparsers8=${v} libnvparsers8=${v} libnvinfer-plugin8=${v} libnvinfer-vc-plugin8=${v}\
libnvinfer-dev=${v} libnvonnxparsers-dev=${v} libnvparsers-dev=${v} libnvinfer-plugin-dev=${v} libnvinfer-vc-plugin-dev=${v}\
sudo apt-get install -y libnvinfer8=${v} libnvonnxparsers8=${v} libnvparsers8=${v} libnvinfer-plugin8=${v} \
libnvinfer-dev=${v} libnvonnxparsers-dev=${v} libnvparsers-dev=${v} libnvinfer-plugin-dev=${v} \
python3-libnvinfer=${v} libnvinfer-samples=${v}
WORKDIR /root
@ -34,3 +34,4 @@ ARG BUILD_UID=1000
WORKDIR /home/$BUILD_USER
RUN adduser --gecos 'onnxruntime Build User' --disabled-password $BUILD_USER --uid $BUILD_UID
USER $BUILD_USER
ENV CUDA_MODULE_LOADING "LAZY"

View file

@ -56,7 +56,6 @@ else
_CUDNN_VERSION=$(echo $CUDNN_VERSION | cut -d. -f1-2)
python3 $SCRIPT_DIR/../../build.py --build_dir /build \
--config Release $COMMON_BUILD_ARGS \
--tensorrt_placeholder_builder \
--use_tensorrt --tensorrt_home /usr/lib/x86_64-linux-gnu/ \
--cuda_home /usr/local/cuda \
--cudnn_home /usr/lib/x86_64-linux-gnu/ $BUILD_EXTR_PAR

View file

@ -96,7 +96,7 @@ elif [ $BUILD_DEVICE = "gpu" ]; then
--docker-build-args="--build-arg BASEIMAGE=nvcr.io/nvidia/cuda:11.8.0-cudnn8-devel-${BUILD_OS} --build-arg BUILD_USER=onnxruntimedev --build-arg BUILD_UID=$(id -u) --build-arg PYTHON_VERSION=${PYTHON_VER} --build-arg INSTALL_DEPS_EXTRA_ARGS=\"${INSTALL_DEPS_EXTRA_ARGS}\" --build-arg USE_CONDA=${USE_CONDA} --network=host" \
--dockerfile Dockerfile.ubuntu_gpu_training --context .
elif [[ $BUILD_DEVICE = "tensorrt"* ]]; then
IMAGE="$BUILD_OS-cuda11.8-cudnn8.7-tensorrt8.6"
IMAGE="$BUILD_OS-cuda11.8-cudnn8.7-tensorrt8.5"
DOCKER_FILE=Dockerfile.ubuntu_tensorrt
$GET_DOCKER_IMAGE_CMD --repository "onnxruntime-$IMAGE" \