[ROCm] Update ROCm and MIGraphX CI to ROCm5.7 (#17834)

- Update ROCm and MIGraphX CI to ROCm5.7
- Simplify test exclude file. Some tests will output `registered
execution providers ROCMExecutionProvider were unable to run the model.`
if they cannot run.
- Add `enable_training` build argument for MIGraphX pipeline.
This commit is contained in:
PeixuanZuo 2023-10-09 10:29:11 +08:00 committed by GitHub
parent c2bd5b70b2
commit 2ef6ee674c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 13 additions and 49 deletions

View file

@ -36,7 +36,7 @@ variables:
- name: render
value: 109
- name: RocmVersion
value: 5.6
value: 5.7
jobs:
- job: Linux_Build
@ -99,6 +99,7 @@ jobs:
ccache -s; \
python tools/ci_build/build.py \
--config Release \
--enable_training \
--cmake_extra_defines \
CMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ \
onnxruntime_BUILD_KERNEL_EXPLORER=OFF \
@ -181,7 +182,7 @@ jobs:
/bin/bash -c "
set -ex; \
cd /build/Release && xargs -a /build/Release/perms.txt chmod a+x; \
bash /onnxruntime_src/tools/ci_build/github/pai/migraphx_test_launcher.sh"
bash /onnxruntime_src/tools/ci_build/github/pai/pai_test_launcher.sh"
workingDirectory: $(Build.SourcesDirectory)
displayName: 'Run onnxruntime unit tests'

View file

@ -25,7 +25,7 @@ variables:
- name: render
value: 109
- name: RocmVersion
value: 5.6
value: 5.7
- name: BuildConfig
value: Release
@ -98,7 +98,7 @@ jobs:
/bin/bash -c "
set -ex; \
ccache -s; \
/opt/python/cp38-cp38/bin/python3 tools/ci_build/build.py \
/opt/python/cp39-cp39/bin/python3 tools/ci_build/build.py \
--config $(BuildConfig) \
--enable_training \
--mpi_home /opt/ompi \

View file

@ -1,7 +1,7 @@
# Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete
FROM ubuntu:22.04
ARG ROCM_VERSION=5.6
ARG ROCM_VERSION=5.7
ARG AMDGPU_VERSION=${ROCM_VERSION}
ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600'
@ -71,12 +71,15 @@ RUN pip install cryptography==41.0.0
# Create migraphx-ci environment
ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/migraphx-ci
ENV CONDA_DEFAULT_ENV migraphx-ci
RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.8
RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.9
ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH}
# Enable migraphx-ci environment
SHELL ["conda", "run", "-n", "migraphx-ci", "/bin/bash", "-c"]
# ln -sf is needed to make sure that version `GLIBCXX_3.4.30' is found
RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libstdc++.so.6
# Install migraphx
RUN apt update && apt install -y migraphx

View file

@ -1,2 +0,0 @@
GatherOpTest.Gather_invalid_index_cpu
Scatter.InvalidIndex

View file

@ -1,15 +0,0 @@
#!/bin/bash
# Run onnxruntime_test_all with every test named in migraphx-excluded-tests.txt
# filtered out.
#
# Usage: <this script> [build_dir]
#   build_dir  directory that contains the onnxruntime_test_all binary
#              (defaults to the current directory)

build_dir=${1:-"."}
# Absolute directory of this script; the exclusion list is read from the same place.
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
echo "Warning: The following tests are EXCLUDED on MIGraphX agent:"
# Start with "-" so that every name appended below is treated as an excluded pattern.
gtest_filter="-"
# -r keeps backslashes literal; the `|| [[ -n ... ]]` clause still processes a
# final line that has no trailing newline instead of silently dropping it.
while read -r line || [[ -n "$line" ]]; do
  gtest_filter="$gtest_filter:$line"
  echo "$line"
done <"$script_dir/migraphx-excluded-tests.txt"
echo ""
echo "Running ./onnxruntime_test_all .."
# Quote both the path and the filter so spaces or glob characters are passed through verbatim.
"$build_dir/onnxruntime_test_all" --gtest_filter="$gtest_filter"

View file

@ -1,6 +1,3 @@
CudaKernelTest.NegativeLogLikelihoodLoss_TinySizeTensor
CudaKernelTest.NegativeLogLikelihoodLoss_SmallSizeTensor
CudaKernelTest.NegativeLogLikelihoodLoss_MediumSizeTensor
CudaKernelTest.SoftmaxGrad_LargeTensor_LastAxis_Float16
CudaKernelTest.SoftmaxGrad_LargeTensor_LastAxis_Float16_NoPowerOfTwo
CudaKernelTest.SoftmaxGrad_LargeTensor_AllAxis_Float16
@ -10,26 +7,6 @@ CudaKernelTest.LogSoftmaxGrad_LargeTensor_LastAxis_Float16_NoPowerOfTwo
CudaKernelTest.LogSoftmaxGrad_LargeTensor_AllAxis_Float16
CudaKernelTest.LogSoftmaxGrad_LargeTensor_AllAxis_Float16_NoPowerOfTwo
ReductionOpTest.ReductionVariationTest
ReductionOpTest.ReduceLogSumExp_default_axes_keepdims_double
ReductionOpTest.ReduceLogSumExp_default_axes_do_not_keep_dims_double
ReductionOpTest.ReduceLogSumExp_do_not_keepdims_double
ReductionOpTest.ReduceLogSumExp_do_not_keepdims_2_double
ReductionOpTest.ReduceLogSumExp_keepdims_double
ReductionOpTest.ReduceLogSumExp_double
ReductionOpTest.ReduceMax_double
ReductionOpTest.ReduceMean_default_axes_keepdims_double
ReductionOpTest.ReduceMean_default_axes_do_not_keep_dims_double
ReductionOpTest.ReduceMean_do_not_keepdims_double
ReductionOpTest.ReduceMean_do_not_keepdims_2_double
ReductionOpTest.ReduceMean_keepdims_double
ReductionOpTest.ReduceMean_double
ReductionOpTest.ReduceMean0DTensor_double
ReductionOpTest.ReduceMin_double
ReductionOpTest.ReduceSum_double
ReductionOpTest.ReduceSumSquare_double
ReductionOpTest.ReduceInfMax_double
ReductionOpTest.ReduceInfMin_double
ReductionOpTest.ReduceInfLogSumExp_double
GatherOpTest.Gather_invalid_index_cpu
Scatter.InvalidIndex
GradientCheckerTest.AddGrad

View file

@ -1,7 +1,7 @@
# Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete
FROM ubuntu:22.04
ARG ROCM_VERSION=5.6
ARG ROCM_VERSION=5.7
ARG AMDGPU_VERSION=${ROCM_VERSION}
ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600'
@ -64,7 +64,7 @@ RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86
# Create rocm-ci environment
ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/rocm-ci
ENV CONDA_DEFAULT_ENV rocm-ci
RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.8
RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.9
ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH}
# Conda base patch
@ -77,7 +77,7 @@ SHELL ["conda", "run", "-n", "rocm-ci", "/bin/bash", "-c"]
RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libstdc++.so.6
# Install Pytorch
RUN pip install install torch==2.0.1 torchvision==0.15.2 -f https://repo.radeon.com/rocm/manylinux/rocm-rel-${ROCM_VERSION}/ && \
RUN pip install torch==2.0.1 torchvision==0.15.2 -f https://repo.radeon.com/rocm/manylinux/rocm-rel-${ROCM_VERSION}/ && \
pip install torch-ort --no-dependencies