mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-21 21:52:11 +00:00
[ROCm] Update CI based on ubuntu 22.04 (#17076)
- Update ROCm version to ROCm5.6 - Update CI based on ubuntu 22.04
This commit is contained in:
parent
87285323e6
commit
12837ba5c7
6 changed files with 174 additions and 62 deletions
|
|
@ -9,7 +9,7 @@ variables:
|
|||
- name: render
|
||||
value: 109
|
||||
- name: RocmVersion
|
||||
value: 5.5
|
||||
value: 5.6
|
||||
|
||||
jobs:
|
||||
- job: Linux_Build
|
||||
|
|
@ -31,10 +31,12 @@ jobs:
|
|||
clean: true
|
||||
submodules: recursive
|
||||
|
||||
|
||||
- template: templates/get-docker-image-steps.yml
|
||||
parameters:
|
||||
Dockerfile: tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile
|
||||
Context: tools/ci_build/github/linux/docker
|
||||
DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)"
|
||||
Repository: onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion)
|
||||
|
||||
- task: Cache@2
|
||||
|
|
@ -131,6 +133,7 @@ jobs:
|
|||
parameters:
|
||||
Dockerfile: tools/ci_build/github/linux/docker/migraphx-ci-pipeline-env.Dockerfile
|
||||
Context: tools/ci_build/github/linux/docker
|
||||
DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)"
|
||||
Repository: onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion)
|
||||
|
||||
- task: CmdLine@2
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ variables:
|
|||
- name: render
|
||||
value: 109
|
||||
- name: RocmVersion
|
||||
value: 5.5
|
||||
value: 5.6
|
||||
- name: BuildConfig
|
||||
value: Release
|
||||
|
||||
|
|
@ -36,6 +36,7 @@ jobs:
|
|||
parameters:
|
||||
Dockerfile: tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile
|
||||
Context: tools/ci_build/github/linux/docker
|
||||
DockerBuildArgs: "--build-arg ROCM_VERSION=$(RocmVersion)"
|
||||
Repository: onnxruntimetrainingrocm-cibuild-rocm$(RocmVersion)-build
|
||||
|
||||
#- script: |-
|
||||
|
|
@ -130,7 +131,7 @@ jobs:
|
|||
parameters:
|
||||
Dockerfile: tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile
|
||||
Context: tools/ci_build/github/linux/docker
|
||||
DockerBuildArgs: "--build-arg BUILD_UID=$( id -u )"
|
||||
DockerBuildArgs: "--build-arg BUILD_UID=$( id -u ) --build-arg ROCM_VERSION=$(RocmVersion)"
|
||||
Repository: onnxruntimetrainingrocm-cibuild-rocm$(RocmVersion)-test
|
||||
|
||||
- task: Bash@3
|
||||
|
|
|
|||
|
|
@ -1,13 +1,42 @@
|
|||
FROM rocm/pytorch:rocm5.5_ubuntu20.04_py3.8_pytorch_1.13.1
|
||||
# Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete
|
||||
FROM ubuntu:22.04
|
||||
|
||||
# MIGraphX version should be the same as ROCm version
|
||||
ARG MIGRAPHX_VERSION=rocm-5.5.0
|
||||
ARG ROCM_VERSION=5.6
|
||||
ARG AMDGPU_VERSION=${ROCM_VERSION}
|
||||
ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600'
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
|
||||
RUN echo "$APT_PREF" > /etc/apt/preferences.d/rocm-pin-600
|
||||
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
ENV MIGRAPHX_DISABLE_FAST_GELU=1
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg && \
|
||||
curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - &&\
|
||||
printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | tee /etc/apt/sources.list.d/rocm.list && \
|
||||
printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | tee /etc/apt/sources.list.d/amdgpu.list && \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
sudo \
|
||||
libelf1 \
|
||||
kmod \
|
||||
file \
|
||||
python3 \
|
||||
python3-pip \
|
||||
rocm-dev \
|
||||
rocm-libs \
|
||||
build-essential && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN groupadd -g 109 render
|
||||
|
||||
# Upgrade to meet security requirements
|
||||
RUN apt-get update -y && apt-get upgrade -y && apt-get autoremove -y && \
|
||||
apt-get install -y locales unzip && apt-get clean -y
|
||||
apt-get install -y locales cifs-utils wget half libnuma-dev lsb-release && \
|
||||
apt-get clean -y
|
||||
|
||||
ENV MIGRAPHX_DISABLE_FAST_GELU=1
|
||||
RUN locale-gen en_US.UTF-8
|
||||
RUN update-locale LANG=en_US.UTF-8
|
||||
ENV LC_ALL C.UTF-8
|
||||
|
|
@ -15,28 +44,11 @@ ENV LANG C.UTF-8
|
|||
|
||||
WORKDIR /stage
|
||||
|
||||
ADD scripts /tmp/scripts
|
||||
RUN /tmp/scripts/install_os_deps.sh
|
||||
|
||||
# from rocm/pytorch's image, work around ucx's dlopen replacement conflicting with shared provider
|
||||
RUN cd /opt/mpi_install/ucx/build &&\
|
||||
make clean &&\
|
||||
../contrib/configure-release --prefix=/opt/ucx --without-rocm &&\
|
||||
make -j $(nproc) &&\
|
||||
make install
|
||||
|
||||
RUN apt-get update &&\
|
||||
apt-get install -y half libnuma-dev
|
||||
|
||||
# Install rbuild
|
||||
RUN pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/master.tar.gz numpy yapf==0.28.0
|
||||
|
||||
# Install MIGraphX from source
|
||||
RUN mkdir -p /migraphx
|
||||
RUN cd /migraphx && git clone --depth=1 --branch ${MIGRAPHX_VERSION} https://github.com/ROCmSoftwarePlatform/AMDMIGraphX src
|
||||
RUN cd /migraphx && rbuild package --cxx /opt/rocm/llvm/bin/clang++ -d /migraphx/deps -B /migraphx/build -S /migraphx/src/ -DPYTHON_EXECUTABLE=/usr/bin/python3
|
||||
RUN dpkg -i /migraphx/build/*.deb
|
||||
RUN rm -rf /migraphx
|
||||
# CMake
# CMAKE_VERSION is the single place the CMake release is chosen; both the
# download URL and the archive name below are derived from it.
ENV CMAKE_VERSION=3.26.3
# Download the prebuilt Linux x86_64 tarball into /usr/local, unpack it over
# /usr with the top-level directory stripped, then delete the archive in the
# same layer so it does not stay in the image.
RUN cd /usr/local && \
    wget -q https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz && \
    tar -zxf /usr/local/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz --strip=1 -C /usr && \
    rm -f /usr/local/cmake-${CMAKE_VERSION}-Linux-x86_64.tar.gz
|
||||
|
||||
# ccache
|
||||
RUN mkdir -p /tmp/ccache && \
|
||||
|
|
@ -44,3 +56,28 @@ RUN mkdir -p /tmp/ccache && \
|
|||
wget -q -O - https://github.com/ccache/ccache/releases/download/v4.7.4/ccache-4.7.4-linux-x86_64.tar.xz | tar --strip 1 -J -xf - && \
|
||||
cp /tmp/ccache/ccache /usr/bin && \
|
||||
rm -rf /tmp/ccache
|
||||
|
||||
# Install Conda
# Put the Miniconda bin directory first on PATH so the `conda` calls in the
# RUN step below (and in later steps) resolve to /opt/miniconda.
ENV PATH=/opt/miniconda/bin:${PATH}
# Download the Miniconda installer with TLS verification left on (the
# installer is executed as root, and ca-certificates is installed earlier in
# this image), install in batch mode to /opt/miniconda, update the base
# packages non-interactively, then remove the installer and conda caches in
# the same layer.
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
    /bin/bash ~/miniconda.sh -b -p /opt/miniconda && \
    conda init bash && \
    conda config --set auto_activate_base false && \
    conda update -y --all && \
    rm ~/miniconda.sh && \
    conda clean -ya
|
||||
|
||||
# Conda base patch
|
||||
RUN pip install cryptography==41.0.0
|
||||
|
||||
# Create migraphx-ci environment
|
||||
ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/migraphx-ci
|
||||
ENV CONDA_DEFAULT_ENV migraphx-ci
|
||||
RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.8
|
||||
ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH}
|
||||
|
||||
# Enable migraphx-ci environment
|
||||
SHELL ["conda", "run", "-n", "migraphx-ci", "/bin/bash", "-c"]
|
||||
|
||||
# Install migraphx
# apt-get is used instead of apt because apt's command-line interface is not
# intended for scripts. The package lists are removed in the same layer so
# they do not add to the image size.
RUN apt-get update && \
    apt-get install -y migraphx && \
    rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN pip install numpy packaging
|
||||
|
|
|
|||
|
|
@ -1,8 +1,38 @@
|
|||
FROM rocm/cupy:rocm5.5.0_ubuntu20.04_py3.8_pytorch2.0.0_cupy13.0.0
|
||||
# Refer to https://github.com/RadeonOpenCompute/ROCm-docker/blob/master/dev/Dockerfile-ubuntu-22.04-complete
|
||||
FROM ubuntu:22.04
|
||||
|
||||
ARG ROCM_VERSION=5.6
|
||||
ARG AMDGPU_VERSION=${ROCM_VERSION}
|
||||
ARG APT_PREF='Package: *\nPin: release o=repo.radeon.com\nPin-Priority: 600'
|
||||
|
||||
CMD ["/bin/bash"]
|
||||
|
||||
RUN echo "$APT_PREF" > /etc/apt/preferences.d/rocm-pin-600
|
||||
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
|
||||
RUN apt-get update && \
|
||||
apt-get install -y --no-install-recommends ca-certificates curl libnuma-dev gnupg && \
|
||||
curl -sL https://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - &&\
|
||||
printf "deb [arch=amd64] https://repo.radeon.com/rocm/apt/$ROCM_VERSION/ jammy main" | tee /etc/apt/sources.list.d/rocm.list && \
|
||||
printf "deb [arch=amd64] https://repo.radeon.com/amdgpu/$AMDGPU_VERSION/ubuntu jammy main" | tee /etc/apt/sources.list.d/amdgpu.list && \
|
||||
apt-get update && apt-get install -y --no-install-recommends \
|
||||
sudo \
|
||||
libelf1 \
|
||||
kmod \
|
||||
file \
|
||||
python3 \
|
||||
python3-pip \
|
||||
rocm-dev \
|
||||
rocm-libs \
|
||||
build-essential && \
|
||||
apt-get clean && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
RUN groupadd -g 109 render
|
||||
|
||||
RUN apt-get update -y && apt-get upgrade -y && apt-get autoremove -y libprotobuf\* protobuf-compiler\* && \
|
||||
rm -f /usr/local/bin/protoc && apt-get install -y locales unzip && apt-get clean -y
|
||||
rm -f /usr/local/bin/protoc && apt-get install -y locales unzip wget git && apt-get clean -y
|
||||
RUN locale-gen en_US.UTF-8
|
||||
RUN update-locale LANG=en_US.UTF-8
|
||||
ENV LC_ALL C.UTF-8
|
||||
|
|
@ -10,13 +40,6 @@ ENV LANG C.UTF-8
|
|||
|
||||
WORKDIR /stage
|
||||
|
||||
# from rocm/pytorch's image, work around ucx's dlopen replacement conflicting with shared provider
|
||||
RUN cd /opt/mpi_install/ucx/build &&\
|
||||
make clean &&\
|
||||
../contrib/configure-release --prefix=/opt/ucx --without-rocm &&\
|
||||
make -j $(nproc) &&\
|
||||
make install
|
||||
|
||||
# CMake
|
||||
ENV CMAKE_VERSION=3.26.3
|
||||
RUN cd /usr/local && \
|
||||
|
|
@ -30,35 +53,83 @@ RUN mkdir -p /tmp/ccache && \
|
|||
cp /tmp/ccache/ccache /usr/bin && \
|
||||
rm -rf /tmp/ccache
|
||||
|
||||
RUN apt-get update && apt-get install -y cifs-utils
|
||||
# Install Conda
# Put the Miniconda bin directory first on PATH so the `conda` calls in the
# RUN step below (and in later steps) resolve to /opt/miniconda.
ENV PATH=/opt/miniconda/bin:${PATH}
# Download the Miniconda installer with TLS verification left on (the
# installer is executed as root, and ca-certificates is installed earlier in
# this image), install in batch mode to /opt/miniconda, update the base
# packages non-interactively, then remove the installer and conda caches in
# the same layer.
RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
    /bin/bash ~/miniconda.sh -b -p /opt/miniconda && \
    conda init bash && \
    conda config --set auto_activate_base false && \
    conda update -y --all && \
    rm ~/miniconda.sh && \
    conda clean -ya
|
||||
|
||||
# Create rocm-ci environment
|
||||
ENV CONDA_ENVIRONMENT_PATH /opt/miniconda/envs/rocm-ci
|
||||
ENV CONDA_DEFAULT_ENV rocm-ci
|
||||
RUN conda create -y -n ${CONDA_DEFAULT_ENV} python=3.8
|
||||
ENV PATH ${CONDA_ENVIRONMENT_PATH}/bin:${PATH}
|
||||
|
||||
# Conda base patch
|
||||
RUN pip install cryptography==41.0.0
|
||||
|
||||
# Enable rocm-ci environment
|
||||
SHELL ["conda", "run", "-n", "rocm-ci", "/bin/bash", "-c"]
|
||||
|
||||
# ln -sf is needed to make sure that version `GLIBCXX_3.4.30' is found
|
||||
RUN ln -sf /usr/lib/x86_64-linux-gnu/libstdc++.so.6 ${CONDA_ENVIRONMENT_PATH}/bin/../lib/libstdc++.so.6
|
||||
|
||||
# Install Pytorch
# torch and torchvision are resolved with the repo.radeon.com directory for
# ROCM_VERSION added as a --find-links (-f) location. torch-ort is then
# installed with --no-dependencies so pip does not resolve its dependencies.
# Only the listed packages are requested: each positional argument to
# `pip install` is a requirement, so no stray words may appear here.
RUN pip install torch==2.0.1 torchvision==0.15.2 -f https://repo.radeon.com/rocm/manylinux/rocm-rel-${ROCM_VERSION}/ && \
    pip install torch-ort --no-dependencies
|
||||
|
||||
|
||||
##### Install Cupy to decrease CPU utilization
|
||||
# Install non dev openmpi
|
||||
RUN wget https://download.open-mpi.org/release/open-mpi/v4.1/openmpi-4.1.5.tar.bz2 && \
|
||||
tar -jxf openmpi-4.1.5.tar.bz2 && \
|
||||
cd openmpi-4.1.5 && \
|
||||
./configure --prefix=/opt/ompi && \
|
||||
make -j4 all && \
|
||||
make install && \
|
||||
cd ../ && \
|
||||
rm -r openmpi-4.1.5 && \
|
||||
rm openmpi-4.1.5.tar.bz2
|
||||
|
||||
# Install CuPy, No stable version is available
|
||||
RUN git clone https://github.com/ROCmSoftwarePlatform/cupy && cd cupy && \
|
||||
git checkout fc251a808037f8a2270860c2a23a683bfc0de43e && \
|
||||
export CUPY_INSTALL_USE_HIP=1 && \
|
||||
export ROCM_HOME=/opt/rocm && \
|
||||
export HCC_AMDGPU_TARGET=gfx906,gfx908,gfx90a && \
|
||||
git submodule update --init && \
|
||||
pip install -e . --no-cache-dir -vvvv
|
||||
|
||||
##### Install transformers to run tests
|
||||
# rocm-ci branch contains instrumentation needed for loss curves and perf
|
||||
RUN git clone https://github.com/microsoft/huggingface-transformers.git &&\
|
||||
cd huggingface-transformers &&\
|
||||
git checkout rocm-ci &&\
|
||||
pip install -e .
|
||||
cd huggingface-transformers &&\
|
||||
git checkout rocm-ci &&\
|
||||
pip install -e .
|
||||
|
||||
RUN pip install \
|
||||
numpy==1.24.1 \
|
||||
onnx \
|
||||
cerberus \
|
||||
sympy \
|
||||
h5py \
|
||||
datasets==1.9.0 \
|
||||
requests \
|
||||
sacrebleu==1.5.1 \
|
||||
sacremoses \
|
||||
scipy==1.10.0 \
|
||||
scikit-learn \
|
||||
tokenizers \
|
||||
sentencepiece \
|
||||
dill==0.3.4 \
|
||||
wget \
|
||||
pytorch_lightning==1.6.0 \
|
||||
pytest-xdist \
|
||||
pytest-rerunfailures
|
||||
flatbuffers==2.0 \
|
||||
numpy==1.24.1 \
|
||||
onnx \
|
||||
cerberus \
|
||||
sympy \
|
||||
h5py \
|
||||
datasets==1.9.0 \
|
||||
requests \
|
||||
sacrebleu==1.5.1 \
|
||||
sacremoses \
|
||||
scipy==1.10.0 \
|
||||
scikit-learn \
|
||||
tokenizers \
|
||||
sentencepiece \
|
||||
wget \
|
||||
dill==0.3.4 \
|
||||
pytorch_lightning==1.6.0 \
|
||||
pytest-xdist \
|
||||
pytest-rerunfailures
|
||||
|
||||
RUN pip install torch-ort --no-dependencies
|
||||
ENV ORTMODULE_ONNX_OPSET_VERSION=15
|
||||
|
||||
ARG BUILD_UID=1001
|
||||
|
|
|
|||
Loading…
Reference in a new issue