mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-06 00:03:22 +00:00
Docker image release pipeline (#4682)
* create orttraining-1p-linux-gpu-ci-pipeline.yml * fix syntax * fix file path * fix template path * publish docker image to test acr * use right task name * change parameter list * use variables * use python.version * remove --enable_onnx_tests due to segfault * add back --enable_onnx_tests * fix docker push command line * change docker login command * login differently * fix docker tag script * create password.txt * add ortrelease docker image * enable test in build.sh * add pipeline parameter * add pipeline parameter * change timeout * change timeout * fix run_dockerbuild.sh * use PR checkin build docker * fix strategy syntax * fix strategy syntax * change dockerfile * change run_dockerbuild.sh * change tag name * build with root user * use build id for docker image tag * remove all user lines * change docker tag * add mpi, mellanox * add missing args * use release dockerfile for ci build * remove install wheel * use release docker image * fix syntax * use different pool * add Dockerfile.training * remove sudo to run on Linux-Multi-GPU-V100 * change docker file path * update dockerfile * use latest dockerfile * change agent pool * remove --preserve-env * add back parameter * Add test_flag * use azuredevops docker * change repository * use cmd for docker login * echo build script * use ortrelrease ACR * change key vault connection * Move --build flag * change build command * add paramter for image tag * clean up for PR * remove unnecessary changes * whitespace changes * whitespace changes * change build flag * change flag name * change flag * use latest dockerfile * enable build tests * build builder stage and run test * Add back python.version * change build directory * always run build entire dockerfile * fix yml syntax * fix syntax * add en-UTF8 locale * rename * remove unused template * Update orttraining-linux-gpu-docker-release-pipeline.yml for Azure Pipelines * Update orttraining-linux-gpu-docker-release-pipeline.yml for Azure Pipelines * Test commit sha1 
in pipeline * fix parameter * update docker file * fix --from=build * remove commented blocks * PR comments * fix syntax * fix syntax * use timestamp as build number * remove latest tag * add build_timestamp variable * remove wrong property * fix docker run command * test build id * Use datestamp build id * change build tags * add no-cache to docker build * rename BUILD_VERSION -> BUILD_CONFIG Co-authored-by: Jingyan Wang <jingywa@OrtDevTest2v100.af05slrtruoetgaxwwjv5nsq5e.px.internal.cloudapp.net> Co-authored-by: Jingyan Wang <jingywa@OrtTrainingDev3.af05slrtruoetgaxwwjv5nsq5e.px.internal.cloudapp.net>
This commit is contained in:
parent
8a66ad79a6
commit
adda8c66d9
2 changed files with 95 additions and 12 deletions
|
|
@ -9,11 +9,12 @@ ARG NUMPY_VERSION=1.18.5
|
|||
ARG ONNX_VERSION=1.7.0
|
||||
ARG PYTORCH_VERSION=1.6.0
|
||||
|
||||
ARG BUILD_VERSION=Release
|
||||
ARG BUILD_CONFIG=Release
|
||||
ARG OPENMPI_PATH=/opt/openmpi-${OPENMPI_VERSION}
|
||||
ARG COMMIT=master
|
||||
|
||||
# cuda development image for building sources
|
||||
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 as build
|
||||
FROM nvidia/cuda:10.2-cudnn7-devel-ubuntu18.04 as builder
|
||||
|
||||
# set location for builds
|
||||
WORKDIR /stage
|
||||
|
|
@ -22,8 +23,12 @@ WORKDIR /stage
|
|||
RUN apt-get -y update &&\
|
||||
apt-get -y --no-install-recommends install \
|
||||
curl \
|
||||
git
|
||||
|
||||
git \
|
||||
language-pack-en
|
||||
|
||||
RUN locale-gen en_US.UTF-8 && \
|
||||
update-locale LANG=en_US.UTF-8
|
||||
|
||||
# install miniconda (comes with python 3.7 default)
|
||||
ARG CONDA_VERSION
|
||||
ARG CONDA_URL=https://repo.anaconda.com/miniconda/Miniconda3-${CONDA_VERSION}-Linux-x86_64.sh
|
||||
|
|
@ -112,16 +117,17 @@ RUN pip install torch==${PYTORCH_VERSION}
|
|||
# pip install build/wheel/*.whl
|
||||
|
||||
# build onnxruntime wheel with cuda and mpi support
|
||||
ARG BUILD_VERSION
|
||||
ARG BUILD_CONFIG
|
||||
ARG COMMIT
|
||||
RUN cd /stage && git clone https://github.com/microsoft/onnxruntime.git &&\
|
||||
cd onnxruntime &&\
|
||||
git checkout master &&\
|
||||
git checkout ${COMMIT} &&\
|
||||
cp ThirdPartyNotices.txt /stage/ThirdPartyNotices.txt &&\
|
||||
cp dockerfiles/LICENSE-IMAGE.txt /stage/LICENSE-IMAGE.txt &&\
|
||||
python tools/ci_build/build.py \
|
||||
--cmake_extra_defines \
|
||||
ONNXRUNTIME_VERSION=`cat ./VERSION_NUMBER` \
|
||||
--config ${BUILD_VERSION} \
|
||||
--config ${BUILD_CONFIG} \
|
||||
--enable_training \
|
||||
--mpi_home ${OPENMPI_PATH} \
|
||||
--use_cuda \
|
||||
|
|
@ -134,7 +140,7 @@ RUN cd /stage && git clone https://github.com/microsoft/onnxruntime.git &&\
|
|||
--build \
|
||||
--build_wheel \
|
||||
--skip_tests &&\
|
||||
pip install build/${BUILD_VERSION}/dist/*.whl
|
||||
pip install build/${BUILD_CONFIG}/dist/*.whl
|
||||
|
||||
# switch to cuda runtime environment
|
||||
# note: launch with --gpus all or nvidia-docker
|
||||
|
|
@ -143,7 +149,7 @@ WORKDIR /stage
|
|||
|
||||
# install ucx
|
||||
# note: launch with --cap-add=sys_nice to avoid 'mbind' warnings
|
||||
COPY --from=build /opt/ucx /opt/ucx
|
||||
COPY --from=builder /opt/ucx /opt/ucx
|
||||
ENV PATH=/opt/ucx/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=/opt/ucx/lib:$LD_LIBRARY_PATH
|
||||
|
||||
|
|
@ -152,7 +158,7 @@ ENV LD_LIBRARY_PATH=/opt/ucx/lib:$LD_LIBRARY_PATH
|
|||
# note: enforce openmpi select ucx or fail
|
||||
ARG OPENMPI_VERSION
|
||||
ARG OPENMPI_PATH
|
||||
COPY --from=build ${OPENMPI_PATH} ${OPENMPI_PATH}
|
||||
COPY --from=builder ${OPENMPI_PATH} ${OPENMPI_PATH}
|
||||
ENV PATH=${OPENMPI_PATH}/bin:$PATH
|
||||
ENV LD_LIBRARY_PATH=${OPENMPI_PATH}/lib:$LD_LIBRARY_PATH
|
||||
ENV OMPI_ALLOW_RUN_AS_ROOT=1
|
||||
|
|
@ -166,7 +172,7 @@ RUN apt-get -y update && apt-get -y --no-install-recommends install \
|
|||
ldconfig
|
||||
|
||||
# copy conda environment (includes numpy, mpi4py, pytorch, onnxruntime)
|
||||
COPY --from=build /opt/conda /opt/conda
|
||||
COPY --from=builder /opt/conda /opt/conda
|
||||
ENV PATH=/opt/conda/bin:${PATH}
|
||||
|
||||
# make ssh/sshd less strict for wiring containers on Azure VM scale set
|
||||
|
|
@ -203,4 +209,4 @@ RUN conda remove -y cmake &&\
|
|||
apt-get autoremove -y &&\
|
||||
rm -fr /stage
|
||||
WORKDIR /workspace
|
||||
COPY --from=build /stage/*.txt /workspace/
|
||||
COPY --from=builder /stage/*.txt /workspace/
|
||||
|
|
|
|||
|
|
@ -0,0 +1,77 @@
|
|||
# Release pipeline for the ONNX Runtime training docker image.
#
# Flow: build only the `builder` stage of the Dockerfile, run the test suite
# inside that stage, then build the complete Dockerfile and push the result
# to the container registry behind the 'ortrelease' service connection.

parameters:
# Git ref that is forwarded to the Dockerfile as the COMMIT build argument.
- name: commit
  displayName: Commit (Default to master or use sha1)
  type: string
  default: master
# Extra tag applied to the pushed image, in addition to the build number.
- name: image_tag
  displayName: Image Tag
  type: string
  default: latest

variables:
  # Used both as the prefix of the local stage-1 image name and as the
  # repository name of the released image.
  docker_image_prefix: onnxruntime-training
  linux_gpu_dockerfile: dockerfiles/Dockerfile.training
  # Forwarded to the Dockerfile as BUILD_CONFIG and to build.py as --config.
  build_config: Release

# Build number format (date plus revision counter); it is also used as an
# image tag in the build and push steps below.
name: $(Date:yyyyMMdd)$(Rev:.r)

jobs:
- job: Linux_py_GPU_Build_Test_Release_Dockerfile
  timeoutInMinutes: 90
  workspace:
    clean: all
  pool: Linux-GPU-CUDA10
  steps:
  # Step 1: build only the `builder` stage (--target builder), without the
  # layer cache and with a fresh pull of the base image.
  - task: CmdLine@2
    displayName: Build builder stage of docker file
    inputs:
      script: |
        docker build \
          --pull \
          -t ${{ variables.docker_image_prefix }}-manylinux-gpu-release-stage1 \
          --target builder \
          --no-cache \
          --build-arg COMMIT="${{ parameters.commit }}" \
          --build-arg BUILD_CONFIG="${{ variables.build_config }}" \
          -f ${{ variables.linux_gpu_dockerfile }} .
      workingDirectory: $(Build.SourcesDirectory)

  # Step 2: run build.py in test mode inside the stage-1 image, with all GPUs
  # exposed to the container. The container is removed afterwards (--rm).
  - task: CmdLine@2
    displayName: Run tests
    inputs:
      script: |
        docker run \
          --gpus all \
          --rm \
          ${{ variables.docker_image_prefix }}-manylinux-gpu-release-stage1 \
            python onnxruntime/tools/ci_build/build.py \
              --build_dir onnxruntime/build \
              --config ${{ variables.build_config }} \
              --test \
              --enable_onnx_tests
      workingDirectory: $(Build.SourcesDirectory)

  # Step 3: build the complete Dockerfile with the same build arguments as
  # step 1 and tag it with the build number and the requested image tag.
  - task: Docker@2
    displayName: Build entire docker file
    inputs:
      command: build
      containerRegistry: 'ortrelease'
      # Taken from the shared variable so the build and push steps always
      # target the same repository.
      repository: ${{ variables.docker_image_prefix }}
      arguments: --build-arg COMMIT="${{ parameters.commit }}" --build-arg BUILD_CONFIG="${{ variables.build_config }}"
      Dockerfile: ${{ variables.linux_gpu_dockerfile }}
      tags: |
        $(Build.BuildNumber)
        ${{ parameters.image_tag }}

  # Step 4: push both tags produced by step 3.
  - task: Docker@2
    displayName: Push docker image
    inputs:
      command: push
      containerRegistry: 'ortrelease'
      repository: ${{ variables.docker_image_prefix }}
      tags: |
        $(Build.BuildNumber)
        ${{ parameters.image_tag }}

  - template: templates/component-governance-component-detection-steps.yml

  - template: templates/clean-agent-build-directory-step.yml
|
||||
Loading…
Reference in a new issue