diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-torch182-cuda102.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-torch1100-cuda102.yml similarity index 63% rename from tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-torch182-cuda102.yml rename to tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-torch1100-cuda102.yml index d584c0f953..de73fab127 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-torch182-cuda102.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-torch1100-cuda102.yml @@ -4,9 +4,10 @@ stages: - template: templates/py-packaging-training-cuda-stage.yml parameters: build_py_parameters: --enable_training --update --build - torch_version: '1.8.2' + torch_version: '1.10.0' cuda_version: '10.2' gcc_version: 8 - cmake_cuda_architectures: 35;37;50;52;60;61;70 + cmake_cuda_architectures: 37;50;52;60;61;70;75;80;86 docker_file: Dockerfile.manylinux2014_training_cuda10_2 - agent_pool: Onnxruntime-Linux-GPU-NV6 + agent_pool: Onnxruntime-Linux-GPU + upload_wheel: 'yes' diff --git a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-torch1100-cuda113.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-torch1100-cuda113.yml new file mode 100644 index 0000000000..6d2575b5b6 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-torch1100-cuda113.yml @@ -0,0 +1,13 @@ +trigger: none + +stages: +- template: templates/py-packaging-training-cuda-stage.yml + parameters: + build_py_parameters: --enable_training --update --build + torch_version: '1.10.0' + cuda_version: '11.3' + gcc_version: 10 + cmake_cuda_architectures: 37;50;52;60;61;70;75;80;86 + docker_file: Dockerfile.manylinux2014_training_cuda11_3 + agent_pool: Onnxruntime-Linux-GPU + upload_wheel: 'yes' diff --git 
a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-torch182-cuda111.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-torch182-cuda111.yml deleted file mode 100644 index 8781bd04ad..0000000000 --- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-torch182-cuda111.yml +++ /dev/null @@ -1,12 +0,0 @@ -trigger: none - -stages: -- template: templates/py-packaging-training-cuda-stage.yml - parameters: - build_py_parameters: --enable_training --update --build - torch_version: '1.8.2' - cuda_version: '11.1' - gcc_version: 9 - cmake_cuda_architectures: 37;50;52;60;61;70;75;80 - docker_file: Dockerfile.manylinux2014_training_cuda11_1 - agent_pool: Onnxruntime-Linux-GPU diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda11_3 b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda11_3 new file mode 100644 index 0000000000..1ba6696f7c --- /dev/null +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda11_3 @@ -0,0 +1,202 @@ +ARG BASEIMAGE=nvcr.io/nvidia/cuda:11.3-cudnn8-devel-centos7 +ARG POLICY=manylinux2014 +ARG PLATFORM=x86_64 +ARG DEVTOOLSET_ROOTPATH= +ARG LD_LIBRARY_PATH_ARG= +ARG PREPEND_PATH= + +#We need both CUDA and manylinux. But the CUDA Toolkit End User License Agreement says NVIDIA CUDA Driver Libraries(libcuda.so, libnvidia-ptxjitcompiler.so) are only distributable in applications that meet these criteria: +#1. The application was developed starting from a NVIDIA CUDA container obtained from Docker Hub or the NVIDIA GPU Cloud, and +#2. The resulting application is packaged as a Docker container and distributed to users on Docker Hub or the NVIDIA GPU Cloud only. +#So we use CUDA as the base image then add manylinux on top of it. 
+ +#Build manylinux2014 docker image begin +FROM $BASEIMAGE AS runtime_base +ARG POLICY +ARG PLATFORM +ARG DEVTOOLSET_ROOTPATH +ARG LD_LIBRARY_PATH_ARG +ARG PREPEND_PATH +LABEL maintainer="The ManyLinux project" + +ENV AUDITWHEEL_POLICY=${POLICY} AUDITWHEEL_ARCH=${PLATFORM} AUDITWHEEL_PLAT=${POLICY}_${PLATFORM} +ENV LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8 LANGUAGE=en_US.UTF-8 +ENV DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH} +ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH_ARG} +ENV PATH=${PREPEND_PATH}${PATH} +ENV PKG_CONFIG_PATH=/usr/local/lib/pkgconfig + +# first copy the fixup mirrors script, keep the script around +COPY build_scripts/fixup-mirrors.sh /usr/local/sbin/fixup-mirrors + +# setup entrypoint, this will wrap commands with `linux32` with i686 images +COPY build_scripts/install-entrypoint.sh \ + build_scripts/update-system-packages.sh \ + build_scripts/build_utils.sh \ + /build_scripts/ + +RUN /build_scripts/install-entrypoint.sh && rm -rf /build_scripts +COPY manylinux-entrypoint /usr/local/bin/manylinux-entrypoint +ENTRYPOINT ["manylinux-entrypoint"] + +COPY build_scripts/install-runtime-packages.sh \ + build_scripts/update-system-packages.sh \ + build_scripts/build_utils.sh \ + /build_scripts/ +RUN manylinux-entrypoint /build_scripts/install-runtime-packages.sh && rm -rf /build_scripts/ + +COPY build_scripts/build_utils.sh /build_scripts/ + +COPY build_scripts/install-autoconf.sh /build_scripts/ +RUN export AUTOCONF_ROOT=autoconf-2.71 && \ + export AUTOCONF_HASH=431075ad0bf529ef13cb41e9042c542381103e80015686222b8a9d4abef42a1c && \ + export AUTOCONF_DOWNLOAD_URL=http://ftp.gnu.org/gnu/autoconf && \ + manylinux-entrypoint /build_scripts/install-autoconf.sh + +COPY build_scripts/install-automake.sh /build_scripts/ +RUN export AUTOMAKE_ROOT=automake-1.16.5 && \ + export AUTOMAKE_HASH=07bd24ad08a64bc17250ce09ec56e921d6343903943e99ccf63bbf0705e34605 && \ + export AUTOMAKE_DOWNLOAD_URL=http://ftp.gnu.org/gnu/automake && \ + manylinux-entrypoint 
/build_scripts/install-automake.sh + +COPY build_scripts/install-libtool.sh /build_scripts/ +RUN export LIBTOOL_ROOT=libtool-2.4.6 && \ + export LIBTOOL_HASH=e3bd4d5d3d025a36c21dd6af7ea818a2afcd4dfc1ea5a17b39d7854bcd0c06e3 && \ + export LIBTOOL_DOWNLOAD_URL=http://ftp.gnu.org/gnu/libtool && \ + manylinux-entrypoint /build_scripts/install-libtool.sh + +COPY build_scripts/install-patchelf.sh /build_scripts/ +RUN export PATCHELF_VERSION=0.13 && \ + export PATCHELF_HASH=60c6aeadb673de9cc1838b630c81f61e31c501de324ef7f1e8094a2431197d09 && \ + export PATCHELF_DOWNLOAD_URL=https://github.com/NixOS/patchelf/archive && \ + manylinux-entrypoint /build_scripts/install-patchelf.sh + +COPY build_scripts/install-libxcrypt.sh /build_scripts/ +RUN export LIBXCRYPT_VERSION=4.4.26 && \ + export LIBXCRYPT_HASH=e8a544dd19171c1e6191a6044c96cc31496d781ba08b5a00f53310d001d58114 && \ + export LIBXCRYPT_DOWNLOAD_URL=https://github.com/besser82/libxcrypt/archive && \ + export PERL_ROOT=perl-5.34.0 && \ + export PERL_HASH=551efc818b968b05216024fb0b727ef2ad4c100f8cb6b43fab615fa78ae5be9a && \ + export PERL_DOWNLOAD_URL=https://www.cpan.org/src/5.0 && \ + manylinux-entrypoint /build_scripts/install-libxcrypt.sh + +FROM runtime_base AS build_base +COPY build_scripts/install-build-packages.sh /build_scripts/ +RUN manylinux-entrypoint /build_scripts/install-build-packages.sh + + +FROM build_base AS build_git +COPY build_scripts/build-git.sh /build_scripts/ +RUN export GIT_ROOT=git-2.33.1 && \ + export GIT_HASH=02047f8dc8934d57ff5e02aadd8a2fe8e0bcf94a7158da375e48086cc46fce1d && \ + export GIT_DOWNLOAD_URL=https://www.kernel.org/pub/software/scm/git && \ + manylinux-entrypoint /build_scripts/build-git.sh + + +FROM build_base AS build_swig +COPY build_scripts/build-swig.sh /build_scripts/ +RUN export SWIG_ROOT=swig-4.0.2 && \ + export SWIG_HASH=d53be9730d8d58a16bf0cbd1f8ac0c0c3e1090573168bfa151b01eb47fa906fc && \ + export SWIG_DOWNLOAD_URL=https://sourceforge.net/projects/swig/files/swig/${SWIG_ROOT} 
&& \ + export PCRE_ROOT=pcre-8.45 && \ + export PCRE_HASH=4e6ce03e0336e8b4a3d6c2b70b1c5e18590a5673a98186da90d4f33c23defc09 && \ + export PCRE_DOWNLOAD_URL=https://sourceforge.net/projects/pcre/files/pcre/8.45 && \ + manylinux-entrypoint /build_scripts/build-swig.sh + + +FROM build_base AS build_cpython +COPY build_scripts/build-sqlite3.sh /build_scripts/ +RUN export SQLITE_AUTOCONF_ROOT=sqlite-autoconf-3360000 && \ + export SQLITE_AUTOCONF_HASH=bd90c3eb96bee996206b83be7065c9ce19aef38c3f4fb53073ada0d0b69bbce3 && \ + export SQLITE_AUTOCONF_DOWNLOAD_URL=https://www.sqlite.org/2021 && \ + manylinux-entrypoint /build_scripts/build-sqlite3.sh + +COPY build_scripts/build-openssl.sh /build_scripts/ +RUN export OPENSSL_ROOT=openssl-1.1.1l && \ + export OPENSSL_HASH=0b7a3e5e59c34827fe0c3a74b7ec8baef302b98fa80088d7f9153aa16fa76bd1 && \ + export OPENSSL_DOWNLOAD_URL=https://www.openssl.org/source && \ + manylinux-entrypoint /build_scripts/build-openssl.sh + +COPY build_scripts/build-cpython.sh /build_scripts/ + + +FROM build_cpython AS build_cpython36 +COPY build_scripts/cpython-pubkeys.txt /build_scripts/cpython-pubkeys.txt +RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.6.15 + + +FROM build_cpython AS build_cpython37 +COPY build_scripts/cpython-pubkeys.txt /build_scripts/cpython-pubkeys.txt +RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.7.12 + + +FROM build_cpython AS build_cpython38 +COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt +RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.8.12 + + +FROM build_cpython AS build_cpython39 +COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt +RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.9.8 + + +FROM build_cpython AS build_cpython310 +COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt +RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.10.0 + + +FROM build_cpython AS all_python +COPY 
build_scripts/install-pypy.sh /build_scripts/install-pypy.sh +COPY build_scripts/pypy.sha256 /build_scripts/pypy.sha256 +RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.7 7.3.7 +RUN manylinux-entrypoint /build_scripts/install-pypy.sh 3.8 7.3.7 +COPY --from=build_cpython36 /opt/_internal /opt/_internal/ +COPY --from=build_cpython37 /opt/_internal /opt/_internal/ +COPY --from=build_cpython38 /opt/_internal /opt/_internal/ +COPY --from=build_cpython39 /opt/_internal /opt/_internal/ +COPY --from=build_cpython310 /opt/_internal /opt/_internal/ +RUN hardlink -cv /opt/_internal + + +FROM runtime_base +COPY --from=build_git /manylinux-rootfs / +COPY --from=build_swig /manylinux-rootfs / +COPY --from=build_cpython /manylinux-rootfs / +COPY --from=all_python /opt/_internal /opt/_internal/ +COPY build_scripts/finalize.sh \ + build_scripts/update-system-packages.sh \ + build_scripts/python-tag-abi-tag.py \ + build_scripts/requirements3.6.txt \ + build_scripts/requirements3.7.txt \ + build_scripts/requirements3.8.txt \ + build_scripts/requirements3.9.txt \ + build_scripts/requirements3.10.txt \ + build_scripts/requirements-base-tools.txt \ + /build_scripts/ +COPY build_scripts/requirements-tools/* /build_scripts/requirements-tools/ +RUN manylinux-entrypoint /build_scripts/finalize.sh && rm -rf /build_scripts + +ENV SSL_CERT_FILE=/opt/_internal/certs.pem + +CMD ["/bin/bash"] + +#Build manylinux2014 docker image end +ARG PYTHON_VERSION=3.6 +ARG TORCH_VERSION=1.10.0 +ARG INSTALL_DEPS_EXTRA_ARGS + +#Add our own dependencies; the -v value passed to install_python_deps.sh below must stay in sync with this image's CUDA 11.3 base +COPY scripts /tmp/scripts +RUN cd /tmp/scripts && \ + /tmp/scripts/manylinux/install_centos.sh && \ + /tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \ + /tmp/scripts/install_ninja.sh && \ + /tmp/scripts/install_python_deps.sh -d gpu -v 11.3 -p $PYTHON_VERSION -h $TORCH_VERSION $INSTALL_DEPS_EXTRA_ARGS && \ + rm -rf /tmp/scripts + +ARG BUILD_UID=1001 +ARG BUILD_USER=onnxruntimedev +RUN adduser --uid $BUILD_UID $BUILD_USER +WORKDIR 
/home/$BUILD_USER +USER $BUILD_USER +ENV PATH=/usr/local/gradle/bin:/usr/local/dotnet:$PATH diff --git a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.10.0_cu10.2.txt b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.10.0_cu10.2.txt new file mode 100644 index 0000000000..589234fa09 --- /dev/null +++ b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.10.0_cu10.2.txt @@ -0,0 +1,6 @@ +--pre +-f https://download.pytorch.org/whl/torch_stable.html +torch==1.10.0+cu102 +torchvision==0.11.0+cu102 +torchtext==0.11.0 +setuptools>=41.4.0 diff --git a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.10.0_cu11.3.txt b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.10.0_cu11.3.txt new file mode 100644 index 0000000000..2e1ff22b31 --- /dev/null +++ b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.10.0_cu11.3.txt @@ -0,0 +1,6 @@ +--pre +-f https://download.pytorch.org/whl/cu113/torch_stable.html +torch==1.10.0+cu113 +torchvision==0.11.0+cu113 +torchtext==0.11.0 +setuptools>=41.4.0 diff --git a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.8.2_cu10.2.txt b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.8.2_cu10.2.txt deleted file mode 100644 index ecb522fd99..0000000000 --- a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.8.2_cu10.2.txt +++ /dev/null @@ -1,6 +0,0 @@ ---pre --f https://download.pytorch.org/whl/lts/1.8/torch_lts.html -torch==1.8.2+cu102 -torchvision==0.9.2+cu102 -torchtext==0.9.2 -setuptools>=41.4.0 diff --git a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.8.2_cu11.1.txt 
b/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.8.2_cu11.1.txt deleted file mode 100644 index b3bdccd909..0000000000 --- a/tools/ci_build/github/linux/docker/scripts/training/ortmodule/stage1/requirements_torch1.8.2_cu11.1.txt +++ /dev/null @@ -1,6 +0,0 @@ ---pre --f https://download.pytorch.org/whl/lts/1.8/torch_lts.html -torch==1.8.2+cu111 -torchvision==0.9.2+cu111 -torchtext==0.9.2 -setuptools>=41.4.0