diff --git a/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc b/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc index ba63e185c7..d0635e29b8 100644 --- a/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc +++ b/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc @@ -2,6 +2,7 @@ // Licensed under the MIT License. #include "core/providers/rocm/rocm_execution_provider_info.h" +#include "core/providers/rocm/rocm_common.h" #include "core/common/make_string.h" #include "core/framework/provider_options_utils.h" @@ -48,8 +49,20 @@ ROCMExecutionProviderInfo ROCMExecutionProviderInfo::FromProviderOptions(const P free = reinterpret_cast<void*>(address); return Status::OK(); }) - // TODO validate info.device_id - .AddAssignmentToReference(rocm::provider_option_names::kDeviceId, info.device_id) + .AddValueParser( + rocm::provider_option_names::kDeviceId, + [&info](const std::string& value_str) -> Status { + ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, info.device_id)); + int num_devices{}; + ORT_RETURN_IF_NOT( + HIP_CALL(hipGetDeviceCount(&num_devices)), + "hipGetDeviceCount() failed."); + ORT_RETURN_IF_NOT( + 0 <= info.device_id && info.device_id < num_devices, + "Invalid device ID: ", info.device_id, + ", must be between 0 (inclusive) and ", num_devices, " (exclusive)."); + return Status::OK(); + }) .AddAssignmentToReference(rocm::provider_option_names::kMemLimit, info.gpu_mem_limit) .AddAssignmentToReference(rocm::provider_option_names::kConvExhaustiveSearch, info.miopen_conv_exhaustive_search) .AddAssignmentToEnumReference( diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml index 0a82b2953e..8aebda0d1b 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml @@ -311,8 +311,10 @@ stages: - script: |- echo
"Tests will run using HIP_VISIBLES_DEVICES=$HIP_VISIBLE_DEVICES" video_gid=$(getent group | awk '/video/ {split($0,a,":"); print(a[3])}') + echo "Found video_gid=$video_gid; attempting to set as pipeline variable" echo "##vso[task.setvariable variable=video]$video_gid" render_gid=$(getent group | awk '/render/ {split($0,a,":"); print(a[3])}') + echo "Found render_gid=$render_gid; attempting to set as pipeline variable" echo "##vso[task.setvariable variable=render]$render_gid" displayName: 'Find video and render gid to be mapped into container' @@ -449,11 +451,13 @@ stages: --security-opt seccomp=unconfined \ --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ + --entrypoint /bin/bash \ -e HIP_VISIBLE_DEVICES \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ + -e PythonManylinuxDir=$(PythonManylinuxdir) \ onnxruntimetrainingrocmbuild \ - bash -c " $(PythonManylinuxDir)/bin/python3 -m pip install /build/Release/dist/*.whl && /onnxruntime_src/tools/doc/builddoc.sh $(PythonManylinuxDir)/bin/ /onnxruntime_src /build Release " ; + /onnxruntime_src/tools/ci_build/github/pai/wrap_rocm_python_doc_publisher.sh workingDirectory: $(Build.SourcesDirectory) - task: CopyFiles@2 diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_rocm b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_rocm index 87a6dc1ec5..bba4fccc60 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_rocm +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_rocm @@ -14,6 +14,8 @@ ARG LD_LIBRARY_PATH_ARG ARG PREPEND_PATH LABEL maintainer="The ManyLinux project" +RUN yum remove -y devtoolset\* git\* && conda remove -y cmake + ENV AUDITWHEEL_POLICY=${POLICY} AUDITWHEEL_ARCH=${PLATFORM} AUDITWHEEL_PLAT=${POLICY}_${PLATFORM} ENV LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8 LANGUAGE=en_US.UTF-8 ENV DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH} @@ -138,10 +140,10 @@ COPY build_scripts/ambv-pubkey.txt 
/build_scripts/cpython-pubkeys.txt RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.9.5 -FROM build_cpython AS build_cpython310 -COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt +# FROM build_cpython AS build_cpython310 +# COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt -RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.10.0b1 +# RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.10.0b1 FROM build_cpython AS all_cpython @@ -149,7 +151,7 @@ COPY --from=build_cpython36 /opt/_internal /opt/_internal/ COPY --from=build_cpython37 /opt/_internal /opt/_internal/ COPY --from=build_cpython38 /opt/_internal /opt/_internal/ COPY --from=build_cpython39 /opt/_internal /opt/_internal/ -COPY --from=build_cpython310 /opt/_internal /opt/_internal/ +# COPY --from=build_cpython310 /opt/_internal /opt/_internal/ RUN hardlink -cv /opt/_internal @@ -174,7 +176,7 @@ ARG INSTALL_DEPS_EXTRA_ARGS #Add our own dependencies ADD scripts /tmp/scripts RUN cd /tmp/scripts && \ - /tmp/scripts/install_centos.sh && \ + /tmp/scripts/manylinux/install_centos.sh && \ /tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \ /tmp/scripts/install_python_deps.sh -d gpu -p $PYTHON_VERSION $INSTALL_DEPS_EXTRA_ARGS && \ rm -rf /tmp/scripts diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda10_2 b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda10_2 index defbf2e7b7..3326aeae2d 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda10_2 +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda10_2 @@ -179,7 +179,7 @@ ARG INSTALL_DEPS_EXTRA_ARGS #Add our own dependencies ADD scripts /tmp/scripts RUN cd /tmp/scripts && \ - /tmp/scripts/install_centos.sh && \ + /tmp/scripts/manylinux/install_centos.sh && \ /tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \ 
/tmp/scripts/install_ninja.sh && \ /tmp/scripts/install_python_deps.sh -d gpu -v 10.2 -p $PYTHON_VERSION $INSTALL_DEPS_EXTRA_ARGS && \ diff --git a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda11_1 b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda11_1 index 863a5a0b91..ab1321e8a2 100644 --- a/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda11_1 +++ b/tools/ci_build/github/linux/docker/Dockerfile.manylinux2014_training_cuda11_1 @@ -178,7 +178,7 @@ ARG INSTALL_DEPS_EXTRA_ARGS #Add our own dependencies ADD scripts /tmp/scripts RUN cd /tmp/scripts && \ - /tmp/scripts/install_centos.sh && \ + /tmp/scripts/manylinux/install_centos.sh && \ /tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \ /tmp/scripts/install_ninja.sh && \ /tmp/scripts/install_python_deps.sh -d gpu -v 11.1 -p $PYTHON_VERSION $INSTALL_DEPS_EXTRA_ARGS && \ diff --git a/tools/ci_build/github/linux/docker/build_scripts/build-git.sh b/tools/ci_build/github/linux/docker/build_scripts/build-git.sh index 588d3a92d8..2661ef0f83 100755 --- a/tools/ci_build/github/linux/docker/build_scripts/build-git.sh +++ b/tools/ci_build/github/linux/docker/build_scripts/build-git.sh @@ -19,7 +19,7 @@ fetch_source ${GIT_ROOT}.tar.gz ${GIT_DOWNLOAD_URL} check_sha256sum ${GIT_ROOT}.tar.gz ${GIT_HASH} tar -xzf ${GIT_ROOT}.tar.gz pushd ${GIT_ROOT} -make install prefix=/usr/local NO_GETTEXT=1 NO_TCLTK=1 DESTDIR=/manylinux-rootfs CPPFLAGS="${MANYLINUX_CPPFLAGS}" CFLAGS="${MANYLINUX_CFLAGS}" CXXFLAGS="${MANYLINUX_CXXFLAGS}" LDFLAGS="${MANYLINUX_LDFLAGS}" +make -j$(getconf _NPROCESSORS_ONLN) install prefix=/usr/local NO_GETTEXT=1 NO_TCLTK=1 DESTDIR=/manylinux-rootfs CPPFLAGS="${MANYLINUX_CPPFLAGS}" CFLAGS="${MANYLINUX_CFLAGS}" CXXFLAGS="${MANYLINUX_CXXFLAGS}" LDFLAGS="${MANYLINUX_LDFLAGS}" popd rm -rf ${GIT_ROOT} ${GIT_ROOT}.tar.gz diff --git a/tools/ci_build/github/linux/docker/build_scripts/install-runtime-packages.sh 
b/tools/ci_build/github/linux/docker/build_scripts/install-runtime-packages.sh index de42b15835..386d2b4325 100755 --- a/tools/ci_build/github/linux/docker/build_scripts/install-runtime-packages.sh +++ b/tools/ci_build/github/linux/docker/build_scripts/install-runtime-packages.sh @@ -94,7 +94,9 @@ elif [ "${AUDITWHEEL_POLICY}" == "manylinux2014" ]; then # Software collection (for devtoolset-9) yum -y install centos-release-scl-rh # EPEL support (for yasm) - yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm + if ! rpm -q --quiet epel-release ; then + yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm + fi TOOLCHAIN_DEPS="${TOOLCHAIN_DEPS} yasm" elif [ "${AUDITWHEEL_ARCH}" == "aarch64" ] || [ "${AUDITWHEEL_ARCH}" == "ppc64le" ] || [ "${AUDITWHEEL_ARCH}" == "s390x" ]; then # Software collection (for devtoolset-9) diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/install_centos.sh b/tools/ci_build/github/linux/docker/scripts/manylinux/install_centos.sh index d6b6538153..7940c12e31 100755 --- a/tools/ci_build/github/linux/docker/scripts/manylinux/install_centos.sh +++ b/tools/ci_build/github/linux/docker/scripts/manylinux/install_centos.sh @@ -19,12 +19,6 @@ if [ "$os_major_version" == "7" ]; then yum install -y dotnet-sdk-2.1 fi -yum install -y java-11-openjdk-devel - -# If the /opt/python folder exists, we assume this is the manylinux docker image -if [ ! 
-d "/opt/python/cp37-cp37m" ]; then - yum install -y ccache gcc gcc-c++ python3 python3-devel python3-pip -fi - -# install automatic documentation generation dependencies -yum install -y graphviz +# Install Java +# Install automatic documentation generation dependencies +yum install -y java-11-openjdk-devel graphviz diff --git a/tools/ci_build/github/linux/docker/scripts/manylinux/install_deps.sh b/tools/ci_build/github/linux/docker/scripts/manylinux/install_deps.sh index d35c4d19f1..8b3078e6c8 100755 --- a/tools/ci_build/github/linux/docker/scripts/manylinux/install_deps.sh +++ b/tools/ci_build/github/linux/docker/scripts/manylinux/install_deps.sh @@ -39,7 +39,11 @@ function GetFile { return $? } -PYTHON_EXES=("/opt/python/cp36-cp36m/bin/python3.6" "/opt/python/cp37-cp37m/bin/python3.7" "/opt/python/cp38-cp38/bin/python3.8" "/opt/python/cp39-cp39/bin/python3.9") +if [ ! -d "/opt/conda/bin" ]; then + PYTHON_EXES=("/opt/python/cp36-cp36m/bin/python3.6" "/opt/python/cp37-cp37m/bin/python3.7" "/opt/python/cp38-cp38/bin/python3.8" "/opt/python/cp39-cp39/bin/python3.9") +else + PYTHON_EXES=("/opt/conda/bin/python") +fi os_major_version=$(cat /etc/redhat-release | tr -dc '0-9.'|cut -d \. -f1) diff --git a/tools/ci_build/github/pai/buildtrainingdoc.sh b/tools/ci_build/github/pai/buildtrainingdoc.sh new file mode 100755 index 0000000000..83f5005975 --- /dev/null +++ b/tools/ci_build/github/pai/buildtrainingdoc.sh @@ -0,0 +1,25 @@ +#!/bin/bash + +# This script must be executed from this folder. 
+ +# $1 python path +# $2 source folder +# $3 build folder +# $4 build config + +# Fail the document generation if anything goes wrong in the process +set -e -x + +# Install doc generation tools +$1/python -m pip install -r $2/docs/python/requirements.txt + +# Fake onnxruntime installation +export PYTHONPATH=$3/$4:$PYTHONPATH + +# Remove old docs +rm -rf $3/docs/ + +# Training doc +$1/python -m sphinx -j1 -v -T -b html -d $3/docs/training/_doctrees/html $2/docs/python/training $3/docs/training/html +$1/python -u $2/tools/doc/rename_folders.py $3/docs/training/html +# (cd $3/docs/training/html && zip -r $3/docs/python_training_doc.zip .) diff --git a/tools/ci_build/github/pai/wrap_rocm_python_doc_publisher.sh b/tools/ci_build/github/pai/wrap_rocm_python_doc_publisher.sh new file mode 100644 index 0000000000..6182bff85d --- /dev/null +++ b/tools/ci_build/github/pai/wrap_rocm_python_doc_publisher.sh @@ -0,0 +1,7 @@ +#!/bin/bash + +echo "HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES" +echo "PythonManylinuxDir=$PythonManylinuxDir" + +$PythonManylinuxDir/bin/python3 -m pip install /build/Release/dist/*.whl +/onnxruntime_src/tools/ci_build/github/pai/buildtrainingdoc.sh $PythonManylinuxDir/bin/ /onnxruntime_src /build Release