Fix ROCm wheels pipeline after changes to manylinux scripts (#8026)

* update

* try fix rocm pipeline

* avoid already installed error

* ignore python3.10 since build fails

* fix

* try setting user

* try again

* try again

* try again

* fix script

* disable inference docs generation

* try print device id

* fix name qual

* try again

* try again

* try again

* provider_options

* add device verify

* try again

* try again

* try again

* print video/render gid

* try again

* run as root

* try again with uid, gid

* cleanup

* run as root

* temp fix

* add /bin/bash

Co-authored-by: Changming Sun <chasun@microsoft.com>
This commit is contained in:
Suffian Khan 2021-06-10 21:01:28 -07:00 committed by GitHub
parent 20579595c8
commit 35ca3c99d1
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 73 additions and 22 deletions

View file

@ -2,6 +2,7 @@
// Licensed under the MIT License.
#include "core/providers/rocm/rocm_execution_provider_info.h"
#include "core/providers/rocm/rocm_common.h"
#include "core/common/make_string.h"
#include "core/framework/provider_options_utils.h"
@ -48,8 +49,20 @@ ROCMExecutionProviderInfo ROCMExecutionProviderInfo::FromProviderOptions(const P
free = reinterpret_cast<void*>(address);
return Status::OK();
})
// TODO validate info.device_id
.AddAssignmentToReference(rocm::provider_option_names::kDeviceId, info.device_id)
.AddValueParser(
rocm::provider_option_names::kDeviceId,
[&info](const std::string& value_str) -> Status {
ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, info.device_id));
int num_devices{};
ORT_RETURN_IF_NOT(
HIP_CALL(hipGetDeviceCount(&num_devices)),
"hipGetDeviceCount() failed.");
ORT_RETURN_IF_NOT(
0 <= info.device_id && info.device_id < num_devices,
"Invalid device ID: ", info.device_id,
", must be between 0 (inclusive) and ", num_devices, " (exclusive).");
return Status::OK();
})
.AddAssignmentToReference(rocm::provider_option_names::kMemLimit, info.gpu_mem_limit)
.AddAssignmentToReference(rocm::provider_option_names::kConvExhaustiveSearch, info.miopen_conv_exhaustive_search)
.AddAssignmentToEnumReference(

View file

@ -311,8 +311,10 @@ stages:
- script: |-
    # Report which GPUs the tests are allowed to see.
    echo "Tests will run using HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES"

    # Look each group up by its exact name. Pattern-matching the whole group
    # database for the substring "video"/"render" can also hit other group
    # names or member lists and return several gids (or the wrong one).
    # Field 3 of a group entry (name:passwd:gid:members) is the numeric gid.
    video_gid=$(getent group video | cut -d: -f3)
    echo "Found video_gid=$video_gid; attempting to set as pipeline variable"
    echo "##vso[task.setvariable variable=video]$video_gid"

    render_gid=$(getent group render | cut -d: -f3)
    echo "Found render_gid=$render_gid; attempting to set as pipeline variable"
    echo "##vso[task.setvariable variable=render]$render_gid"
  displayName: 'Find video and render gid to be mapped into container'
@ -449,11 +451,13 @@ stages:
--security-opt seccomp=unconfined \
--volume $(Build.SourcesDirectory):/onnxruntime_src \
--volume $(Build.BinariesDirectory):/build \
--entrypoint /bin/bash \
-e HIP_VISIBLE_DEVICES \
-e NIGHTLY_BUILD \
-e BUILD_BUILDNUMBER \
-e PythonManylinuxDir=$(PythonManylinuxdir) \
onnxruntimetrainingrocmbuild \
bash -c " $(PythonManylinuxDir)/bin/python3 -m pip install /build/Release/dist/*.whl && /onnxruntime_src/tools/doc/builddoc.sh $(PythonManylinuxDir)/bin/ /onnxruntime_src /build Release " ;
/onnxruntime_src/tools/ci_build/github/pai/wrap_rocm_python_doc_publisher.sh
workingDirectory: $(Build.SourcesDirectory)
- task: CopyFiles@2

View file

@ -14,6 +14,8 @@ ARG LD_LIBRARY_PATH_ARG
ARG PREPEND_PATH
LABEL maintainer="The ManyLinux project"
RUN yum remove -y devtoolset\* git\* && conda remove -y cmake
ENV AUDITWHEEL_POLICY=${POLICY} AUDITWHEEL_ARCH=${PLATFORM} AUDITWHEEL_PLAT=${POLICY}_${PLATFORM}
ENV LC_ALL=en_US.UTF-8 LANG=en_US.UTF-8 LANGUAGE=en_US.UTF-8
ENV DEVTOOLSET_ROOTPATH=${DEVTOOLSET_ROOTPATH}
@ -138,10 +140,10 @@ COPY build_scripts/ambv-pubkey.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.9.5
FROM build_cpython AS build_cpython310
COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt
# FROM build_cpython AS build_cpython310
# COPY build_scripts/cpython-pubkey-310-311.txt /build_scripts/cpython-pubkeys.txt
RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.10.0b1
# RUN manylinux-entrypoint /build_scripts/build-cpython.sh 3.10.0b1
FROM build_cpython AS all_cpython
@ -149,7 +151,7 @@ COPY --from=build_cpython36 /opt/_internal /opt/_internal/
COPY --from=build_cpython37 /opt/_internal /opt/_internal/
COPY --from=build_cpython38 /opt/_internal /opt/_internal/
COPY --from=build_cpython39 /opt/_internal /opt/_internal/
COPY --from=build_cpython310 /opt/_internal /opt/_internal/
# COPY --from=build_cpython310 /opt/_internal /opt/_internal/
RUN hardlink -cv /opt/_internal
@ -174,7 +176,7 @@ ARG INSTALL_DEPS_EXTRA_ARGS
#Add our own dependencies
ADD scripts /tmp/scripts
RUN cd /tmp/scripts && \
/tmp/scripts/install_centos.sh && \
/tmp/scripts/manylinux/install_centos.sh && \
/tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
/tmp/scripts/install_python_deps.sh -d gpu -p $PYTHON_VERSION $INSTALL_DEPS_EXTRA_ARGS && \
rm -rf /tmp/scripts

View file

@ -179,7 +179,7 @@ ARG INSTALL_DEPS_EXTRA_ARGS
#Add our own dependencies
ADD scripts /tmp/scripts
RUN cd /tmp/scripts && \
/tmp/scripts/install_centos.sh && \
/tmp/scripts/manylinux/install_centos.sh && \
/tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
/tmp/scripts/install_ninja.sh && \
/tmp/scripts/install_python_deps.sh -d gpu -v 10.2 -p $PYTHON_VERSION $INSTALL_DEPS_EXTRA_ARGS && \

View file

@ -178,7 +178,7 @@ ARG INSTALL_DEPS_EXTRA_ARGS
#Add our own dependencies
ADD scripts /tmp/scripts
RUN cd /tmp/scripts && \
/tmp/scripts/install_centos.sh && \
/tmp/scripts/manylinux/install_centos.sh && \
/tmp/scripts/install_os_deps.sh -d gpu $INSTALL_DEPS_EXTRA_ARGS && \
/tmp/scripts/install_ninja.sh && \
/tmp/scripts/install_python_deps.sh -d gpu -v 11.1 -p $PYTHON_VERSION $INSTALL_DEPS_EXTRA_ARGS && \

View file

@ -19,7 +19,7 @@ fetch_source ${GIT_ROOT}.tar.gz ${GIT_DOWNLOAD_URL}
check_sha256sum ${GIT_ROOT}.tar.gz ${GIT_HASH}
tar -xzf ${GIT_ROOT}.tar.gz
pushd ${GIT_ROOT}
make install prefix=/usr/local NO_GETTEXT=1 NO_TCLTK=1 DESTDIR=/manylinux-rootfs CPPFLAGS="${MANYLINUX_CPPFLAGS}" CFLAGS="${MANYLINUX_CFLAGS}" CXXFLAGS="${MANYLINUX_CXXFLAGS}" LDFLAGS="${MANYLINUX_LDFLAGS}"
make -j$(getconf _NPROCESSORS_ONLN) install prefix=/usr/local NO_GETTEXT=1 NO_TCLTK=1 DESTDIR=/manylinux-rootfs CPPFLAGS="${MANYLINUX_CPPFLAGS}" CFLAGS="${MANYLINUX_CFLAGS}" CXXFLAGS="${MANYLINUX_CXXFLAGS}" LDFLAGS="${MANYLINUX_LDFLAGS}"
popd
rm -rf ${GIT_ROOT} ${GIT_ROOT}.tar.gz

View file

@ -94,7 +94,9 @@ elif [ "${AUDITWHEEL_POLICY}" == "manylinux2014" ]; then
# Software collection (for devtoolset-9)
yum -y install centos-release-scl-rh
# EPEL support (for yasm)
yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
if ! rpm -q --quiet epel-release ; then
yum -y install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm
fi
TOOLCHAIN_DEPS="${TOOLCHAIN_DEPS} yasm"
elif [ "${AUDITWHEEL_ARCH}" == "aarch64" ] || [ "${AUDITWHEEL_ARCH}" == "ppc64le" ] || [ "${AUDITWHEEL_ARCH}" == "s390x" ]; then
# Software collection (for devtoolset-9)

View file

@ -19,12 +19,6 @@ if [ "$os_major_version" == "7" ]; then
yum install -y dotnet-sdk-2.1
fi
yum install -y java-11-openjdk-devel
# If the /opt/python folder exists, we assume this is the manylinux docker image
if [ ! -d "/opt/python/cp37-cp37m" ]; then
yum install -y ccache gcc gcc-c++ python3 python3-devel python3-pip
fi
# install automatic documentation generation dependencies
yum install -y graphviz
# Install Java
# Install automatic documentation generation dependencies
yum install -y java-11-openjdk-devel graphviz

View file

@ -39,7 +39,11 @@ function GetFile {
return $?
}
PYTHON_EXES=("/opt/python/cp36-cp36m/bin/python3.6" "/opt/python/cp37-cp37m/bin/python3.7" "/opt/python/cp38-cp38/bin/python3.8" "/opt/python/cp39-cp39/bin/python3.9")
if [ ! -d "/opt/conda/bin" ]; then
PYTHON_EXES=("/opt/python/cp36-cp36m/bin/python3.6" "/opt/python/cp37-cp37m/bin/python3.7" "/opt/python/cp38-cp38/bin/python3.8" "/opt/python/cp39-cp39/bin/python3.9")
else
PYTHON_EXES=("/opt/conda/bin/python")
fi
os_major_version=$(cat /etc/redhat-release | tr -dc '0-9.'|cut -d \. -f1)

View file

@ -0,0 +1,25 @@
#!/bin/bash
# Builds the training documentation with Sphinx and post-processes the output
# folders with tools/doc/rename_folders.py.
#
# This script must be executed from this folder.
# $1 python path (directory that contains the `python` executable)
# $2 source folder
# $3 build folder
# $4 build config

# Fail the document generation if anything goes wrong in the process
set -e -x

# Refuse to run with a missing or empty argument. In particular an empty build
# folder would turn the cleanup below into `rm -rf /docs/`.
python_dir="${1:?python path (\$1) is required}"
source_dir="${2:?source folder (\$2) is required}"
build_dir="${3:?build folder (\$3) is required}"
build_config="${4:?build config (\$4) is required}"

# Install doc generation tools
"$python_dir/python" -m pip install -r "$source_dir/docs/python/requirements.txt"

# Fake onnxruntime installation
export PYTHONPATH="$build_dir/$build_config:$PYTHONPATH"

# Remove old docs
rm -rf "$build_dir/docs/"

# Training doc
"$python_dir/python" -m sphinx -j1 -v -T -b html \
    -d "$build_dir/docs/training/_doctrees/html" \
    "$source_dir/docs/python/training" \
    "$build_dir/docs/training/html"
"$python_dir/python" -u "$source_dir/tools/doc/rename_folders.py" "$build_dir/docs/training/html"
# (cd $3/docs/training/html && zip -r $3/docs/python_training_doc.zip .)

View file

@ -0,0 +1,7 @@
#!/bin/bash
# Installs the freshly built wheel(s) from /build/Release/dist and then runs
# the training documentation build.
#
# Reads from the environment:
#   HIP_VISIBLE_DEVICES  - only echoed here, for diagnostics
#   PythonManylinuxDir   - Python installation whose bin/python3 is used

# Stop at the first failing command so that a failed wheel installation does
# not go on to build docs (the two steps used to be chained with `&&`).
set -e

echo "HIP_VISIBLE_DEVICES=$HIP_VISIBLE_DEVICES"
echo "PythonManylinuxDir=$PythonManylinuxDir"

# Without this check an empty variable would silently fall back to /bin/python3.
: "${PythonManylinuxDir:?PythonManylinuxDir must be set}"

# The glob is intentionally left unquoted so it expands to the built wheel(s).
"$PythonManylinuxDir/bin/python3" -m pip install /build/Release/dist/*.whl
/onnxruntime_src/tools/ci_build/github/pai/buildtrainingdoc.sh "$PythonManylinuxDir/bin/" /onnxruntime_src /build Release