Fix python packaging test pipeline (#17204)

### Description
1. Fix the Python packaging test pipeline. There was a bug in
tools/ci_build/github/linux/run_python_tests.sh: it installed a
released version of the onnxruntime Python package from pypi.org to run the
tests, when it should have installed the one produced by the current build.
2. Refactor the pipeline to allow choosing the cmake build type from the web
UI when manually triggering a build. For now this feature is Linux only,
because I don't want to change too much when we are about to cut a
release branch. After that I will expand it to all platforms. This
feature is useful for debugging pipeline issues. We may also consider
having a nightly pipeline that runs all tests in Debug mode, which may catch
extra bugs because in debug mode we can enforce range checks.

Test run:
https://aiinfra.visualstudio.com/Lotus/_build/results?buildId=342674&view=results

### Motivation and Context
Currently the pipeline fails with a crash.

AB#18580
This commit is contained in:
Changming Sun 2023-08-18 14:51:26 -07:00 committed by GitHub
parent dd3b2cefd6
commit 6db72165eb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 105 additions and 43 deletions

View file

@ -36,7 +36,6 @@ stages:
- template: templates/py-packaging-stage.yml
parameters:
enable_linux_gpu: true
enable_ubuntu_cpu: false
enable_linux_cpu: false
enable_windows_cpu: false
enable_windows_gpu: false

View file

@ -240,6 +240,7 @@ stages:
ld_library_path_arg: /opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64
prepend_path: '/opt/rh/devtoolset-10/root/usr/bin:'
with_cache: true
cmake_build_type: Release
- stage: arm64_test
dependsOn: ['arm64_build']

View file

@ -123,7 +123,6 @@ stages:
parameters:
build_py_parameters: --enable_training
enable_linux_gpu: false
enable_ubuntu_cpu: false
enable_linux_cpu: false
enable_windows_cpu: true
enable_windows_gpu: false

View file

@ -4,11 +4,6 @@ parameters:
type: boolean
default: true
- name: enable_ubuntu_cpu
displayName: 'Whether Ubuntu CPU (manylinux_2_27) package is built.'
type: boolean
default: true
- name: enable_linux_gpu
displayName: 'Whether Linux GPU package is built.'
type: boolean
@ -44,6 +39,17 @@ parameters:
type: string
default: '--use_azure'
# TODO: The Windows jobs currently use a different cmake build type. Consider merging them.
- name: cmake_build_type
type: string
displayName: 'Linux packages cmake build type. Linux Only.'
default: 'Release'
values:
- Debug
- Release
- RelWithDebInfo
- MinSizeRel
trigger: none
resources:
@ -58,11 +64,11 @@ stages:
- template: templates/py-packaging-stage.yml
parameters:
enable_linux_gpu: ${{ parameters.enable_linux_gpu }}
enable_ubuntu_cpu: ${{ parameters.enable_ubuntu_cpu }}
enable_linux_cpu: ${{ parameters.enable_linux_cpu }}
enable_windows_cpu: ${{ parameters.enable_windows_cpu }}
enable_windows_gpu: ${{ parameters.enable_windows_gpu }}
enable_mac_cpu: ${{ parameters.enable_mac_cpu }}
enable_mac_silicon: ${{ parameters.enable_mac_silicon }}
enable_linux_arm: ${{ parameters.enable_linux_arm }}
build_py_parameters: ${{ parameters.build_py_parameters }}
build_py_parameters: ${{ parameters.build_py_parameters }}
cmake_build_type: ${{ parameters.cmake_build_type }}

View file

@ -9,6 +9,15 @@ parameters:
type: string
default: ''
- name: cmake_build_type
type: string
default: 'Release'
values:
- Debug
- Release
- RelWithDebInfo
- MinSizeRel
jobs:
- job: Linux_py_GPU_Wheels_${{ parameters.arch }}
timeoutInMinutes: 240
@ -39,7 +48,7 @@ jobs:
targetType: filePath
filePath: tools/ci_build/github/linux/run_python_dockerbuild.sh
# please check ONNXRUNTIME_CUDA_VERSION in tools/ci_build/github/linux/build_linux_arm64_python_package.sh
arguments: -i onnxruntimecuda118xtrt86build${{ parameters.arch }} -d "GPU" -x "${{ parameters.extra_build_arg }}"
arguments: -i onnxruntimecuda118xtrt86build${{ parameters.arch }} -d "GPU" -c ${{ parameters.cmake_build_type }} -x "${{ parameters.extra_build_arg }}"
- task: PublishBuildArtifacts@1
displayName: 'Publish Artifact: ONNXRuntime python wheel'

View file

@ -17,6 +17,15 @@ parameters:
- name: prepend_path
type: string
- name: cmake_build_type
type: string
default: 'Release'
values:
- Debug
- Release
- RelWithDebInfo
- MinSizeRel
- name: device
type: string
default: 'CPU'
@ -73,7 +82,7 @@ jobs:
inputs:
targetType: filePath
filePath: tools/ci_build/github/linux/run_python_dockerbuild.sh
arguments: -i onnxruntimecpubuildpython${{ parameters.arch }} -d "${{ parameters.device }}" -x "${{ parameters.extra_build_arg }}"
arguments: -i onnxruntimecpubuildpython${{ parameters.arch }} -d "${{ parameters.device }}" -c ${{ parameters.cmake_build_type }} -x "${{ parameters.extra_build_arg }}"
${{ if eq(parameters.with_cache, 'true') }}:
env:
ADDITIONAL_DOCKER_PARAMETER: "--volume $(ORT_CACHE_DIR):/cache -e CCACHE_DIR=/cache -e ORT_BUILD_WITH_CACHE=1"
@ -88,4 +97,4 @@ jobs:
displayName: 'Publish Test Binaries'
inputs:
artifactName: 'drop-linux-cpu-${{ parameters.arch }}'
targetPath: '$(Build.BinariesDirectory)/Release'
targetPath: '$(Build.BinariesDirectory)/${{ parameters.cmake_build_type }}'

View file

@ -16,6 +16,17 @@ parameters:
type: string
default: ''
# TODO: Ideally it should fetch information from the build that triggers it
- name: cmake_build_type
type: string
default: 'Release'
values:
- Debug
- Release
- RelWithDebInfo
- MinSizeRel
- name: timeout
type: number
default: 120
@ -33,7 +44,7 @@ jobs:
displayName: 'Download Pipeline Artifact'
inputs:
artifactName: 'drop-linux-${{ lower(parameters.device) }}-${{ parameters.arch }}'
targetPath: '$(Build.BinariesDirectory)/Release'
targetPath: '$(Build.BinariesDirectory)/${{parameters.cmake_build_type}}'
# The public ADO project
${{ if eq(variables['System.CollectionId'], 'f3ad12f2-e480-4533-baf2-635c95467d29') }}:
buildType: current
@ -67,7 +78,7 @@ jobs:
inputs:
targetType: filePath
filePath: tools/ci_build/github/linux/run_python_tests.sh
arguments: -d ${{ parameters.device }}
arguments: -d ${{ parameters.device }} -c ${{parameters.cmake_build_type}}
- task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
displayName: 'Clean Agent Directories'

View file

@ -10,11 +10,6 @@ parameters:
type: boolean
default: true
- name: enable_ubuntu_cpu
displayName: 'Whether Ubuntu CPU (manylinux_2_27) package is built.'
type: boolean
default: true
- name: enable_linux_gpu
displayName: 'Whether Linux GPU package is built.'
type: boolean
@ -45,6 +40,17 @@ parameters:
type: boolean
default: true
# TODO: The Windows jobs currently use a different cmake build type. Consider merging them.
- name: cmake_build_type
type: string
displayName: 'Linux packages cmake build type. Linux Only.'
default: 'Release'
values:
- Debug
- Release
- RelWithDebInfo
- MinSizeRel
stages:
- stage: Python_Packaging
dependsOn: []
@ -502,17 +508,19 @@ stages:
ld_library_path_arg: /opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64
prepend_path: '/opt/rh/devtoolset-10/root/usr/bin:'
extra_build_arg: ${{ parameters.build_py_parameters }}
cmake_build_type: ${{ parameters.cmake_build_type }}
- ${{ if eq(parameters.enable_linux_cpu, true) }}:
- template: py-linux.yml
parameters:
arch: 'x86_64'
machine_pool: 'Azure-Pipelines-EO-Ubuntu-2004-aiinfra'
machine_pool: 'onnxruntime-Ubuntu2004-AMD-CPU'
base_image: 'centos:7'
devtoolset_rootpath: /opt/rh/devtoolset-11/root
ld_library_path_arg: /opt/rh/devtoolset-11/root/usr/lib64:/opt/rh/devtoolset-11/root/usr/lib:/opt/rh/devtoolset-11/root/usr/lib64/dyninst:/opt/rh/devtoolset-11/root/usr/lib/dyninst:/usr/local/lib64
prepend_path: '/opt/rh/devtoolset-11/root/usr/bin:'
extra_build_arg: ${{ parameters.build_py_parameters }}
cmake_build_type: ${{ parameters.cmake_build_type }}
- ${{ if eq(parameters.enable_linux_gpu, true) }}:
@ -521,3 +529,4 @@ stages:
arch: 'x86_64'
machine_pool: 'onnxruntime-Ubuntu2004-AMD-CPU'
extra_build_arg: ${{ parameters.build_py_parameters }}
cmake_build_type: ${{ parameters.cmake_build_type }}

View file

@ -1,25 +1,37 @@
#!/bin/bash
set -e -x
# This script invokes build.py
mkdir -p /build/dist
CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -O3 -pipe -Wl,--strip-all"
CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -O3 -pipe -Wl,--strip-all"
BUILD_DEVICE="CPU"
BUILD_CONFIG="Release"
EXTRA_ARG=""
PYTHON_EXES=("/opt/python/cp38-cp38/bin/python3.8" "/opt/python/cp39-cp39/bin/python3.9" "/opt/python/cp310-cp310/bin/python3.10" "/opt/python/cp311-cp311/bin/python3.11")
while getopts "d:p:x:" parameter_Option
# Put 3.8 last because Ubuntu 20.04 uses Python 3.8, and we will upload the intermediate build files of this
# config to Azure DevOps Artifacts and download them to an Ubuntu 20.04 machine to run the tests.
PYTHON_EXES=("/opt/python/cp39-cp39/bin/python3.9" "/opt/python/cp310-cp310/bin/python3.10" "/opt/python/cp311-cp311/bin/python3.11" "/opt/python/cp38-cp38/bin/python3.8")
while getopts "d:p:x:c:" parameter_Option
do case "${parameter_Option}"
in
#GPU or CPU.
d) BUILD_DEVICE=${OPTARG};;
p) PYTHON_EXES=(${OPTARG});;
x) EXTRA_ARG=(${OPTARG});;
c) BUILD_CONFIG=${OPTARG};;
esac
done
BUILD_ARGS=("--build_dir" "/build" "--config" "$BUILD_CONFIG" "--update" "--build" "--skip_submodule_sync" "--parallel" "--build_wheel")
if [ "$BUILD_CONFIG" == "Debug" ]; then
CFLAGS="-ggdb3"
CXXFLAGS="-ggdb3"
else
CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -O3 -pipe -Wl,--strip-all"
CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -O3 -pipe -Wl,--strip-all"
BUILD_ARGS+=("--enable_lto")
fi
# Depending on how the compiler has been configured when it was built, sometimes "gcc -dumpversion" shows the full version.
GCC_VERSION=$(gcc -dumpversion | cut -d . -f 1)
#-fstack-clash-protection prevents attacks based on an overlapping heap and stack.
@ -35,8 +47,6 @@ if [ "$ARCH" == "x86_64" ] && [ "$GCC_VERSION" -ge 9 ]; then
CXXFLAGS="$CXXFLAGS -fcf-protection"
fi
BUILD_ARGS=("--build_dir" "/build" "--config" "$BUILD_CONFIG" "--update" "--build" "--skip_submodule_sync" "--parallel" "--enable_lto" "--build_wheel")
echo "EXTRA_ARG:"
echo $EXTRA_ARG

View file

@ -1,11 +1,14 @@
#!/bin/bash
set -e -x
while getopts "i:d:x:" parameter_Option
BUILD_CONFIG="Release"
while getopts "i:d:x:c:" parameter_Option
do case "${parameter_Option}"
in
i) DOCKER_IMAGE=${OPTARG};;
d) DEVICE=${OPTARG};;
x) BUILD_EXTR_PAR=${OPTARG};;
c) BUILD_CONFIG=${OPTARG};;
esac
done
@ -20,10 +23,10 @@ docker run --rm \
-e NIGHTLY_BUILD \
-e BUILD_BUILDNUMBER \
$ADDITIONAL_DOCKER_PARAMETER \
$DOCKER_IMAGE tools/ci_build/github/linux/build_linux_arm64_python_package.sh -d $DEVICE -x $BUILD_EXTR_PAR
$DOCKER_IMAGE tools/ci_build/github/linux/build_linux_arm64_python_package.sh -d $DEVICE -c $BUILD_CONFIG -x $BUILD_EXTR_PAR
sudo rm -rf $BUILD_BINARIESDIRECTORY/Release/onnxruntime $BUILD_BINARIESDIRECTORY/Release/pybind11 \
$BUILD_BINARIESDIRECTORY/Release/models $BUILD_BINARIESDIRECTORY/Release/_deps \
$BUILD_BINARIESDIRECTORY/Release/CMakeFiles
cd $BUILD_BINARIESDIRECTORY/Release
find -executable -type f > $BUILD_BINARIESDIRECTORY/Release/perms.txt
sudo rm -rf $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/onnxruntime $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/pybind11 \
$BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/models $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/_deps \
$BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/CMakeFiles
cd $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG
find -executable -type f > $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/perms.txt

View file

@ -6,11 +6,12 @@ set -e -x
BUILD_DEVICE="CPU"
BUILD_CONFIG="Release"
while getopts "d:" parameter_Option
while getopts "d:c:" parameter_Option
do case "${parameter_Option}"
in
#GPU or CPU.
d) BUILD_DEVICE=${OPTARG};;
c) BUILD_CONFIG=${OPTARG};;
esac
done
@ -24,6 +25,8 @@ echo "Package name:$PYTHON_PACKAGE_NAME"
BUILD_ARGS="--build_dir $BUILD_BINARIESDIRECTORY --config $BUILD_CONFIG --test --skip_submodule_sync --parallel --enable_lto --build_wheel "
ARCH=$(uname -m)
if [ $ARCH == "x86_64" ]; then
#ARM build machines do not have the test data yet.
BUILD_ARGS="$BUILD_ARGS --enable_onnx_tests"
@ -35,13 +38,16 @@ fi
sudo rm -rf /build /onnxruntime_src
sudo ln -s $BUILD_SOURCESDIRECTORY /onnxruntime_src
python3 -m pip uninstall -y $PYTHON_PACKAGE_NAME ort-nightly-gpu ort-nightly onnxruntime onnxruntime-gpu onnxruntime-training onnxruntime-directml ort-nightly-directml onnx -qq
cp $BUILD_SOURCESDIRECTORY/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt $BUILD_BINARIESDIRECTORY/requirements.txt
# Test ORT with the latest ONNX release.
sed -i "s/git+http:\/\/github\.com\/onnx\/onnx.*/onnx/" $BUILD_BINARIESDIRECTORY/requirements.txt
python3 -m pip install -r $BUILD_BINARIESDIRECTORY/requirements.txt
python3 -m pip install --find-links $BUILD_BINARIESDIRECTORY/whl $PYTHON_PACKAGE_NAME
# Install the packages that are needed for installing the onnxruntime python package
python3 -m pip install -r $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/requirements.txt
# Install the packages that are needed for running test scripts
# Install the latest ONNX release, which may contain unfixed bugs. However, it is what most people use.
python3 -m pip install onnx pytest
# The "--no-index" flag is crucial. The local whl folder is just an additional source. pip's documentation says "there
# is no ordering in the locations that are searched", so we must disable the default index with "--no-index".
python3 -m pip install --no-index --find-links $BUILD_BINARIESDIRECTORY/whl $PYTHON_PACKAGE_NAME
ln -s /data/models $BUILD_BINARIESDIRECTORY
cd $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG
# Restore file permissions
xargs -a $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/perms.txt chmod a+x
xargs -a perms.txt chmod a+x
python3 $BUILD_SOURCESDIRECTORY/tools/ci_build/build.py $BUILD_ARGS --ctest_path ''