From 6db72165eb3269e768f0fea28a7b210f588c7abe Mon Sep 17 00:00:00 2001 From: Changming Sun Date: Fri, 18 Aug 2023 14:51:26 -0700 Subject: [PATCH] Fix python packaging test pipeline (#17204) ### Description 1. Fix the Python packaging test pipeline. There was an error in tools/ci_build/github/linux/run_python_tests.sh: it installed a released version of the onnxruntime Python package from pypi.org to run the tests. It is supposed to pick the one from the current build instead. 2. Refactor the pipeline to allow choosing the cmake build type from the web UI when manually triggering a build. For now this feature is for Linux only, because I don't want to change too much when we are about to cut a release branch. After that I will expand it to all platforms. This feature is useful for debugging pipeline issues. We may also consider having a nightly pipeline that runs all tests in Debug mode, which may catch extra bugs because in Debug mode we can enforce range checks. Test run: https://aiinfra.visualstudio.com/Lotus/_build/results?buildId=342674&view=results ### Motivation and Context Currently the pipeline has a crash error. 
AB#18580 --- .../build-perf-test-binaries-pipeline.yml | 1 - .../azure-pipelines/linux-ci-pipeline.yml | 1 + .../orttraining-py-packaging-pipeline-cpu.yml | 1 - .../azure-pipelines/py-packaging-pipeline.yml | 20 ++++++++----- .../templates/py-linux-gpu.yml | 11 +++++++- .../azure-pipelines/templates/py-linux.yml | 13 +++++++-- .../templates/py-packaging-linux-test.yml | 15 ++++++++-- .../templates/py-packaging-stage.yml | 21 ++++++++++---- .../linux/build_linux_arm64_python_package.sh | 28 +++++++++++++------ .../github/linux/run_python_dockerbuild.sh | 17 ++++++----- .../ci_build/github/linux/run_python_tests.sh | 20 ++++++++----- 11 files changed, 105 insertions(+), 43 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml b/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml index 3cfa4ba6cf..08330764ff 100644 --- a/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/build-perf-test-binaries-pipeline.yml @@ -36,7 +36,6 @@ stages: - template: templates/py-packaging-stage.yml parameters: enable_linux_gpu: true - enable_ubuntu_cpu: false enable_linux_cpu: false enable_windows_cpu: false enable_windows_gpu: false diff --git a/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml index 250ed9f2b5..b784ef72d6 100644 --- a/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-ci-pipeline.yml @@ -240,6 +240,7 @@ stages: ld_library_path_arg: /opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64 prepend_path: '/opt/rh/devtoolset-10/root/usr/bin:' with_cache: true + cmake_build_type: Release - stage: arm64_test dependsOn: ['arm64_build'] diff --git 
a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml index bdb0cd61db..8806707d21 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-py-packaging-pipeline-cpu.yml @@ -123,7 +123,6 @@ stages: parameters: build_py_parameters: --enable_training enable_linux_gpu: false - enable_ubuntu_cpu: false enable_linux_cpu: false enable_windows_cpu: true enable_windows_gpu: false diff --git a/tools/ci_build/github/azure-pipelines/py-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-packaging-pipeline.yml index 37fff9276f..62f84a9bb1 100644 --- a/tools/ci_build/github/azure-pipelines/py-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/py-packaging-pipeline.yml @@ -4,11 +4,6 @@ parameters: type: boolean default: true -- name: enable_ubuntu_cpu - displayName: 'Whether Ubuntu CPU (manylinux_2_27) package is built.' - type: boolean - default: true - - name: enable_linux_gpu displayName: 'Whether Linux GPU package is built.' type: boolean @@ -44,6 +39,17 @@ parameters: type: string default: '--use_azure' +# TODO: Now the Windows jobs use a different cmake build type. Consider to merge it. +- name: cmake_build_type + type: string + displayName: 'Linux packages cmake build type. Linux Only.' 
+ default: 'Release' + values: + - Debug + - Release + - RelWithDebInfo + - MinSizeRel + trigger: none resources: @@ -58,11 +64,11 @@ stages: - template: templates/py-packaging-stage.yml parameters: enable_linux_gpu: ${{ parameters.enable_linux_gpu }} - enable_ubuntu_cpu: ${{ parameters.enable_ubuntu_cpu }} enable_linux_cpu: ${{ parameters.enable_linux_cpu }} enable_windows_cpu: ${{ parameters.enable_windows_cpu }} enable_windows_gpu: ${{ parameters.enable_windows_gpu }} enable_mac_cpu: ${{ parameters.enable_mac_cpu }} enable_mac_silicon: ${{ parameters.enable_mac_silicon }} enable_linux_arm: ${{ parameters.enable_linux_arm }} - build_py_parameters: ${{ parameters.build_py_parameters }} \ No newline at end of file + build_py_parameters: ${{ parameters.build_py_parameters }} + cmake_build_type: ${{ parameters.cmake_build_type }} \ No newline at end of file diff --git a/tools/ci_build/github/azure-pipelines/templates/py-linux-gpu.yml b/tools/ci_build/github/azure-pipelines/templates/py-linux-gpu.yml index 140c40570a..087d2cfee5 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-linux-gpu.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-linux-gpu.yml @@ -9,6 +9,15 @@ parameters: type: string default: '' +- name: cmake_build_type + type: string + default: 'Release' + values: + - Debug + - Release + - RelWithDebInfo + - MinSizeRel + jobs: - job: Linux_py_GPU_Wheels_${{ parameters.arch }} timeoutInMinutes: 240 @@ -39,7 +48,7 @@ jobs: targetType: filePath filePath: tools/ci_build/github/linux/run_python_dockerbuild.sh # please check ONNXRUNTIME_CUDA_VERSION in tools/ci_build/github/linux/build_linux_arm64_python_package.sh - arguments: -i onnxruntimecuda118xtrt86build${{ parameters.arch }} -d "GPU" -x "${{ parameters.extra_build_arg }}" + arguments: -i onnxruntimecuda118xtrt86build${{ parameters.arch }} -d "GPU" -c ${{ parameters.cmake_build_type }} -x "${{ parameters.extra_build_arg }}" - task: PublishBuildArtifacts@1 displayName: 'Publish 
Artifact: ONNXRuntime python wheel' diff --git a/tools/ci_build/github/azure-pipelines/templates/py-linux.yml b/tools/ci_build/github/azure-pipelines/templates/py-linux.yml index e5f262be88..fff8b8c098 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-linux.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-linux.yml @@ -17,6 +17,15 @@ parameters: - name: prepend_path type: string +- name: cmake_build_type + type: string + default: 'Release' + values: + - Debug + - Release + - RelWithDebInfo + - MinSizeRel + - name: device type: string default: 'CPU' @@ -73,7 +82,7 @@ jobs: inputs: targetType: filePath filePath: tools/ci_build/github/linux/run_python_dockerbuild.sh - arguments: -i onnxruntimecpubuildpython${{ parameters.arch }} -d "${{ parameters.device }}" -x "${{ parameters.extra_build_arg }}" + arguments: -i onnxruntimecpubuildpython${{ parameters.arch }} -d "${{ parameters.device }}" -c ${{ parameters.cmake_build_type }} -x "${{ parameters.extra_build_arg }}" ${{ if eq(parameters.with_cache, 'true') }}: env: ADDITIONAL_DOCKER_PARAMETER: "--volume $(ORT_CACHE_DIR):/cache -e CCACHE_DIR=/cache -e ORT_BUILD_WITH_CACHE=1" @@ -88,4 +97,4 @@ jobs: displayName: 'Publish Test Binaries' inputs: artifactName: 'drop-linux-cpu-${{ parameters.arch }}' - targetPath: '$(Build.BinariesDirectory)/Release' + targetPath: '$(Build.BinariesDirectory)/${{ parameters.cmake_build_type }}' diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test.yml index 7868941e8a..8ddc917e85 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test.yml @@ -16,6 +16,17 @@ parameters: type: string default: '' + +# TODO: Ideally it should fetch information from the build that triggers it +- name: cmake_build_type + type: string + default: 'Release' + values: + - Debug 
+ - Release + - RelWithDebInfo + - MinSizeRel + - name: timeout type: number default: 120 @@ -33,7 +44,7 @@ jobs: displayName: 'Download Pipeline Artifact' inputs: artifactName: 'drop-linux-${{ lower(parameters.device) }}-${{ parameters.arch }}' - targetPath: '$(Build.BinariesDirectory)/Release' + targetPath: '$(Build.BinariesDirectory)/${{parameters.cmake_build_type}}' # The public ADO project ${{ if eq(variables['System.CollectionId'], 'f3ad12f2-e480-4533-baf2-635c95467d29') }}: buildType: current @@ -67,7 +78,7 @@ jobs: inputs: targetType: filePath filePath: tools/ci_build/github/linux/run_python_tests.sh - arguments: -d ${{ parameters.device }} + arguments: -d ${{ parameters.device }} -c ${{parameters.cmake_build_type}} - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 displayName: 'Clean Agent Directories' diff --git a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml index e9d2402331..568ab6c8a8 100644 --- a/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/templates/py-packaging-stage.yml @@ -10,11 +10,6 @@ parameters: type: boolean default: true -- name: enable_ubuntu_cpu - displayName: 'Whether Ubuntu CPU (manylinux_2_27) package is built.' - type: boolean - default: true - - name: enable_linux_gpu displayName: 'Whether Linux GPU package is built.' type: boolean @@ -45,6 +40,17 @@ parameters: type: boolean default: true +# TODO: Now the Windows jobs use a different cmake build type. Consider to merge it. +- name: cmake_build_type + type: string + displayName: 'Linux packages cmake build type. Linux Only.' 
+ default: 'Release' + values: + - Debug + - Release + - RelWithDebInfo + - MinSizeRel + stages: - stage: Python_Packaging dependsOn: [] @@ -502,17 +508,19 @@ stages: ld_library_path_arg: /opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64 prepend_path: '/opt/rh/devtoolset-10/root/usr/bin:' extra_build_arg: ${{ parameters.build_py_parameters }} + cmake_build_type: ${{ parameters.cmake_build_type }} - ${{ if eq(parameters.enable_linux_cpu, true) }}: - template: py-linux.yml parameters: arch: 'x86_64' - machine_pool: 'Azure-Pipelines-EO-Ubuntu-2004-aiinfra' + machine_pool: 'onnxruntime-Ubuntu2004-AMD-CPU' base_image: 'centos:7' devtoolset_rootpath: /opt/rh/devtoolset-11/root ld_library_path_arg: /opt/rh/devtoolset-11/root/usr/lib64:/opt/rh/devtoolset-11/root/usr/lib:/opt/rh/devtoolset-11/root/usr/lib64/dyninst:/opt/rh/devtoolset-11/root/usr/lib/dyninst:/usr/local/lib64 prepend_path: '/opt/rh/devtoolset-11/root/usr/bin:' extra_build_arg: ${{ parameters.build_py_parameters }} + cmake_build_type: ${{ parameters.cmake_build_type }} - ${{ if eq(parameters.enable_linux_gpu, true) }}: @@ -521,3 +529,4 @@ stages: arch: 'x86_64' machine_pool: 'onnxruntime-Ubuntu2004-AMD-CPU' extra_build_arg: ${{ parameters.build_py_parameters }} + cmake_build_type: ${{ parameters.cmake_build_type }} diff --git a/tools/ci_build/github/linux/build_linux_arm64_python_package.sh b/tools/ci_build/github/linux/build_linux_arm64_python_package.sh index a4cbfdeba4..58d7d32ac4 100755 --- a/tools/ci_build/github/linux/build_linux_arm64_python_package.sh +++ b/tools/ci_build/github/linux/build_linux_arm64_python_package.sh @@ -1,25 +1,37 @@ #!/bin/bash set -e -x + +# This script invokes build.py + mkdir -p /build/dist -CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -O3 -pipe -Wl,--strip-all" -CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 
-Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -O3 -pipe -Wl,--strip-all" - -BUILD_DEVICE="CPU" -BUILD_CONFIG="Release" EXTRA_ARG="" -PYTHON_EXES=("/opt/python/cp38-cp38/bin/python3.8" "/opt/python/cp39-cp39/bin/python3.9" "/opt/python/cp310-cp310/bin/python3.10" "/opt/python/cp311-cp311/bin/python3.11") -while getopts "d:p:x:" parameter_Option +# Put 3.8 at the last because Ubuntu 20.04 use python 3.8 and we will upload the intermediate build files of this +# config to Azure DevOps Artifacts and download them to a Ubuntu 20.04 machine to run the tests. +PYTHON_EXES=("/opt/python/cp39-cp39/bin/python3.9" "/opt/python/cp310-cp310/bin/python3.10" "/opt/python/cp311-cp311/bin/python3.11" "/opt/python/cp38-cp38/bin/python3.8") +while getopts "d:p:x:c:" parameter_Option do case "${parameter_Option}" in #GPU or CPU. d) BUILD_DEVICE=${OPTARG};; p) PYTHON_EXES=(${OPTARG});; x) EXTRA_ARG=(${OPTARG});; +c) BUILD_CONFIG=${OPTARG};; esac done +BUILD_ARGS=("--build_dir" "/build" "--config" "$BUILD_CONFIG" "--update" "--build" "--skip_submodule_sync" "--parallel" "--build_wheel") + +if [ "$BUILD_CONFIG" == "Debug" ]; then + CFLAGS="-ggdb3" + CXXFLAGS="-ggdb3" +else + CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -O3 -pipe -Wl,--strip-all" + CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -O3 -pipe -Wl,--strip-all" + BUILD_ARGS+=("--enable_lto") +fi + # Depending on how the compiler has been configured when it was built, sometimes "gcc -dumpversion" shows the full version. GCC_VERSION=$(gcc -dumpversion | cut -d . -f 1) #-fstack-clash-protection prevents attacks based on an overlapping heap and stack. 
@@ -35,8 +47,6 @@ if [ "$ARCH" == "x86_64" ] && [ "$GCC_VERSION" -ge 9 ]; then CXXFLAGS="$CXXFLAGS -fcf-protection" fi -BUILD_ARGS=("--build_dir" "/build" "--config" "$BUILD_CONFIG" "--update" "--build" "--skip_submodule_sync" "--parallel" "--enable_lto" "--build_wheel") - echo "EXTRA_ARG:" echo $EXTRA_ARG diff --git a/tools/ci_build/github/linux/run_python_dockerbuild.sh b/tools/ci_build/github/linux/run_python_dockerbuild.sh index ba8269491d..18ac648282 100755 --- a/tools/ci_build/github/linux/run_python_dockerbuild.sh +++ b/tools/ci_build/github/linux/run_python_dockerbuild.sh @@ -1,11 +1,14 @@ #!/bin/bash set -e -x -while getopts "i:d:x:" parameter_Option +BUILD_CONFIG="Release" + +while getopts "i:d:x:c:" parameter_Option do case "${parameter_Option}" in i) DOCKER_IMAGE=${OPTARG};; d) DEVICE=${OPTARG};; x) BUILD_EXTR_PAR=${OPTARG};; +c) BUILD_CONFIG=${OPTARG};; esac done @@ -20,10 +23,10 @@ docker run --rm \ -e NIGHTLY_BUILD \ -e BUILD_BUILDNUMBER \ $ADDITIONAL_DOCKER_PARAMETER \ - $DOCKER_IMAGE tools/ci_build/github/linux/build_linux_arm64_python_package.sh -d $DEVICE -x $BUILD_EXTR_PAR + $DOCKER_IMAGE tools/ci_build/github/linux/build_linux_arm64_python_package.sh -d $DEVICE -c $BUILD_CONFIG -x $BUILD_EXTR_PAR -sudo rm -rf $BUILD_BINARIESDIRECTORY/Release/onnxruntime $BUILD_BINARIESDIRECTORY/Release/pybind11 \ - $BUILD_BINARIESDIRECTORY/Release/models $BUILD_BINARIESDIRECTORY/Release/_deps \ - $BUILD_BINARIESDIRECTORY/Release/CMakeFiles -cd $BUILD_BINARIESDIRECTORY/Release -find -executable -type f > $BUILD_BINARIESDIRECTORY/Release/perms.txt +sudo rm -rf $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/onnxruntime $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/pybind11 \ + $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/models $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/_deps \ + $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/CMakeFiles +cd $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG +find -executable -type f > $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/perms.txt diff --git 
a/tools/ci_build/github/linux/run_python_tests.sh b/tools/ci_build/github/linux/run_python_tests.sh index 26df77bd1d..90362a3315 100755 --- a/tools/ci_build/github/linux/run_python_tests.sh +++ b/tools/ci_build/github/linux/run_python_tests.sh @@ -6,11 +6,12 @@ set -e -x BUILD_DEVICE="CPU" BUILD_CONFIG="Release" -while getopts "d:" parameter_Option +while getopts "d:c:" parameter_Option do case "${parameter_Option}" in #GPU or CPU. d) BUILD_DEVICE=${OPTARG};; +c) BUILD_CONFIG=${OPTARG};; esac done @@ -24,6 +25,8 @@ echo "Package name:$PYTHON_PACKAGE_NAME" BUILD_ARGS="--build_dir $BUILD_BINARIESDIRECTORY --config $BUILD_CONFIG --test --skip_submodule_sync --parallel --enable_lto --build_wheel " +ARCH=$(uname -m) + if [ $ARCH == "x86_64" ]; then #ARM build machines do not have the test data yet. BUILD_ARGS="$BUILD_ARGS --enable_onnx_tests" @@ -35,13 +38,16 @@ fi sudo rm -rf /build /onnxruntime_src sudo ln -s $BUILD_SOURCESDIRECTORY /onnxruntime_src python3 -m pip uninstall -y $PYTHON_PACKAGE_NAME ort-nightly-gpu ort-nightly onnxruntime onnxruntime-gpu onnxruntime-training onnxruntime-directml ort-nightly-directml onnx -qq -cp $BUILD_SOURCESDIRECTORY/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt $BUILD_BINARIESDIRECTORY/requirements.txt -# Test ORT with the latest ONNX release. -sed -i "s/git+http:\/\/github\.com\/onnx\/onnx.*/onnx/" $BUILD_BINARIESDIRECTORY/requirements.txt -python3 -m pip install -r $BUILD_BINARIESDIRECTORY/requirements.txt -python3 -m pip install --find-links $BUILD_BINARIESDIRECTORY/whl $PYTHON_PACKAGE_NAME +# Install the packages that are needed for installing the onnxruntime python package +python3 -m pip install -r $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/requirements.txt +# Install the packages that are needed for running test scripts +# Install the latest ONNX release which may contain not fixed bugs. However, it is what most people use. +python3 -m pip install onnx pytest +# The "--no-index" flag is crucial. 
The local whl folder is just an additional source. Pypi's doc says "there is no +# ordering in the locations that are searched" if we don't disable the default one with "--no-index" +python3 -m pip install --no-index --find-links $BUILD_BINARIESDIRECTORY/whl $PYTHON_PACKAGE_NAME ln -s /data/models $BUILD_BINARIESDIRECTORY cd $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG # Restore file permissions -xargs -a $BUILD_BINARIESDIRECTORY/$BUILD_CONFIG/perms.txt chmod a+x +xargs -a perms.txt chmod a+x python3 $BUILD_SOURCESDIRECTORY/tools/ci_build/build.py $BUILD_ARGS --ctest_path ''