From 6d7519ede8298a422e84e70bfdf01cc46fbf76c3 Mon Sep 17 00:00:00 2001
From: Jian Chen
Date: Mon, 18 Dec 2023 21:13:03 -0500
Subject: [PATCH] Adding new pipeline for python cuda testing (#18718)

### Description

### Motivation and Context
---
Review notes (text in this section is dropped by `git am` and is not part of
the commit message).

What the patch does:
* Adds py-cuda-package-test-pipeline.yml: a one-stage pipeline that
  instantiates the job template below with CudaVersion '12.2', pool
  'Onnxruntime-Linux-GPU' and a 480 minute timeout, forwarding build_id,
  project and pipeline.
* Adds stages/jobs/py-linux-cuda-package-test-job.yml: downloads the
  'drop-linux-gpu-x86_64' and 'onnxruntime_gpu' artifacts, moves them under
  $(Build.BinariesDirectory), unzips the wheels, runs BinSkim over the
  extracted .so files, builds the manylinux CUDA docker image and runs
  run_python_dockertest.sh.
* run_python_tests.sh: the CUDA flags are derived from $CUDA_VERSION reduced
  to major.minor instead of the hard-coded 11.8.

Differences from the upstream commit named in the first header line:
* The docker test step passes `-u ${{ parameters.CudaVersion }}` instead of a
  literal `-u 12.2`, so the value agrees with the base image and TensorRT
  version that the same parameter selects. This assumes `-u` is how
  run_python_dockertest.sh receives the CUDA version; that script is not part
  of this patch - confirm before applying.
* run_python_tests.sh quotes $CUDA_VERSION and falls back to 11.8 (the value
  that used to be hard-coded) when it is unset or empty. The blank line that
  upstream added in that hunk is replaced by a comment, so hunk sizes and the
  diffstat are unchanged.
* The allowed CudaVersion values are quoted to match the quoted default.
* The job template now ends with a newline.
* Comment grammar: "depend" changed to "depends".
* The post-image blob ids on the `index` lines are the upstream ones and no
  longer match the edited content.

Leading whitespace of the YAML lines and of the shell context lines was
reconstructed from structure (including the task-level placement of
`continueOnError`); verify the context lines against the target tree before
applying.

 .../py-cuda-package-test-pipeline.yml         |  35 ++++++
 .../jobs/py-linux-cuda-package-test-job.yml   | 118 ++++++++++++++++++
 .../ci_build/github/linux/run_python_tests.sh |   4 +-
 3 files changed, 156 insertions(+), 1 deletion(-)
 create mode 100644 tools/ci_build/github/azure-pipelines/py-cuda-package-test-pipeline.yml
 create mode 100644 tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml

diff --git a/tools/ci_build/github/azure-pipelines/py-cuda-package-test-pipeline.yml b/tools/ci_build/github/azure-pipelines/py-cuda-package-test-pipeline.yml
new file mode 100644
index 0000000000..d852e1132e
--- /dev/null
+++ b/tools/ci_build/github/azure-pipelines/py-cuda-package-test-pipeline.yml
@@ -0,0 +1,35 @@
+parameters:
+  - name: build_id
+    type: string
+    default: 'latest'
+  - name: project
+    type: string
+    default: 'Lotus'
+  - name: pipeline
+    type: string
+    default: 'Python-CUDA-Packaging-Pipeline'
+
+resources:
+  repositories:
+    - repository: manylinux
+      type: Github
+      endpoint: Microsoft
+      name: pypa/manylinux
+      ref: 5eda9aded5462201e6310105728d33016e637ea7
+
+stages:
+  # ****The following Stage depends on all previous tags. ***
+  # GPU resources are very limited,
+  # To utilize gpu resource more efficiently, run GPU job only after all cpus jobs succeed
+  - stage: Linux_Test_GPU_x86_64_stage
+    dependsOn:
+    jobs:
+      - template: stages/jobs/py-linux-cuda-package-test-job.yml
+        parameters:
+          CudaVersion: '12.2'
+          machine_pool: 'Onnxruntime-Linux-GPU'
+          timeout: 480
+          build_id: ${{ parameters.build_id }}
+          project: ${{ parameters.project }}
+          pipeline: ${{ parameters.pipeline }}
+
diff --git a/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml b/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml
new file mode 100644
index 0000000000..1a6e07ef00
--- /dev/null
+++ b/tools/ci_build/github/azure-pipelines/stages/jobs/py-linux-cuda-package-test-job.yml
@@ -0,0 +1,118 @@
+parameters:
+  - name: CudaVersion
+    displayName: 'CUDA version'
+    type: string
+    default: '11.8'
+    values:
+      - '11.8'
+      - '12.2'
+  - name: machine_pool
+    type: string
+
+  - name: timeout
+    type: number
+    default: 120
+  - name: build_id
+    type: string
+    default: 'latest'
+  - name: project
+    type: string
+    default: 'Lotus'
+  - name: pipeline
+    type: string
+    default: 'Python-CUDA-Packaging-Pipeline'
+  - name: dependencies
+    type: string
+    default: 'none'
+  # TODO: Ideally it should fetch information from the build that triggers it
+  - name: cmake_build_type
+    type: string
+    default: 'Release'
+    values:
+      - Debug
+      - Release
+      - RelWithDebInfo
+      - MinSizeRel
+
+jobs:
+  - job: Linux_Python_CUDA_Package_Test
+    ${{ if ne(parameters.dependencies, 'none') }}:
+      dependsOn: ${{ parameters.dependencies }}
+    ${{ if eq(parameters.dependencies, 'none') }}:
+      dependsOn: [ ]
+    timeoutInMinutes: ${{ parameters.timeout }}
+    variables:
+      - name: docker_base_image
+        ${{ if eq(parameters.CudaVersion, '11.8') }}:
+          value: nvidia/cuda:11.8.0-cudnn8-devel-ubi8
+        ${{ if eq(parameters.CudaVersion, '12.2') }}:
+          value: nvidia/cuda:12.2.2-cudnn8-devel-ubi8
+      - name: linux_trt_version
+        ${{ if eq(parameters.CudaVersion, '11.8') }}:
+          value: 8.6.1.6-1.cuda11.8
+        ${{ if eq(parameters.CudaVersion, '12.2') }}:
+          value: 8.6.1.6-1.cuda12.0
+    pool: ${{ parameters.machine_pool }}
+    steps:
+      - checkout: self
+      - task: DownloadPipelineArtifact@2
+        inputs:
+          artifact: 'drop-linux-gpu-x86_64'
+          targetPath: '$(Build.SourcesDirectory)/drop-linux-gpu-x86_64'
+          ${{ if ne(parameters.build_id, 'latest') }}:
+            buildType: 'specific'
+            project: '${{ parameters.project }}'
+            pipeline: '${{ parameters.pipeline }}'
+            buildVersionToDownload: 'specific'
+            buildId: '${{ parameters.build_id }}'
+        displayName: 'Download Build Artifacts - drop-linux-gpu-x86_64'
+
+      - task: DownloadPipelineArtifact@2
+        inputs:
+          artifact: 'onnxruntime_gpu'
+          targetPath: '$(Build.SourcesDirectory)/onnxruntime_gpu'
+          ${{ if ne(parameters.build_id, 'latest') }}:
+            buildType: 'specific'
+            project: '${{ parameters.project }}'
+            pipeline: '${{ parameters.pipeline }}'
+            buildVersionToDownload: 'specific'
+            buildId: '${{ parameters.build_id }}'
+        displayName: 'Download Build Artifacts - onnxruntime_gpu'
+
+      - bash: |
+          set -e -x
+          ls $(Build.SourcesDirectory)
+          mv "$(Build.SourcesDirectory)/drop-linux-gpu-x86_64" $(Build.BinariesDirectory)/${{parameters.cmake_build_type}}
+          mv "$(Build.SourcesDirectory)/onnxruntime_gpu" "$(Build.BinariesDirectory)/whl"
+          cp -r "$(Build.BinariesDirectory)/whl" $(Build.BinariesDirectory)/tmp
+          find "$(Build.BinariesDirectory)/tmp" -name '*.whl' -exec bash -c 'unzip -d "${1%.*}" "$1"' _ {} \;
+        displayName: 'Prepare artifacts'
+
+      - task: BinSkim@4
+        displayName: 'Run BinSkim'
+        inputs:
+          AnalyzeTargetGlob: '$(Build.BinariesDirectory)/tmp/**/*.so'
+        continueOnError: true
+
+      - template: ../../templates/get-docker-image-steps.yml
+        parameters:
+          Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_cuda
+          Context: tools/ci_build/github/linux/docker
+          DockerBuildArgs: "
+            --network=host
+            --build-arg BASEIMAGE=${{ variables.docker_base_image }}
+            --build-arg TRT_VERSION=${{ variables.linux_trt_version }}
+            --build-arg BUILD_UID=$( id -u )
+            --build-arg PLATFORM=x86_64
+            "
+          Repository: onnxruntimecuda${{ replace(parameters.CudaVersion, '.', '') }}xtrt86buildx86_64
+
+      - task: Bash@3
+        displayName: 'Run Python Docker Test'
+        inputs:
+          targetType: filePath
+          filePath: tools/ci_build/github/linux/run_python_dockertest.sh
+          arguments: -d GPU -c ${{parameters.cmake_build_type}} -i onnxruntimecuda${{ replace(parameters.CudaVersion, '.', '') }}xtrt86buildx86_64 -u ${{ parameters.CudaVersion }}
+      - template: ../../templates/component-governance-component-detection-steps.yml
+        parameters:
+          condition: 'succeeded'
diff --git a/tools/ci_build/github/linux/run_python_tests.sh b/tools/ci_build/github/linux/run_python_tests.sh
index f080c7e8c3..3164a10a09 100755
--- a/tools/ci_build/github/linux/run_python_tests.sh
+++ b/tools/ci_build/github/linux/run_python_tests.sh
@@ -33,7 +33,9 @@ if [ $ARCH == "x86_64" ]; then
     BUILD_ARGS="$BUILD_ARGS --enable_onnx_tests"
 fi
 if [ $BUILD_DEVICE == "GPU" ]; then
-    BUILD_ARGS="$BUILD_ARGS --use_cuda --use_tensorrt --cuda_version=11.8 --tensorrt_home=/usr --cuda_home=/usr/local/cuda-11.8 --cudnn_home=/usr/local/cuda-11.8"
+    # Reduce e.g. 12.2.2 to 12.2; fall back to 11.8 (the value hard-coded before) when CUDA_VERSION is unset or empty.
+    SHORT_CUDA_VERSION=$(echo "${CUDA_VERSION:-11.8}" | sed 's/\([[:digit:]]\+\.[[:digit:]]\+\)\.[[:digit:]]\+/\1/')
+    BUILD_ARGS="$BUILD_ARGS --use_cuda --use_tensorrt --cuda_version=$SHORT_CUDA_VERSION --tensorrt_home=/usr --cuda_home=/usr/local/cuda-$SHORT_CUDA_VERSION --cudnn_home=/usr/local/cuda-$SHORT_CUDA_VERSION"
 fi
 # We assume the machine doesn't have gcc and python development header files, so we don't build onnxruntime from source
 python3 -m pip install --upgrade pip