onnxruntime/tools/ci_build/github/azure-pipelines/templates/py-packaging-linux-test-cuda.yml
Yifan Li bb76ead96c
[TensorRT EP] support TensorRT 10.2-GA (#21395)
### Description
<!-- Describe your changes. -->
* promote trt version to 10.2.0.19
* EP_Perf CI: clean config of legacy TRT<8.6, promote test env to
trt10.2-cu118/cu125
* skip two tests as Float8/BF16 are supported by TRT>10.0 but TRT CIs
are not hardware-compatible on these:
 ```
 1: [  FAILED  ] 2 tests, listed below:
 1: [  FAILED  ] IsInfTest.test_isinf_bfloat16
 1: [  FAILED  ] IsInfTest.test_Float8E4M3FN
 ```

### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
2024-07-18 12:11:52 -07:00

107 lines
3.3 KiB
YAML

parameters:
- name: arch
type: string
- name: machine_pool
type: string
- name: extra_job_id
type: string
default: ''
- name: python_wheel_suffix
type: string
default: ''
- name: docker_base_image
type: string
- name: trt_version
type: string
default: '10.2.0.19-1.cuda11.8'
values:
- 10.2.0.19-1.cuda11.8
- 10.2.0.19-1.cuda12.5
- name: cuda_version
type: string
default: '11.8'
values:
- 11.8
- 12.2
# TODO: Ideally it should fetch information from the build that triggers it
- name: cmake_build_type
type: string
default: 'Release'
values:
- Debug
- Release
- RelWithDebInfo
- MinSizeRel
- name: timeout
type: number
default: 120
jobs:
- job: Linux_Test_GPU${{ parameters.extra_job_id }}_${{ parameters.arch }}
timeoutInMinutes: ${{ parameters.timeout }}
variables:
skipComponentGovernanceDetection: true
workspace:
clean: all
pool: ${{ parameters.machine_pool }}
steps:
- checkout: self
clean: true
submodules: none
# The public ADO project
# - ${{ if eq(variables['System.CollectionId'], 'f3ad12f2-e480-4533-baf2-635c95467d29') }}:
# The private ADO project
- ${{ if eq(variables['System.CollectionId'], 'bc038106-a83b-4dab-9dd3-5a41bc58f34c') }}:
- download: build # pipeline resource identifier.
artifact: 'drop-linux-gpu-${{ parameters.arch }}'
- download: build # pipeline resource identifier.
artifact: 'onnxruntime${{ parameters.python_wheel_suffix }}'
- bash: |
set -e -x
ls $(Pipeline.Workspace)/build
mv "$(Pipeline.Workspace)/build/drop-linux-gpu-${{ parameters.arch }}" $(Build.BinariesDirectory)/${{parameters.cmake_build_type}}
mv "$(Pipeline.Workspace)/build/onnxruntime${{ parameters.python_wheel_suffix }}" "$(Build.BinariesDirectory)/whl"
cp -r "$(Build.BinariesDirectory)/whl" $(Build.BinariesDirectory)/tmp
find "$(Build.BinariesDirectory)/tmp" -name '*.whl' -exec bash -c 'unzip -d "${1%.*}" "$1"' _ {} \;
# The BinSkim task uses a dotnet program which doesn't support ARM CPUs yet
- ${{ if eq(parameters.arch, 'x86_64') }}:
- task: BinSkim@4
displayName: 'Run BinSkim'
inputs:
AnalyzeTargetGlob: '$(Build.BinariesDirectory)/tmp/**/*.so'
continueOnError: true
#- task: PostAnalysis@2
# inputs:
# GdnBreakAllTools: true
# GdnBreakPolicy: M365
# GdnBreakPolicyMinSev: Error
- template: get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/python/cuda/Dockerfile
Context: tools/ci_build/github/linux/docker/inference/${{ parameters.arch }}/python/cuda
DockerBuildArgs: "--build-arg BASEIMAGE=${{ parameters.docker_base_image }} --build-arg TRT_VERSION=${{ parameters.trt_version }} --build-arg BUILD_UID=$( id -u )"
Repository: onnxruntimecuda${{ replace(parameters.cuda_version, '.', '') }}xtrt86build${{ parameters.arch }}
- task: Bash@3
displayName: 'Bash Script'
inputs:
targetType: filePath
filePath: tools/ci_build/github/linux/run_python_dockertest.sh
arguments: -d GPU -c ${{parameters.cmake_build_type}} -i onnxruntimecuda118xtrt86build${{ parameters.arch }}
- task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3
displayName: 'Clean Agent Directories'
condition: always()