onnxruntime/tools/ci_build/github/azure-pipelines/templates/rocm.yml
PeixuanZuo 446aa986a1
[ROCm] Extend the Pipeline restriction time (#21158)
ROCm EP builds are taking longer.
2024-06-27 15:36:04 +08:00

148 lines
6.3 KiB
YAML

parameters:
- name: PythonVersion
type: string
- name: RocmVersion
type: string
- name: RocmVersionPatchSuffix
type: string
default: ''
- name: BuildConfig
type: string
default: 'Release'
jobs:
- job: wheels_python_${{ replace(parameters.PythonVersion,'.','_') }}_rocm_${{ replace(parameters.RocmVersion,'.','_') }}_${{ parameters.BuildConfig }}
workspace:
clean: all
timeoutInMinutes: 360
pool: Ubuntu-2204-rocm-aiinfra
variables:
- name: PythonVersion
value: ${{ parameters.PythonVersion }}
- name: EnableProfiling
${{ if eq(parameters.BuildConfig, 'Release') }}:
value: ''
${{ else }}:
value: '--enable_rocm_profiling'
- name: ArtifactName
${{ if eq(parameters.BuildConfig, 'Release') }}:
value: 'onnxruntime_rocm'
${{ else }}:
value: 'onnxruntime_rocm_Debug'
steps:
- task: CmdLine@2
displayName: 'check variables'
inputs:
script: |
echo "BuildConfig is "${{ parameters.BuildConfig }} && \
echo "EnableProfiling is "${{ variables['EnableProfiling'] }} && \
echo "ArtifactName is "${{ variables['ArtifactName'] }}
- checkout: self
clean: true
submodules: recursive
- template: set-python-manylinux-variables-step.yml
- template: get-docker-image-steps.yml
parameters:
Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm
Context: tools/ci_build/github/linux/docker
CheckOutManyLinux: true
DockerBuildArgs: >-
--build-arg INSTALL_DEPS_EXTRA_ARGS=-tmur
--build-arg BUILD_UID=$(id -u)
--network=host --build-arg POLICY=manylinux_2_28 --build-arg PLATFORM=x86_64
--build-arg DEVTOOLSET_ROOTPATH=/opt/rh/gcc-toolset-12/root
--build-arg PREPEND_PATH=/opt/rh/gcc-toolset-12/root/usr/bin:
--build-arg LD_LIBRARY_PATH_ARG=/opt/rh/gcc-toolset-12/root/usr/lib64:/opt/rh/gcc-toolset-12/root/usr/lib:/opt/rh/gcc-toolset-12/root/usr/lib64/dyninst:/opt/rh/gcc-toolset-12/root/usr/lib/dyninst:/usr/local/lib64:/usr/local/lib
--build-arg ROCM_VERSION=${{ parameters.RocmVersion }}${{ parameters.RocmVersionPatchSuffix }}
Repository: onnxruntimetrainingrocmbuild-rocm${{ parameters.RocmVersion }}
- task: CmdLine@2
inputs:
script: |
docker run --rm \
--privileged \
--ipc=host \
--network=host \
--cap-add=SYS_PTRACE \
--security-opt seccomp=unconfined \
-e CC=/opt/rh/gcc-toolset-12/root/usr/bin/cc -e CXX=/opt/rh/gcc-toolset-12/root/usr/bin/c++ -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" \
--volume $(Build.SourcesDirectory):/onnxruntime_src \
--volume $(Build.BinariesDirectory):/build \
--workdir /onnxruntime_src \
--entrypoint $(PythonManylinuxDir)/bin/python3 \
-e NIGHTLY_BUILD \
-e BUILD_BUILDNUMBER \
-e ORT_DISABLE_PYTHON_PACKAGE_LOCAL_VERSION \
--user onnxruntimedev \
onnxruntimetrainingrocmbuild-rocm${{ parameters.RocmVersion }} \
/onnxruntime_src/tools/ci_build/build.py \
--config ${{ parameters.BuildConfig }} \
--use_rocm \
--use_migraphx \
--rocm_version=${{ parameters.RocmVersion }} \
--rocm_home=/opt/rocm \
--nccl_home=/opt/rocm \
--update \
--parallel \
--build_dir /build \
--build \
--build_wheel \
--skip_tests \
--enable_training \
--cmake_extra_defines \
CMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ \
onnxruntime_BUILD_UNIT_TESTS=OFF FETCHCONTENT_TRY_FIND_PACKAGE_MODE=NEVER \
${{ variables['EnableProfiling'] }}
workingDirectory: $(Build.SourcesDirectory)
displayName: 'Build onnxruntime (in container)'
# All UTs were here are now covered in AMD CI - see orttraining-pai-ci-pipeline.yml
# This CI is mainly responsible for packaging. The uploaded whl could be used in the downstream CIs (if any).
# For example, docker image build (e.g., PTCA), reporting CI, etc. to further verify the whl as needed.
# To view the UTs disabled from this CI - see https://github.com/microsoft/onnxruntime/pull/11945 for examples
- script: |-
# Do not output ##vso[] commands with `set -x` or they may be parsed again and include a trailing quote.
set +x
echo "Tests will run using HIP_VISIBLES_DEVICES=$HIP_VISIBLE_DEVICES"
video_gid=$(getent group | awk '/video/ {split($0,a,":"); print(a[3])}')
echo "Found video_gid=$video_gid; attempting to set as pipeline variable"
echo "##vso[task.setvariable variable=video]$video_gid"
render_gid=$(getent group | awk '/render/ {split($0,a,":"); print(a[3])}')
echo "Found render_gid=$render_gid; attempting to set as pipeline variable"
echo "##vso[task.setvariable variable=render]$render_gid"
condition: and(succeeded(), eq('${{ parameters.BuildConfig }}', 'Release'))
displayName: 'Find video and render gid to be mapped into container'
- task: CopyFiles@2
displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)'
inputs:
SourceFolder: '$(Build.BinariesDirectory)'
Contents: "${{ parameters.BuildConfig }}/dist/*.whl"
TargetFolder: '$(Build.ArtifactStagingDirectory)'
- task: PublishBuildArtifacts@1
displayName: 'Upload Rocm wheel as build artifact'
inputs:
ArtifactName: ${{ variables['ArtifactName'] }}
- script: |
files=($(Build.ArtifactStagingDirectory)/${{ parameters.BuildConfig }}/dist/*.whl) && \
echo ${files[0]} && \
python3 tools/ci_build/upload_python_package_to_azure_storage.py \
--python_wheel_path ${files[0]} \
--final_storage
condition: and(ne(variables['ORT_DISABLE_PYTHON_PACKAGE_LOCAL_VERSION'], 'true'), and(succeeded(), eq(variables['DRY_RUN'], '0')))
displayName: 'Upload Rocm wheel to release repository'
- template: component-governance-component-detection-steps.yml
parameters:
condition: 'succeeded'
- template: clean-agent-build-directory-step.yml