[ROCm] add rocm python package pipeline with --enable_rocm_profiling (#13068)

### Description
<!-- Describe your changes. -->

ROCm developers always need to build the onnxruntime *.whl with
`--enable_rocm_profiling`.
Add a ROCm dev Python package pipeline that produces *.whl files built with
the build argument `--enable_rocm_profiling`.
The dev *.whl files are uploaded to Azure storage and can be downloaded from
https://download.onnxruntime.ai/onnxruntime_nightly_rocm53.profiling.html


### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
This commit is contained in:
PeixuanZuo 2022-10-17 10:11:20 +08:00 committed by GitHub
parent c4d3c7003f
commit b4853a978a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 56 additions and 10 deletions

View file

@ -445,6 +445,7 @@ requirements_file = "requirements.txt"
local_version = None
enable_training = parse_arg_remove_boolean(sys.argv, "--enable_training")
enable_training_on_device = parse_arg_remove_boolean(sys.argv, "--enable_training_on_device")
enable_rocm_profiling = parse_arg_remove_boolean(sys.argv, "--enable_rocm_profiling")
disable_auditwheel_repair = parse_arg_remove_boolean(sys.argv, "--disable_auditwheel_repair")
default_training_package_device = parse_arg_remove_boolean(sys.argv, "--default_training_package_device")
@ -611,6 +612,8 @@ if nightly_build:
if local_version:
version_number = version_number + local_version
if is_rocm and enable_rocm_profiling:
version_number = version_number + ".profiling"
if wheel_name_suffix:
if not (enable_training and wheel_name_suffix == "gpu"):

View file

@ -2053,6 +2053,7 @@ def build_python_wheel(
use_ninja=False,
build_eager_mode=False,
enable_training_on_device=False,
enable_rocm_profiling=False,
):
for config in configs:
cwd = get_config_build_dir(build_dir, config)
@ -2074,6 +2075,8 @@ def build_python_wheel(
args.append("--enable_training_on_device")
if build_eager_mode:
args.append("--disable_auditwheel_repair")
if enable_rocm_profiling:
args.append("--enable_rocm_profiling")
# The following arguments are mutually exclusive
if use_cuda:
@ -2776,6 +2779,7 @@ def main():
use_ninja=(args.cmake_generator == "Ninja"),
build_eager_mode=args.build_eager_mode,
enable_training_on_device=args.enable_training_on_device,
enable_rocm_profiling=args.enable_rocm_profiling,
)
if args.build_nuget:
build_nuget_package(

View file

@ -35,3 +35,18 @@ stages:
parameters:
PythonVersion: '3.9'
RocmVersion: '5.3'
- template: templates/rocm.yml
parameters:
PythonVersion: '3.7'
RocmVersion: '5.3'
BuildConfig: 'RelWithDebInfo'
- template: templates/rocm.yml
parameters:
PythonVersion: '3.8'
RocmVersion: '5.3'
BuildConfig: 'RelWithDebInfo'
- template: templates/rocm.yml
parameters:
PythonVersion: '3.9'
RocmVersion: '5.3'
BuildConfig: 'RelWithDebInfo'

View file

@ -5,18 +5,39 @@ parameters:
- name: RocmVersion
type: string
- name: BuildConfig
type: string
default: 'Release'
jobs:
- job: wheels_python_${{ replace(parameters.PythonVersion,'.','_') }}_rocm_${{ replace(parameters.RocmVersion,'.','_') }}
- job: wheels_python_${{ replace(parameters.PythonVersion,'.','_') }}_rocm_${{ replace(parameters.RocmVersion,'.','_') }}_${{ parameters.BuildConfig }}
workspace:
clean: all
timeoutInMinutes: 180
pool: Ubuntu-2004-rocm-aiinfra
variables:
PythonVersion: ${{ parameters.PythonVersion }}
- name: PythonVersion
value: ${{ parameters.PythonVersion }}
- name: EnableProfiling
${{ if eq(parameters.BuildConfig, 'Release') }}:
value: ''
${{ else }}:
value: '--enable_rocm_profiling'
- name: ArtifactName
${{ if eq(parameters.BuildConfig, 'Release') }}:
value: 'onnxruntime_rocm'
${{ else }}:
value: 'onnxruntime_rocm_enable_profiling'
steps:
- task: CmdLine@2
displayName: 'check variables'
inputs:
script: |
echo "BuildConfig is "${{ parameters.BuildConfig }} && \
echo "EnableProfiling is "${{ variables['EnableProfiling'] }} && \
echo "ArtifactName is "${{ variables['ArtifactName'] }}
- checkout: self
clean: true
submodules: recursive
@ -34,6 +55,7 @@ jobs:
--build-arg PREPEND_PATH=/opt/rh/devtoolset-10/root/usr/bin:
--build-arg LD_LIBRARY_PATH_ARG=/opt/rh/devtoolset-10/root/usr/lib64:/opt/rh/devtoolset-10/root/usr/lib:/opt/rh/devtoolset-10/root/usr/lib64/dyninst:/opt/rh/devtoolset-10/root/usr/lib/dyninst:/usr/local/lib64:/usr/local/lib
Repository: onnxruntimetrainingrocmbuild-rocm${{ parameters.RocmVersion }}
- task: CmdLine@2
inputs:
script: |
@ -53,7 +75,7 @@ jobs:
--user onnxruntimedev \
onnxruntimetrainingrocmbuild-rocm${{ parameters.RocmVersion }} \
/onnxruntime_src/tools/ci_build/build.py \
--config Release \
--config ${{ parameters.BuildConfig }} \
--use_rocm \
--rocm_version=${{ parameters.RocmVersion }} \
--rocm_home=/opt/rocm \
@ -68,7 +90,8 @@ jobs:
--cmake_extra_defines \
CMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ \
onnxruntime_BUILD_UNIT_TESTS=OFF \
--enable_training_torch_interop
--enable_training_torch_interop \
${{ variables['EnableProfiling'] }}
workingDirectory: $(Build.SourcesDirectory)
displayName: 'Build onnxruntime (in container)'
@ -85,18 +108,19 @@ jobs:
render_gid=$(getent group | awk '/render/ {split($0,a,":"); print(a[3])}')
echo "Found render_gid=$render_gid; attempting to set as pipeline variable"
echo "##vso[task.setvariable variable=render]$render_gid"
condition: and(succeeded(), eq('${{ parameters.BuildConfig }}', 'Release'))
displayName: 'Find video and render gid to be mapped into container'
- task: CopyFiles@2
displayName: 'Copy Python Wheel to: $(Build.ArtifactStagingDirectory)'
inputs:
SourceFolder: '$(Build.BinariesDirectory)'
Contents: 'Release/dist/*.whl'
Contents: "${{ parameters.BuildConfig }}/dist/*.whl"
TargetFolder: '$(Build.ArtifactStagingDirectory)'
- task: CmdLine@2
displayName: 'Build Python Documentation'
condition: and(succeeded(), ne(variables['PythonVersion'], '3.9')) # tensorflow not available on python 3.9
condition: and(succeeded(), ne('${{ parameters.PythonVersion }}', '3.9'), eq('${{ parameters.BuildConfig }}', 'Release')) # tensorflow not available on python 3.9
inputs:
script: |
mkdir -p $HOME/.onnx
@ -123,7 +147,7 @@ jobs:
- task: CopyFiles@2
displayName: 'Copy Python Documentation to: $(Build.ArtifactStagingDirectory)'
condition: and(succeeded(), ne(variables['PythonVersion'], '3.9')) # tensorflow not available on python 3.9
condition: and(succeeded(), ne('${{ parameters.PythonVersion }}', '3.9'), eq('${{ parameters.BuildConfig }}', 'Release')) # tensorflow not available on python 3.9
inputs:
SourceFolder: '$(Build.BinariesDirectory)/docs/training/html'
Contents: '**'
@ -132,10 +156,10 @@ jobs:
- task: PublishBuildArtifacts@1
displayName: 'Upload Rocm wheel as build artifact'
inputs:
ArtifactName: onnxruntime_rocm
ArtifactName: ${{ variables['ArtifactName'] }}
- script: |
files=($(Build.ArtifactStagingDirectory)/Release/dist/*.whl) && \
files=($(Build.ArtifactStagingDirectory)/${{ parameters.BuildConfig }}/dist/*.whl) && \
echo ${files[0]} && \
python3 tools/ci_build/upload_python_package_to_azure_storage.py \
--python_wheel_path ${files[0]} \