From da2bd3ad4df3edcf2d6b849bba4e4372d4d2d48f Mon Sep 17 00:00:00 2001 From: PeixuanZuo <94887879+PeixuanZuo@users.noreply.github.com> Date: Mon, 21 Nov 2022 10:04:10 +0800 Subject: [PATCH] [ROCm] Build ROCm CI with Release config and enable kernel explorer test (#13687) ### Description 1. Build ROCm CI with Release config to save time. 2. Use 32 threads to build, since the new CI machine has 256 threads. 3. Enable ROCm kernel explorer test. ### Motivation and Context Co-authored-by: peixuanzuo --- .../linux-migraphx-ci-pipeline.yml | 6 +-- .../orttraining-pai-ci-pipeline.yml | 39 ++++++++++++------- 2 files changed, 27 insertions(+), 18 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml index f5c85e52d7..91d6a3b013 100644 --- a/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/linux-migraphx-ci-pipeline.yml @@ -46,7 +46,7 @@ jobs: --workdir /onnxruntime_src \ onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion) \ python tools/ci_build/build.py \ - --config RelWithDebInfo \ + --config Release \ --cmake_extra_defines \ CMAKE_HIP_COMPILER=/opt/rocm/llvm/bin/clang++ \ --mpi_home /opt/ompi \ @@ -57,7 +57,7 @@ jobs: --update \ --build_dir /build \ --build \ - --parallel 16 \ + --parallel 32 \ --build_wheel \ --skip_submodule_sync \ --skip_tests @@ -79,7 +79,7 @@ jobs: --user $UID:$(id -g $USER) \ --volume $(Build.SourcesDirectory):/onnxruntime_src \ --volume $(Build.BinariesDirectory):/build \ - --workdir /build/RelWithDebInfo \ + --workdir /build/Release \ onnxruntimetrainingmigraphx-cibuild-rocm$(RocmVersion) \ /onnxruntime_src/tools/ci_build/github/pai/migraphx_test_launcher.sh workingDirectory: $(Build.SourcesDirectory) diff --git a/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml index 
99f517e68e..7344b909d7 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml @@ -51,12 +51,13 @@ jobs: script: |- export ROCM_HOME=/opt/rocm python tools/ci_build/build.py \ - --config RelWithDebInfo \ + --config Release \ --enable_training \ --enable_training_torch_interop \ --mpi_home /opt/ompi \ --cmake_extra_defines \ CMAKE_HIP_COMPILER=${ROCM_HOME}/llvm/bin/clang++ \ + onnxruntime_BUILD_KERNEL_EXPLORER=ON \ --use_rocm \ --rocm_version=$(RocmVersion) \ --rocm_home ${ROCM_HOME} \ @@ -64,7 +65,7 @@ jobs: --update \ --build_dir ./build \ --build \ - --parallel 16 \ + --parallel 32 \ --build_wheel \ --skip_tests displayName: 'Build onnxruntime' @@ -76,14 +77,22 @@ jobs: - task: CmdLine@2 inputs: script: |- - cd ./build/RelWithDebInfo &&\ + cd ./build/Release &&\ ../../tools/ci_build/github/pai/pai_test_launcher.sh displayName: 'Run onnxruntime unit tests' - task: CmdLine@2 inputs: script: |- - cd ./build/RelWithDebInfo + export KERNEL_EXPLORER_BUILD_DIR=./build/Release + pytest ./onnxruntime/python/tools/kernel_explorer/ + displayName: 'Run kernel explorer tests' + condition: and(succeededOrFailed(), eq(variables.onnxruntimeBuildSucceeded, 'true')) + + - task: CmdLine@2 + inputs: + script: |- + cd ./build/Release export PYTHONPATH=$PWD python -m onnxruntime.training.ortmodule.torch_cpp_extensions.install displayName: 'Compile torch extensions into build directory' @@ -92,7 +101,7 @@ jobs: - task: CmdLine@2 inputs: script: |- - cd ./build/RelWithDebInfo + cd ./build/Release export PYTHONPATH=$PWD export ORTMODULE_ONNX_OPSET_VERSION=15 python \ @@ -120,7 +129,7 @@ jobs: - task: CmdLine@2 inputs: script: |- - cd ./build/RelWithDebInfo + cd ./build/Release export PYTHONPATH=$PWD export ORTMODULE_ONNX_OPSET_VERSION=15 python \ @@ -147,7 +156,7 @@ jobs: condition: and(succeededOrFailed(), eq(variables.onnxruntimeBuildSucceeded, 'true')) # ensure all tests are run when 
the build successed # - script: |- -# cd ./build/RelWithDebInfo +# cd ./build/Release # export PYTHONPATH=$PWD # python \ # /stage/huggingface-transformers/examples/pytorch/translation/run_translation.py \ @@ -176,7 +185,7 @@ jobs: # todo: investigate RoBERTa high run variability on ROCm 5.2 #- script: |- - # cd ./build/RelWithDebInfo + # cd ./build/Release # export PYTHONPATH=$PWD # python \ # /stage/huggingface-transformers/examples/pytorch/question-answering/run_qa.py \ @@ -204,7 +213,7 @@ jobs: - task: CmdLine@2 inputs: script: |- - cd ./build/RelWithDebInfo + cd ./build/Release export PYTHONPATH=$PWD export ORTMODULE_ONNX_OPSET_VERSION=15 python \ @@ -230,7 +239,7 @@ jobs: condition: and(succeededOrFailed(), eq(variables.onnxruntimeBuildSucceeded, 'true')) # ensure all tests are run when the build successed #- script: |- - # cd ./build/RelWithDebInfo + # cd ./build/Release # export PYTHONPATH=$PWD # python \ # /stage/huggingface-transformers/examples/pytorch/text-classification/run_glue.py \ @@ -252,7 +261,7 @@ jobs: # condition: succeededOrFailed() # ensure all tests are run #- script: |- - # cd ./build/RelWithDebInfo + # cd ./build/Release # export PYTHONPATH=$PWD # python \ # /stage/huggingface-transformers/examples/pytorch/translation/run_translation.py \ @@ -297,7 +306,7 @@ jobs: inputs: script: |- python orttraining/tools/ci_test/run_batch_size_test.py \ - --binary_dir build/RelWithDebInfo \ + --binary_dir build/Release \ --model_root training_e2e_test_data/models \ --gpu_sku MI100_32G displayName: 'Run C++ BERT-L batch size test' @@ -308,7 +317,7 @@ jobs: inputs: script: |- python orttraining/tools/ci_test/run_bert_perf_test.py \ - --binary_dir build/RelWithDebInfo \ + --binary_dir build/Release \ --model_root training_e2e_test_data/models \ --training_data_root training_e2e_test_data/data \ --gpu_sku MI100_32G @@ -320,7 +329,7 @@ jobs: inputs: script: |- python orttraining/tools/ci_test/run_convergence_test.py \ - --binary_dir build/RelWithDebInfo \ + 
--binary_dir build/Release \ --model_root training_e2e_test_data/models \ --training_data_root training_e2e_test_data/data \ --gpu_sku MI100_32G @@ -359,7 +368,7 @@ jobs: # - task: CmdLine@2 # inputs: # script: |- - # cd ./build/RelWithDebInfo + # cd ./build/Release # unset PYTHONPATH # rm -rf onnxruntime # pip install ./dist/onnxruntime*.whl