From d3a09cf77f6e2ca3f55108ee8aef0d79d1b30ae0 Mon Sep 17 00:00:00 2001 From: PeixuanZuo <94887879+PeixuanZuo@users.noreply.github.com> Date: Fri, 13 Jan 2023 16:57:50 +0800 Subject: [PATCH] [ROCm] use pytest-xdist for fast pytest (#14261) ### Description Use pytest-xdist to distribute tests across multiple CPUs to speed up test execution. Use pytest-rerunfailures to rerun failed tests in case pytest-xdist crashes. `pytest -n 16` can reduce the pytest run time from 80 minutes to 20 minutes. ### Motivation and Context The kernel explorer pytest run in the ROCm CI currently takes nearly 1 hour 20 minutes, and it will take even longer as we add more tunableOps in the future. --- .../github/azure-pipelines/orttraining-pai-ci-pipeline.yml | 2 +- tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml index a13e380af6..42e5fc5269 100644 --- a/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/orttraining-pai-ci-pipeline.yml @@ -84,7 +84,7 @@ jobs: inputs: script: |- export KERNEL_EXPLORER_BUILD_DIR=./build/Release - pytest ./onnxruntime/python/tools/kernel_explorer/ + pytest ./onnxruntime/python/tools/kernel_explorer/ -n 16 --reruns 1 displayName: 'Run kernel explorer tests' condition: and(succeededOrFailed(), eq(variables.onnxruntimeBuildSucceeded, 'true')) diff --git a/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile b/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile index 106573a30b..bdee3cc0cd 100644 --- a/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile +++ b/tools/ci_build/github/pai/rocm-ci-pipeline-env.Dockerfile @@ -37,7 +37,9 @@ RUN pip install \ sentencepiece \ dill==0.3.4 \ wget \ - pytorch_lightning==1.6.0 + pytorch_lightning==1.6.0 \ + pytest-xdist \ + pytest-rerunfailures RUN pip 
install torch-ort --no-dependencies ENV ORTMODULE_ONNX_OPSET_VERSION=15