From b01e89587ec0f8bb07a2401fdb54fd3f9130c24a Mon Sep 17 00:00:00 2001 From: Jithun Nair <37884920+jithunnair-amd@users.noreply.github.com> Date: Mon, 13 Nov 2023 21:25:49 +0000 Subject: [PATCH] [ROCM][CI] Introduce tests-to-include as rocm-test workflow input (#110511) Fixes https://github.com/pytorch/pytorch/issues/110181 Pull Request resolved: https://github.com/pytorch/pytorch/pull/110511 Approved by: https://github.com/huydhn --- .ci/pytorch/test.sh | 19 ++++++++++++++++--- .github/workflows/_rocm-test.yml | 8 ++++++++ .github/workflows/trunk.yml | 22 ++++++++++++++++++++++ 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh index c398060232c..bd583448297 100755 --- a/.ci/pytorch/test.sh +++ b/.ci/pytorch/test.sh @@ -80,6 +80,11 @@ if [[ "$BUILD_ENVIRONMENT" != *bazel* ]]; then CUSTOM_TEST_ARTIFACT_BUILD_DIR=$(realpath "${CUSTOM_TEST_ARTIFACT_BUILD_DIR:-"build/custom_test_artifacts"}") fi +# Reduce set of tests to include when running run_test.py +if [[ -n $TESTS_TO_INCLUDE ]]; then + echo "Setting INCLUDE_CLAUSE" + INCLUDE_CLAUSE="--include $TESTS_TO_INCLUDE" +fi # shellcheck source=./common.sh source "$(dirname "${BASH_SOURCE[0]}")/common.sh" @@ -228,13 +233,16 @@ test_python_shard() { exit 1 fi - time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests --shard "$1" "$NUM_TEST_SHARDS" --verbose + # Bare --include flag is not supported and quoting for lint ends up with flag not being interpreted correctly + # shellcheck disable=SC2086 + time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --shard "$1" "$NUM_TEST_SHARDS" --verbose assert_git_not_dirty } test_python() { - time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests --verbose + # shellcheck disable=SC2086 + time python test/run_test.py --exclude-jit-executor --exclude-distributed-tests $INCLUDE_CLAUSE --verbose assert_git_not_dirty } @@ -681,7 +689,8 @@ test_vulkan() { test_distributed() { echo "Testing distributed python tests" - time python test/run_test.py --distributed-tests --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" --verbose + # shellcheck disable=SC2086 + time python test/run_test.py --distributed-tests --shard "$SHARD_NUMBER" "$NUM_TEST_SHARDS" $INCLUDE_CLAUSE --verbose assert_git_not_dirty if [[ ("$BUILD_ENVIRONMENT" == *cuda* || "$BUILD_ENVIRONMENT" == *rocm*) && "$SHARD_NUMBER" == 1 ]]; then @@ -1092,6 +1101,10 @@ elif [[ "${BUILD_ENVIRONMENT}" == *-mobile-lightweight-dispatch* ]]; then test_libtorch elif [[ "${TEST_CONFIG}" = docs_test ]]; then test_docs_test +elif [[ "${BUILD_ENVIRONMENT}" == *rocm* && -n "$TESTS_TO_INCLUDE" ]]; then + install_torchvision + test_python + test_aten else install_torchvision install_monkeytype diff --git a/.github/workflows/_rocm-test.yml b/.github/workflows/_rocm-test.yml index 5a1c26e955e..01950f817f0 100644 --- a/.github/workflows/_rocm-test.yml +++ b/.github/workflows/_rocm-test.yml @@ -32,6 +32,12 @@ on: default: 300 description: | Set the maximum (in minutes) how long the workflow should take to finish + tests-to-include: + required: false + type: string + default: "" + description: | + List of tests to include (empty string implies default list) env: GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }} @@ -136,6 +142,7 @@ jobs: XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }} PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }} + TESTS_TO_INCLUDE: ${{ inputs.tests-to-include }} timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }} run: | set -x @@ -180,6 +187,7 @@ jobs: -e XLA_CLANG_CACHE_S3_BUCKET_NAME \ -e PYTORCH_TEST_CUDA_MEM_LEAK_CHECK \ -e PYTORCH_TEST_RERUN_DISABLED_TESTS \ + -e TESTS_TO_INCLUDE \ --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \ --ulimit stack=10485760:83886080 \ --ulimit core=0 \ diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index c4831d364cb..13d864084eb 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -174,3 +174,25 @@ jobs: { config: "default", shard: 6, num_shards: 6, runner: "windows.g5.4xlarge.nvidia.gpu" }, { config: "force_on_cpu", shard: 1, num_shards: 1, runner: "windows.4xlarge.nonephemeral" }, ]} + + linux-focal-rocm5_7-py3_8-build: + name: linux-focal-rocm5.7-py3.8 + uses: ./.github/workflows/_linux-build.yml + with: + build-environment: linux-focal-rocm5.7-py3.8 + docker-image-name: pytorch-linux-focal-rocm-n-py3 + sync-tag: rocm-build + test-matrix: | + { include: [ + { config: "default", shard: 1, num_shards: 1, runner: "linux.rocm.gpu" }, + ]} + + linux-focal-rocm5_7-py3_8-test: + name: linux-focal-rocm5.7-py3.8 + uses: ./.github/workflows/_rocm-test.yml + needs: linux-focal-rocm5_7-py3_8-build + with: + build-environment: linux-focal-rocm5.7-py3.8 + docker-image: ${{ needs.linux-focal-rocm5_7-py3_8-build.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-rocm5_7-py3_8-build.outputs.test-matrix }} + tests-to-include: "test_nn test_torch test_cuda test_ops test_unary_ufuncs test_binary_ufuncs test_autograd"