mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Rerun all disabled tests to gather their latest results so that we can close disabled tickets automatically. When running under this mode (RERUN_DISABLED_TESTS=true), only disabled tests are run, while the rest are skipped with `<skipped message="Test is enabled but --rerun-disabled-tests verification mode is set, so only disabled tests are run" type="skip"/>`.
The logic is roughly as follows. Each disabled test is run multiple times (n=50):
* If the disabled test passes, and it's flaky, do nothing because it's still flaky. In the test report, we'll see the test passes with the following skipped message:
```
<testcase classname="TestMultiprocessing" file="test_multiprocessing.py" line="357" name="test_fs" time="0.000" timestamp="0001-01-01T00:00:00">
<skipped message="{"flaky": True, "num_red": 4, "num_green": 0, "max_num_retries": 3, "rerun_disabled_test": true}" type="skip"/>
</testcase>
```
* If the disabled test passes every single time, and it is not flaky anymore, mark it so that it can be closed later. We will see the test runs and passes, i.e.
```
<testcase classname="TestCommonCUDA" name="test_out_warning_linalg_lu_factor_cuda" time="0.170" file="test_ops.py" />
```
* If the disabled test fails after all retries, this is also expected, so we only report it and don't fail the job (because we don't care about red signals here). We'll see the test is skipped (without the `flaky` field), i.e.
```
<testcase classname="TestMultiprocessing" file="test_multiprocessing.py" line="357" name="test_fs" time="0.000" timestamp="0001-01-01T00:00:00">
<skipped message="{"num_red": 4, "num_green": 0, "max_num_retries": 3, "rerun_disabled_test": true}" type="skip"/>
</testcase>
```
This runs on the same schedule as `mem_leak_check` (daily). The changes to update test stats and (potentially) to group results on HUD will come in separate PRs.
### Testing
* pull https://github.com/pytorch/pytorch/actions/runs/3447434434
* trunk https://github.com/pytorch/pytorch/actions/runs/3447434928
Pull Request resolved: https://github.com/pytorch/pytorch/pull/88646
Approved by: https://github.com/clee2000
199 lines
7.2 KiB
YAML
199 lines
7.2 KiB
YAML
# Windows test workflow. It has no triggers of its own: other workflows invoke
# it through `workflow_call` and supply the inputs declared below.
name: win-test

on:
  workflow_call:
    inputs:
      build-environment:
        description: Top-level label for what's being built/tested.
        type: string
        required: true
      cuda-version:
        description: What CUDA version to build with, "cpu" for none.
        type: string
        required: true
      test-matrix:
        description: JSON description of what test configs to run.
        type: string
        required: true
      # Optional; defaults to the empty string when the caller omits it.
      sync-tag:
        description: |
          If this is set, our linter will use this to make sure that every other
          job with the same `sync-tag` is identical.
        type: string
        required: false
        default: ""
# Workflow-level environment, visible to every job and step below.
env:
  # Name of the repository's default branch, taken from the triggering event.
  GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
jobs:
  # This needs to be run right before the test starts so that it can gather the
  # latest labels from the PR
  filter:
    runs-on: [self-hosted, linux.large]
    # Re-export the filter action's results so the `test` job can read them
    # through `needs.filter.outputs.*`.
    outputs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
      is-test-matrix-empty: ${{ steps.filter.outputs.is-test-matrix-empty }}
    steps:
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@master
        with:
          fetch-depth: 1
          submodules: false

      - name: Select all requested test configurations
        id: filter
        uses: ./.github/actions/filter-test-configs
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          test-matrix: ${{ inputs.test-matrix }}

  # Runs one job per entry of the filtered test matrix.
  test:
    needs: filter
    # Don't run on forked repos or empty test matrix
    if: github.repository_owner == 'pytorch' && needs.filter.outputs.is-test-matrix-empty == 'False'
    strategy:
      matrix: ${{ fromJSON(needs.filter.outputs.test-matrix) }}
      # Let the remaining matrix entries finish even if one of them fails.
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    timeout-minutes: 300
    steps:
      - name: Enable git symlinks on Windows
        shell: bash
        run: |
          git config --global core.symlinks true

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@master
        with:
          no-sudo: true

      - name: Setup Windows
        uses: ./.github/actions/setup-win
        with:
          cuda-version: ${{ inputs.cuda-version }}

      - name: Setup SSH (Click me for login details)
        uses: pytorch/test-infra/.github/actions/setup-ssh@main
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}

      # Starts the usage monitor in the background and publishes its PID as a
      # step output so that "Stop monitoring script" can kill it later.
      - name: Start monitoring script
        id: monitor-script
        shell: bash
        run: |
          python3 -m pip install psutil==5.9.1
          python3 -m pip install pynvml==11.4.1
          python3 -m tools.stats.monitor > usage_log.txt 2>&1 &
          echo "monitor-script-pid=${!}" >> "${GITHUB_OUTPUT}"

      - name: Download PyTorch Build Artifacts
        uses: seemethere/download-artifact-s3@v4
        with:
          name: ${{ inputs.build-environment }}
          path: C:\${{ github.run_id }}\build-results

      - name: Check build-results folder
        shell: powershell
        run: |
          tree /F C:\$Env:GITHUB_RUN_ID\build-results

      - name: Test
        id: test
        shell: bash
        env:
          # '1' unless the caller asked for a "cpu" build.
          USE_CUDA: ${{ inputs.cuda-version != 'cpu' && '1' || '0' }}
          INSTALL_WINDOWS_SDK: 1
          # Quoted so the version stays a string; an unquoted value such as
          # 3.10 would be read as the float 3.1.
          PYTHON_VERSION: "3.8"
          PYTORCH_RETRY_TEST_CASES: 1
          PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
          VC_PRODUCT: "BuildTools"
          VC_VERSION: ""
          VS_VERSION: "16.8.6"
          VC_YEAR: "2019"
          AWS_DEFAULT_REGION: us-east-1
          PR_NUMBER: ${{ github.event.pull_request.number }}
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          CUDA_VERSION: ${{ inputs.cuda-version }}
          # Same directory as the artifact download path above, written in
          # bash-style form (/c/... instead of C:\...).
          PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
          BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
          ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
          SHARD_NUMBER: ${{ matrix.shard }}
          NUM_TEST_SHARDS: ${{ matrix.num_shards }}
          TEST_CONFIG: ${{ matrix.config }}
          # Handed over through the environment (not interpolated into the
          # script) and sanitized by the script below before being exported.
          PR_BODY: ${{ github.event.pull_request.body }}
          TORCH_CUDA_ARCH_LIST: "7.0"
          # Both flags are '1' only when the matrix entry sets the matching key.
          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
          PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
        run: |
          COMMIT_MESSAGES=$(git cherry -v "origin/${GIT_DEFAULT_BRANCH:-master}")

          # sanitize the input commit message and PR body here:
          #
          # trim all new lines from commit messages + PR_BODY to avoid issues with batch environment
          # variable copying. see https://github.com/pytorch/pytorch/pull/80043#issuecomment-1167796028
          COMMIT_MESSAGES="${COMMIT_MESSAGES//[$'\n\r']}"
          PR_BODY="${PR_BODY//[$'\n\r']}"

          # then trim all special characters like single and double quotes to avoid unescaped inputs to
          # wreak havoc internally
          export COMMIT_MESSAGES="${COMMIT_MESSAGES//[\'\"]}"
          export PR_BODY="${PR_BODY//[\'\"]}"

          .jenkins/pytorch/win-test.sh

      - name: Get workflow job id
        id: get-job-id
        uses: ./.github/actions/get-workflow-job-id
        if: always()
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}

      # Runs even after a failure, but only if the monitor step got far enough
      # to publish a PID; a failed `kill` must not fail the job.
      - name: Stop monitoring script
        if: always() && steps.monitor-script.outputs.monitor-script-pid
        shell: bash
        continue-on-error: true
        env:
          MONITOR_SCRIPT_PID: ${{ steps.monitor-script.outputs.monitor-script-pid }}
        run: |
          kill "$MONITOR_SCRIPT_PID"

      # Upload only when the Test step concluded as success or failure, i.e.
      # not when it was skipped or cancelled.
      - name: Upload test artifacts
        uses: ./.github/actions/upload-test-artifacts
        if: always() && (steps.test.conclusion == 'success' || steps.test.conclusion == 'failure')
        with:
          file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}

      # Must run even when the Test step failed: "Upload test statistics" below
      # runs under `always()` and reads this step's `branch` and `tag` outputs,
      # which would be empty if this step were skipped.
      - name: Parse ref
        id: parse-ref
        if: always()
        run: .github/scripts/parse_ref.py

      - name: Upload test statistics
        if: always()
        env:
          AWS_DEFAULT_REGION: us-east-1
          GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          TEST_CONFIG: ${{ matrix.config }}
          SHARD_NUMBER: ${{ matrix.shard }}
          BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          PYTORCH_RETRY_TEST_CASES: 1
          PYTORCH_OVERRIDE_FLAKY_SIGNAL: 1
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          TAG: ${{ steps.parse-ref.outputs.tag }}
          WORKFLOW_ID: ${{ github.run_id }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          GHA_WORKFLOW_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
        shell: bash
        run: |
          set -x
          python3 -m pip install -r requirements.txt
          python3 -m pip install boto3==1.19.12
          python3 -m tools.stats.print_test_stats --upload-to-s3 --compare-with-s3 test

      # Always clean up the runner; the step has its own timeout, separate
      # from the job-level one.
      - name: Teardown Windows
        uses: ./.github/actions/teardown-win
        if: always()
        timeout-minutes: 120