mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Test [ci-verbose-test-logs] (this worked, the test logs printing while running and interleaved and are really long) Settings for no timeout (step timeout still applies, only gets rid of ~30 min timeout for shard of test file) and no piping logs/extra verbose test logs (good for debugging deadlocks but results in very long and possibly interleaved logs). Also allows these to be set via pr body if the label name is in brackets ex [label name] or the test above. Pull Request resolved: https://github.com/pytorch/pytorch/pull/117668 Approved by: https://github.com/huydhn
227 lines
8.8 KiB
YAML
227 lines
8.8 KiB
YAML
name: win-test

# Reusable workflow: it is only ever started by another workflow through
# `workflow_call`, which supplies the inputs declared below.
on:
  workflow_call:
    inputs:
      # --- required inputs ---
      build-environment:
        required: true
        type: string
        description: Top-level label for what's being built/tested.
      cuda-version:
        required: true
        type: string
        description: What CUDA version to build with, "cpu" for none.
      # Passed as a JSON string; the job decodes it with fromJSON().
      test-matrix:
        required: true
        type: string
        description: JSON description of what test configs to run.
      # --- optional inputs ---
      sync-tag:
        required: false
        type: string
        default: ""
        description: |
          If this is set, our linter will use this to make sure that every other
          job with the same `sync-tag` is identical.
      timeout-minutes:
        required: false
        type: number
        default: 300
        description: |
          Set the maximum (in minutes) how long the workflow should take to finish

# Workflow-level environment, visible to every job and step below.
env:
  # Name of the repository's default branch, taken from the triggering event payload.
  GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}

jobs:
  # Single matrix job: one instance per entry of the caller-supplied test matrix.
  test:
    # Don't run on forked repos or empty test matrix
    if: github.repository_owner == 'pytorch' && toJSON(fromJSON(inputs.test-matrix).include) != '[]'
    strategy:
      matrix: ${{ fromJSON(inputs.test-matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    # Entries whose mem_leak_check field is 'mem_leak_check' get 600 minutes;
    # all others use the timeout-minutes input.
    timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
    steps:
      # Duplicated in win-build because this MUST go before a checkout
      - name: Enable git symlinks on Windows and disable fsmonitor daemon
        shell: bash
        run: |
          git config --global core.symlinks true

          # https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock
          # the directory on Windows and prevent GHA from checking out as reported
          # in https://github.com/actions/checkout/issues/1018
          git config --global core.fsmonitor false

      - name: Clean up leftover processes on non-ephemeral Windows runner
        uses: pytorch/test-infra/.github/actions/cleanup-runner@main

      - name: Setup SSH (Click me for login details)
        uses: pytorch/test-infra/.github/actions/setup-ssh@main
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}
          instructions: |
            To forward remote desktop on your local machine ssh as follows:
            ssh -L 3389:localhost:3389 %%username%%@%%hostname%%
            And then change password using `passwd` command.

            To start tests locally, change working folder to \actions-runner\_work\pytorch\pytorch\test,
            Activate miniconda and Visual Studio environment and set PYTHON_PATH, by running:
            call C:\Jenkins\Miniconda3\Scripts\activate.bat C:\Jenkins\Miniconda3
            call "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64
            set PYTHONPATH=C:\actions-runner\_work\pytorch\pytorch\build\win_tmp\build

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
        with:
          no-sudo: true

      - name: Setup Windows
        uses: ./.github/actions/setup-win
        with:
          cuda-version: ${{ inputs.cuda-version }}

      # TODO: Move to a requirements.txt file for windows
      - name: Install pip dependencies
        uses: nick-fields/retry@3e91a01664abd3c5cd539100d10d33b9c5b68482
        with:
          shell: bash
          timeout_minutes: 5
          max_attempts: 5
          retry_wait_seconds: 30
          command: |
            set -eu
            python3 -m pip install rockset==1.0.3

      # Launches the monitor in the background and publishes its PID as a step
      # output so the "Stop monitoring script" step can kill it later.
      - name: Start monitoring script
        id: monitor-script
        shell: bash
        continue-on-error: true
        run: |
          # Windows conda doesn't have python3 binary, only python, but it's python3
          ${CONDA_RUN} python -m tools.stats.monitor > usage_log.txt 2>&1 &
          echo "monitor-script-pid=${!}" >> "${GITHUB_OUTPUT}"

      - name: Download PyTorch Build Artifacts
        uses: seemethere/download-artifact-s3@v4
        with:
          name: ${{ inputs.build-environment }}
          path: C:\${{ github.run_id }}\build-results

      - name: Check build-results folder
        shell: powershell
        run: |
          tree /F C:\$Env:GITHUB_RUN_ID\build-results

      - name: Get workflow job id
        id: get-job-id
        uses: ./.github/actions/get-workflow-job-id
        if: always()
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Check for keep-going label and re-enabled test issues
        # This uses the filter-test-configs action because it conveniently
        # checks for labels and re-enabled test issues. It does not actually do
        # any filtering. All filtering is done in the build step.
        id: keep-going
        uses: ./.github/actions/filter-test-configs
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          test-matrix: ${{ inputs.test-matrix }}
          job-name: ${{ steps.get-job-id.outputs.job-name }}

      # Installs the wheel downloaded above, then runs the Windows test script.
      - name: Test
        id: test
        shell: bash
        env:
          USE_CUDA: ${{ inputs.cuda-version != 'cpu' && '1' || '0' }}
          INSTALL_WINDOWS_SDK: 1
          # Quoted so the version stays a string and is never re-typed as a float.
          PYTHON_VERSION: "3.8"
          CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
          VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
          NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
          VC_PRODUCT: "BuildTools"
          VC_VERSION: ""
          VS_VERSION: "16.8.6"
          VC_YEAR: "2019"
          AWS_DEFAULT_REGION: us-east-1
          PR_NUMBER: ${{ github.event.pull_request.number }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          GITHUB_WORKFLOW: ${{ github.workflow }}
          GITHUB_JOB: ${{ github.job }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
          GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
          JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
          JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          CUDA_VERSION: ${{ inputs.cuda-version }}
          PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
          BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
          ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
          SHARD_NUMBER: ${{ matrix.shard }}
          NUM_TEST_SHARDS: ${{ matrix.num_shards }}
          TEST_CONFIG: ${{ matrix.config }}
          # Read from the keep-going step, which is the step that checks
          # re-enabled test issues. The pull_request event payload carries no
          # such field, so reading it there always produced an empty value.
          # NOTE(review): output name assumed to be `reenabled-issues`; confirm
          # against .github/actions/filter-test-configs/action.yml.
          REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
          TORCH_CUDA_ARCH_LIST: "8.6"
          PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
          PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
        run: |
          pushd "${PYTORCH_FINAL_PACKAGE_DIR}"
          # shellcheck disable=SC2046,SC2102
          python3 -mpip install $(echo *.whl)[opt-einsum,optree]
          popd

          .ci/pytorch/win-test.sh

      - name: Upload pytest cache if tests failed
        uses: ./.github/actions/pytest-cache-upload
        continue-on-error: true
        if: failure() && steps.test.conclusion && steps.test.conclusion == 'failure'
        with:
          cache_dir: .pytest_cache
          shard: ${{ matrix.shard }}
          sha: ${{ github.event.pull_request.head.sha || github.sha }}
          test_config: ${{ matrix.config }}
          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}

      # Runs whenever the Test step has a conclusion; `|| true` keeps a missing
      # log file from failing the step.
      - name: Print remaining test logs
        shell: bash
        if: always() && steps.test.conclusion
        run: |
          cat test/**/*_toprint.log || true

      # Only runs when the monitor step actually recorded a PID.
      - name: Stop monitoring script
        if: always() && steps.monitor-script.outputs.monitor-script-pid
        shell: bash
        continue-on-error: true
        env:
          MONITOR_SCRIPT_PID: ${{ steps.monitor-script.outputs.monitor-script-pid }}
        run: |
          kill "$MONITOR_SCRIPT_PID"

      - name: Upload test artifacts
        uses: ./.github/actions/upload-test-artifacts
        if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
        with:
          file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}

      - name: Parse ref
        id: parse-ref
        run: python3 .github/scripts/parse_ref.py

      - name: Uninstall PyTorch
        if: always()
        continue-on-error: true
        shell: bash
        run: |
          # This step removes PyTorch installed by the test to give a clean slate
          # to the next job
          python3 -mpip uninstall -y torch

      - name: Teardown Windows
        uses: ./.github/actions/teardown-win
        if: always()
        timeout-minutes: 120