mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
Attempt to fix the missing python3 command on the new Windows AMI: https://github.com/pytorch/pytorch/actions/runs/9551494945/job/26325922503. I had added logic to copy python to python3 to make the command available. It worked with the previous AMI, but it has started to fail and the cause is not clear (maybe it's not the AMI, but a new GitHub runner version). Pull Request resolved: https://github.com/pytorch/pytorch/pull/128854 Approved by: https://github.com/kit1980, https://github.com/malfet, https://github.com/atalman
245 lines
9.4 KiB
YAML
245 lines
9.4 KiB
YAML
# Reusable workflow: it is triggered only through workflow_call, and callers
# supply the inputs declared below.
name: win-test

on:
  workflow_call:
    inputs:
      build-environment:
        description: Top-level label for what's being built/tested.
        required: true
        type: string
      cuda-version:
        description: What CUDA version to build with, "cpu" for none.
        required: true
        type: string
      test-matrix:
        description: JSON description of what test configs to run.
        required: true
        type: string
      # Optional; defaults to the empty string.
      sync-tag:
        description: |
          If this is set, our linter will use this to make sure that every other
          job with the same `sync-tag` is identical.
        required: false
        type: string
        default: ""
      # Optional; defaults to 240 minutes.
      timeout-minutes:
        description: |
          Set the maximum (in minutes) how long the workflow should take to finish
        required: false
        type: number
        default: 240
|
|
|
|
# Workflow-level environment variables.
env:
  # Default branch name, read from the repository object of the triggering event.
  GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
|
|
|
|
jobs:
  test:
    # Skipped on forks (owner must be 'pytorch') and when the test matrix has
    # no entries.
    if: github.repository_owner == 'pytorch' && toJSON(fromJSON(inputs.test-matrix).include) != '[]'
    strategy:
      # One failing matrix entry does not cancel the others.
      fail-fast: false
      matrix: ${{ fromJSON(inputs.test-matrix) }}
    runs-on: ${{ matrix.runner }}
    # mem_leak_check configs get 600 minutes; all others use the caller's
    # timeout-minutes input.
    timeout-minutes: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
    defaults:
      run:
        shell: bash
    steps:
|
|
# This MUST run before any checkout step, which is why win-build carries its
# own copy of it.
- name: Enable git symlinks on Windows and disable fsmonitor daemon
  shell: bash
  run: |
    git config --global core.symlinks true

    # https://git-scm.com/docs/git-fsmonitor--daemon. The daemon could lock
    # the directory on Windows and prevent GHA from checking out as reported
    # in https://github.com/actions/checkout/issues/1018
    git config --global core.fsmonitor false
|
|
|
|
# Delegates to the cleanup-runner action in pytorch/test-infra, pinned to the
# moving `main` ref.
- name: Clean up leftover processes on non-ephemeral Windows runner
  uses: pytorch/test-infra/.github/actions/cleanup-runner@main
|
|
|
|
- name: Setup SSH (Click me for login details)
  uses: pytorch/test-infra/.github/actions/setup-ssh@main
  with:
    github-secret: ${{ secrets.GITHUB_TOKEN }}
    # Free-form text handed to the action; presumably shown to whoever expands
    # this step in the job log -- TODO confirm against the setup-ssh action.
    instructions: |
      To forward remote desktop on your local machine ssh as follows:
      ssh -L 3389:localhost:3389 %%username%%@%%hostname%%
      And then change password using `passwd` command.

      To start tests locally, change working folder to \actions-runner\_work\pytorch\pytorch\test,
      Activate miniconda and Visual Studio environment and set PYTHON_PATH, by running:
      call C:\Jenkins\Miniconda3\Scripts\activate.bat C:\Jenkins\Miniconda3
      call "C:\Program Files (x86)\Microsoft Visual Studio\2019\BuildTools\VC\Auxiliary\Build\vcvarsall.bat" x64
      set PYTHONPATH=C:\actions-runner\_work\pytorch\pytorch\build\win_tmp\build
|
|
|
|
# [see note: pytorch repo ref]
# Uses the shared checkout action with its no-sudo option enabled.
- name: Checkout PyTorch
  uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
  with:
    no-sudo: true
|
|
|
|
# Repository-local action; receives the caller's cuda-version input unchanged.
- name: Setup Windows
  uses: ./.github/actions/setup-win
  with:
    cuda-version: ${{ inputs.cuda-version }}
|
|
|
|
# TODO: Move to a requirements.txt file for windows
- name: Install pip dependencies
  uses: nick-fields/retry@3e91a01664abd3c5cd539100d10d33b9c5b68482
  with:
    shell: bash
    # Retry policy: at most 5 attempts, 5 minutes each, 30 seconds apart.
    max_attempts: 5
    timeout_minutes: 5
    retry_wait_seconds: 30
    command: |
      set -eu
      python3 -m pip install rockset==1.0.3 'xdoctest>=1.1.0'
|
|
|
|
# Starts the monitor in the background, logging to usage_log.txt, and
# publishes its PID as the `monitor-script-pid` step output. Failures here do
# not fail the job (continue-on-error).
- name: Start monitoring script
  id: monitor-script
  continue-on-error: true
  shell: bash
  run: |
    # Windows conda doesn't have python3 binary, only python, but it's python3
    ${CONDA_RUN} python -m tools.stats.monitor > usage_log.txt 2>&1 &
    echo "monitor-script-pid=${!}" >> "${GITHUB_OUTPUT}"
|
|
|
|
# Fetches the artifact named after the build environment into a per-run
# folder, C:\<run id>\build-results.
- name: Download PyTorch Build Artifacts
  uses: seemethere/download-artifact-s3@v4
  with:
    name: ${{ inputs.build-environment }}
    path: C:\${{ github.run_id }}\build-results
|
|
|
|
# Prints the build-results directory tree, files included (/F), using
# PowerShell instead of the job's default bash shell.
- name: Check build-results folder
  shell: powershell
  run: |
    tree /F C:\$Env:GITHUB_RUN_ID\build-results
|
|
|
|
# Best effort: a failure of this step does not fail the job.
- name: Download TD artifacts
  uses: ./.github/actions/download-td-artifacts
  continue-on-error: true
|
|
|
|
# Runs regardless of earlier failures (always()); later steps read this
# step's job-id and job-name outputs.
- name: Get workflow job id
  id: get-job-id
  if: always()
  uses: ./.github/actions/get-workflow-job-id
  with:
    github-token: ${{ secrets.GITHUB_TOKEN }}
|
|
|
|
- name: Check for keep-going label and re-enabled test issues
  # The filter-test-configs action is reused here only because it conveniently
  # checks for labels and re-enabled test issues. It does not actually do any
  # filtering. All filtering is done in the build step.
  id: keep-going
  uses: ./.github/actions/filter-test-configs
  with:
    github-token: ${{ secrets.GITHUB_TOKEN }}
    job-name: ${{ steps.get-job-id.outputs.job-name }}
    test-matrix: ${{ inputs.test-matrix }}
|
|
|
|
# Publishes the `timeout` output: the job-level time limit minus 30 minutes.
- name: Set Test step time
  id: test-timeout
  shell: bash
  env:
    # Same expression as the job's own timeout-minutes.
    JOB_TIMEOUT: ${{ matrix.mem_leak_check == 'mem_leak_check' && 600 || inputs.timeout-minutes }}
  run: |
    echo "timeout=$((JOB_TIMEOUT-30))" >> "${GITHUB_OUTPUT}"
|
|
|
|
# Installs the wheel(s) found in the build-results folder (with the
# opt-einsum and optree extras) and then runs the Windows test script.
- name: Test
  id: test
  shell: bash
  # Uses the value computed by the test-timeout step.
  timeout-minutes: ${{ fromJson(steps.test-timeout.outputs.timeout) }}
  env:
    USE_CUDA: ${{ inputs.cuda-version != 'cpu' && '1' || '0' }}
    # Number-like values are quoted so YAML keeps them as strings
    # (an unquoted 3.8 is a float).
    INSTALL_WINDOWS_SDK: "1"
    PYTHON_VERSION: "3.8"
    # Flags reported by the keep-going (filter-test-configs) step.
    CONTINUE_THROUGH_ERROR: ${{ steps.keep-going.outputs.keep-going }}
    VERBOSE_TEST_LOGS: ${{ steps.keep-going.outputs.ci-verbose-test-logs }}
    NO_TEST_TIMEOUT: ${{ steps.keep-going.outputs.ci-no-test-timeout }}
    NO_TD: ${{ steps.keep-going.outputs.ci-no-td }}
    VC_PRODUCT: "BuildTools"
    VC_VERSION: ""
    VS_VERSION: "16.8.6"
    VC_YEAR: "2019"
    AWS_DEFAULT_REGION: us-east-1
    PR_NUMBER: ${{ github.event.pull_request.number }}
    GITHUB_REPOSITORY: ${{ github.repository }}
    GITHUB_WORKFLOW: ${{ github.workflow }}
    GITHUB_JOB: ${{ github.job }}
    GITHUB_RUN_ID: ${{ github.run_id }}
    GITHUB_RUN_NUMBER: ${{ github.run_number }}
    GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
    JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
    JOB_NAME: ${{ steps.get-job-id.outputs.job-name }}
    # Falls back to the workflow SHA when there is no pull request head.
    SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
    CUDA_VERSION: ${{ inputs.cuda-version }}
    PYTORCH_FINAL_PACKAGE_DIR: /c/${{ github.run_id }}/build-results/
    BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
    ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
    SHARD_NUMBER: ${{ matrix.shard }}
    NUM_TEST_SHARDS: ${{ matrix.num_shards }}
    TEST_CONFIG: ${{ matrix.config }}
    # Read from the keep-going step, which checks for re-enabled test issues.
    # The pull_request event payload has no `reenabled-issues` field, so the
    # previous expression always expanded to an empty string.
    # NOTE(review): confirm the output name against filter-test-configs.
    REENABLED_ISSUES: ${{ steps.keep-going.outputs.reenabled-issues }}
    TORCH_CUDA_ARCH_LIST: "8.6"
    PYTORCH_TEST_CUDA_MEM_LEAK_CHECK: ${{ matrix.mem_leak_check && '1' || '0' }}
    PYTORCH_TEST_RERUN_DISABLED_TESTS: ${{ matrix.rerun_disabled_tests && '1' || '0' }}
  run: |
    pushd "${PYTORCH_FINAL_PACKAGE_DIR}"
    # shellcheck disable=SC2046,SC2102
    python3 -mpip install $(echo *.whl)[opt-einsum,optree]
    popd

    .ci/pytorch/win-test.sh
|
|
|
|
# Runs only when the job is failing and the Test step itself concluded with
# 'failure'; an upload error does not fail the job.
- name: Upload pytest cache if tests failed
  if: failure() && steps.test.conclusion && steps.test.conclusion == 'failure'
  continue-on-error: true
  uses: ./.github/actions/pytest-cache-upload
  with:
    cache_dir: .pytest_cache
    job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
    sha: ${{ github.event.pull_request.head.sha || github.sha }}
    shard: ${{ matrix.shard }}
    test_config: ${{ matrix.config }}
|
|
|
|
# Dumps any *_toprint.log files under test/ once the Test step has a
# conclusion. Best effort: a missing match is ignored via `|| true`.
- name: Print remaining test logs
  shell: bash
  if: always() && steps.test.conclusion
  run: |
    # Without globstar, bash treats ** like a single *, so the pattern would
    # match exactly one directory level instead of recursing.
    shopt -s globstar
    cat test/**/*_toprint.log || true
|
|
|
|
# Kills the background monitor using the PID recorded by the monitor-script
# step; skipped when no PID was recorded. Errors here do not fail the job.
- name: Stop monitoring script
  if: always() && steps.monitor-script.outputs.monitor-script-pid
  continue-on-error: true
  shell: bash
  env:
    MONITOR_SCRIPT_PID: ${{ steps.monitor-script.outputs.monitor-script-pid }}
  run: |
    kill "$MONITOR_SCRIPT_PID"
|
|
|
|
# Runs whenever the Test step has a conclusion other than 'skipped'. The file
# suffix combines the job, config, shard, shard count, runner and job id.
- name: Upload test artifacts
  if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
  uses: ./.github/actions/upload-test-artifacts
  with:
    file-suffix: ${{ github.job }}-${{ matrix.config }}-${{ matrix.shard }}-${{ matrix.num_shards }}-${{ matrix.runner }}_${{ steps.get-job-id.outputs.job-id }}
|
|
|
|
# Runs the repository's parse_ref.py script; what it outputs is not visible
# from this workflow.
- name: Parse ref
  id: parse-ref
  shell: bash
  run: python3 .github/scripts/parse_ref.py
|
|
|
|
# Always attempted; a failed uninstall does not fail the job.
- name: Uninstall PyTorch
  if: always()
  continue-on-error: true
  shell: bash
  run: |
    # This step removes PyTorch installed by the test to give a clean slate
    # to the next job
    python3 -mpip uninstall -y torch
|
|
|
|
# Always runs, with its own 120-minute cap.
- name: Teardown Windows
  if: always()
  timeout-minutes: 120
  uses: ./.github/actions/teardown-win
|