mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
sccache-0.2.14 can query it through IMDSv1, and sccache-0.8.2 can do it through IMDSv2 (or maybe just use trust relationships between host and bucket). Pull Request resolved: https://github.com/pytorch/pytorch/pull/140611. Approved by: https://github.com/wdvr
213 lines
7.6 KiB
YAML
213 lines
7.6 KiB
YAML
# Reusable workflow (triggered only through `workflow_call`) that first
# filters the requested test matrix and then builds and tests with Bazel
# inside a docker container.
name: bazel

on:
  workflow_call:
    inputs:
      # Exported to the Build step as BUILD_ENVIRONMENT.
      build-environment:
        required: true
        type: string
        description: Top-level label for what's being built/tested.
      # Handed to the calculate-docker-image action to resolve the image.
      docker-image-name:
        required: true
        type: string
        description: Name of the base docker image to build with.
      # Any value other than "cpu" enables the nvidia driver setup step
      # (unless the job is already running inside a container runner).
      cuda-version:
        required: true
        type: string
        description: What CUDA version to build with (i.e. "11.7"), "cpu" for none.
      # Not referenced by any step in this workflow; see the description.
      sync-tag:
        required: false
        type: string
        default: ""
        description: |
          If this is set, our linter will use this to make sure that every other
          job with the same `sync-tag` is identical.
      # Passed to the filter-test-configs action in the `filter` job.
      test-matrix:
        required: true
        type: string
        description: |
          A JSON description of what configs to run later on.
      # Runner for the `filter` job only; `build-and-test` takes its runner
      # from the filtered matrix (`matrix.runner`).
      runner:
        required: false
        type: string
        default: "linux.large"
        description: Runner type
# Workflow-level environment, visible to every job in this file.
env:
  # Default branch of the repository attached to the triggering event. The
  # Build step forwards it into the build container with
  # `-e GIT_DEFAULT_BRANCH`.
  GIT_DEFAULT_BRANCH: ${{ github.event.repository.default_branch }}
jobs:
  # Runs the filter-test-configs action over the caller's test matrix and
  # republishes its outputs for the build-and-test job.
  filter:
    # Don't run on forked repos.
    if: github.repository_owner == 'pytorch'
    runs-on: ${{ inputs.runner }}
    outputs:
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
      is-test-matrix-empty: ${{ steps.filter.outputs.is-test-matrix-empty }}
      keep-going: ${{ steps.filter.outputs.keep-going }}
      reenabled-issues: ${{ steps.filter.outputs.reenabled-issues }}
    steps:
      # Shallow checkout without submodules; the next step uses a local
      # action from the checked-out tree.
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
        with:
          fetch-depth: 1
          submodules: false

      - name: Select all requested test configurations
        id: filter
        uses: ./.github/actions/filter-test-configs
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          test-matrix: ${{ inputs.test-matrix }}

  # Builds inside a detached docker container and then runs the tests in
  # that same container, once per entry of the filtered matrix.
  build-and-test:
    needs: filter
    # Don't run on forked repos.
    if: github.repository_owner == 'pytorch' && needs.filter.outputs.is-test-matrix-empty == 'False'
    strategy:
      matrix: ${{ fromJSON(needs.filter.outputs.test-matrix) }}
      fail-fast: false
    runs-on: ${{ matrix.runner }}
    steps:
      - name: Setup SSH (Click me for login details)
        uses: pytorch/test-infra/.github/actions/setup-ssh@main
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main

      - name: Setup Linux
        uses: ./.github/actions/setup-linux

      - name: Calculate docker image
        id: calculate-docker-image
        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
        with:
          docker-image-name: ${{ inputs.docker-image-name }}

      - name: Pull docker image
        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

      # Publishes the step output IN_CONTAINER_RUNNER as "true" when either
      # /.inarc or /.incontainer exists on the runner, "false" otherwise.
      - name: Check if in a container runner
        shell: bash
        id: check_container_runner
        run: echo "IN_CONTAINER_RUNNER=$(if [ -f /.inarc ] || [ -f /.incontainer ]; then echo true ; else echo false; fi)" >> "$GITHUB_OUTPUT"

      # Skipped for CPU builds and when already inside a container runner.
      - name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
        uses: pytorch/test-infra/.github/actions/setup-nvidia@main
        if: ${{ inputs.cuda-version != 'cpu' && steps.check_container_runner.outputs.IN_CONTAINER_RUNNER == 'false' }}

      - name: Output disk space left
        run: |
          sudo df -H

      # Appends every GITHUB* and CI* variable to a per-run file that the
      # Build step passes to `docker run --env-file`.
      - name: Preserve github env variables for use in docker
        run: |
          env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
          env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"

      - name: Parse ref
        id: parse-ref
        run: .github/scripts/parse_ref.py

      - name: Get workflow job id
        id: get-job-id
        uses: ./.github/actions/get-workflow-job-id
        if: always()
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}

      # Starts a detached container from the calculated image, runs
      # .ci/pytorch/build.sh in it, and records the container id in
      # GITHUB_ENV as `container_id` so the Test step can reuse it.
      - name: Build
        env:
          BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
          PR_NUMBER: ${{ github.event.pull_request.number }}
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          GITHUB_REPOSITORY: ${{ github.repository }}
          GITHUB_WORKFLOW: ${{ github.workflow }}
          GITHUB_JOB: ${{ github.job }}
          GITHUB_RUN_ID: ${{ github.run_id }}
          GITHUB_RUN_NUMBER: ${{ github.run_number }}
          GITHUB_RUN_ATTEMPT: ${{ github.run_attempt }}
          JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
          REENABLED_ISSUES: ${{ needs.filter.outputs.reenabled-issues }}
          # TODO duplicated
          AWS_DEFAULT_REGION: us-east-1
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          SCCACHE_REGION: us-east-1
          # Quoted so the value stays the string "5.2" instead of being
          # read as a YAML float.
          TORCH_CUDA_ARCH_LIST: "5.2"
          DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
          OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
          CUDA_VERSION: ${{ inputs.cuda-version }}
        run: |
          export SHARD_NUMBER=0
          # detached container should get cleaned up by teardown_ec2_linux
          # TODO: Stop building test binaries as part of the build phase
          # Make sure we copy test results from bazel-testlogs symlink to
          # a regular directory ./test/test-reports
          # shellcheck disable=SC2086
          container_name=$(docker run \
            ${GPU_FLAG:-} \
            -e AWS_DEFAULT_REGION \
            -e BUILD_ENVIRONMENT \
            -e GITHUB_ACTIONS \
            -e GITHUB_REPOSITORY \
            -e GITHUB_WORKFLOW \
            -e GITHUB_JOB \
            -e GITHUB_RUN_NUMBER \
            -e GITHUB_RUN_ATTEMPT \
            -e JOB_ID \
            -e GIT_DEFAULT_BRANCH="$GIT_DEFAULT_BRANCH" \
            -e SHARD_NUMBER \
            -e NUM_TEST_SHARDS \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SCCACHE_BUCKET \
            -e SCCACHE_REGION \
            -e SKIP_SCCACHE_INITIALIZATION=1 \
            -e REENABLED_ISSUES \
            -e TORCH_CUDA_ARCH_LIST \
            -e OUR_GITHUB_JOB_ID \
            -e CUDA_VERSION \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --shm-size="1g" \
            --tty \
            --detach \
            --user jenkins \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" sh -c '.ci/pytorch/build.sh'
          echo "container_id=${container_name}" >> "${GITHUB_ENV}"

      - name: Test
        id: test
        # Time out the test phase after 2 hours (120 minutes)
        timeout-minutes: 120
        # NOTE(review): because of the `&&`, bazel-testlogs is only copied to
        # ./test/test-reports when test.sh succeeds - confirm that skipping
        # the copy for failed runs is intended.
        run: |
          docker exec -t "${container_id}" sh -c '.ci/pytorch/test.sh && cp -Lr ./bazel-testlogs ./test/test-reports'

      # Best effort: `|| true` keeps the step green when no log matches.
      # NOTE(review): `globstar` is not enabled here, so `**` presumably
      # matches a single directory level only - confirm this is sufficient.
      - name: Print remaining test logs
        shell: bash
        if: always() && steps.test.conclusion
        run: |
          cat test/**/*_toprint.log || true

      - name: Chown workspace
        uses: ./.github/actions/chown-workspace
        if: always()

      # Runs whenever the Test step has a conclusion other than 'skipped',
      # including when it failed.
      - name: Upload test artifacts
        uses: ./.github/actions/upload-test-artifacts
        if: always() && steps.test.conclusion && steps.test.conclusion != 'skipped'
        with:
          file-suffix: bazel-${{ github.job }}_${{ steps.get-job-id.outputs.job-id }}

      - name: Teardown Linux
        uses: pytorch/test-infra/.github/actions/teardown-linux@main
        if: always()