pytorch/.github/workflows/_docs.yml
Zain Rizvi 1bda3a3135 Migrate nightly.yml workflow & docs to Amazon 2023 (#131821)
A continuation of the migration started in
- https://github.com/pytorch/pytorch/pull/131250

Migrates nightly jobs and the linux-docs job in pull.yml

To preserve reusability, I'm switching to a new format here that lets a caller specify only the runner prefix instead of the full runner name, so that multiple jobs can continue using the same base runner type as they did before.

**Validation:**
- Nightly builds passed in the prev commit: https://github.com/pytorch/pytorch/actions/runs/10102118461/job/27937632823?pr=131821
- Latest commit only updated the docs job in pull.yml, and that has already passed: https://github.com/pytorch/pytorch/actions/runs/10114635537/job/27974392472?pr=131821

The other in-progress jobs are irrelevant
Pull Request resolved: https://github.com/pytorch/pytorch/pull/131821
Approved by: https://github.com/atalman, https://github.com/seemethere
2024-07-26 20:54:43 +00:00

226 lines
8.9 KiB
YAML

# Reusable workflow (invoked through `workflow_call`) that builds the cpp,
# python and functorch docs inside a Docker image and, when `push` is set,
# pushes them to the docs website.
name: build docs

on:
  workflow_call:
    inputs:
      build-environment:
        required: true
        type: string
        description: Top-level label for what's being built/tested.
      docker-image:
        required: true
        type: string
        description: Docker image to run in.
      push:
        required: false
        type: boolean
        default: false
        description: If set, push the docs to the docs website.
      run-doxygen:
        required: false
        type: boolean
        default: false
        description: If set, will enable C++ API doc generation using doxygen / breathe / exhale.
      sync-tag:
        required: false
        type: string
        default: ""
        description: |
          If this is set, our linter will use this to make sure that every other
          job with the same `sync-tag` is identical.
      s3-bucket:
        description: S3 bucket to download artifact
        required: false
        type: string
        default: "gha-artifacts"
      # Role used before the build artifacts are downloaded; the credentials
      # step is skipped when this is empty.
      aws-role-to-assume:
        description: role to assume for downloading artifacts
        required: false
        type: string
        default: ""
      # Role used before the docs previews are uploaded; the credentials step
      # is skipped when this is empty.
      upload-aws-role-to-assume:
        description: role to assume for uploading docs preview artifacts
        required: false
        type: string
        default: ""
      # Prepended verbatim to the base runner label of every matrix entry
      # (e.g. `<prefix>linux.2xlarge`); empty keeps the plain label.
      runner_prefix:
        description: prefix for runner label
        required: false
        type: string
        default: ""
    secrets:
      GH_PYTORCHBOT_TOKEN:
        required: false
        description: Permissions for pushing to the docs site.
jobs:
  # Builds one docs flavour per matrix entry (cpp, python, functorch) inside
  # the requested Docker image and uploads a preview for pull requests.
  build-docs:
    # Don't run on forked repos.
    if: github.repository_owner == 'pytorch'
    runs-on: ${{ matrix.runner }}
    # Only the main branch and v* tag refs get the `pytorchbot-env`
    # environment; every other ref runs without one.
    environment: ${{ (github.ref == 'refs/heads/main' || startsWith(github.event.ref, 'refs/tags/v')) && 'pytorchbot-env' || '' }}
    strategy:
      fail-fast: false
      matrix:
        # `timeout-minutes` here is a plain matrix value; it is applied to the
        # docs build step below, not to the whole job.
        include:
          - docs_type: cpp
            # We have recently been seeing lots of exit code 137 when running
            # this in Docker, indicating an OOM issue, so this upgrades the
            # runner from 4xlarge to the next available tier, 12xlarge. That is
            # a lot of memory just to generate the cpp docs.
            runner: ${{ inputs.runner_prefix }}linux.12xlarge
            # TODO: Nightly cpp docs take longer and longer to finish (more than 3h now)
            # Let's try to figure out how this can be improved
            timeout-minutes: 240
          - docs_type: python
            runner: ${{ inputs.runner_prefix }}linux.2xlarge
            # It takes less than 30m to finish python docs unless there are issues
            timeout-minutes: 30
          - docs_type: functorch
            runner: ${{ inputs.runner_prefix }}linux.2xlarge
            # It takes less than 15m to finish functorch docs unless there are issues
            timeout-minutes: 15
    # Set a fixed name for this job instead of using the matrix-generated name,
    # e.g. build-docs (cpp, linux.12xlarge, 240). The matrix-generated name
    # would require updating the Rockset last docs push query from test-infra
    # every time the matrix is updated.
    name: build-docs-${{ matrix.docs_type }}-${{ inputs.push }}
    steps:
      - name: Setup SSH (Click me for login details)
        uses: pytorch/test-infra/.github/actions/setup-ssh@main
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}
          instructions: |
            All builds are done inside the container, to start an interactive session run:
            docker exec -it $(docker container ps --format '{{.ID}}') bash
            To start Python docs build type:
            cd docs && make html && make coverage

      # [see note: pytorch repo ref]
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main

      - name: Setup Linux
        uses: ./.github/actions/setup-linux

      # Skipped when the caller did not pass a role for downloads.
      - name: configure aws credentials
        if: ${{ inputs.aws-role-to-assume != '' }}
        uses: aws-actions/configure-aws-credentials@v3
        with:
          role-to-assume: ${{ inputs.aws-role-to-assume }}
          role-session-name: gha-linux-test
          aws-region: us-east-1

      - name: Calculate docker image
        id: calculate-docker-image
        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
        with:
          docker-image-name: ${{ inputs.docker-image }}

      - name: Pull docker image
        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

      - name: Download build artifacts
        uses: ./.github/actions/download-build-artifacts
        with:
          name: ${{ inputs.build-environment }}
          s3-bucket: ${{ inputs.s3-bucket }}

      - name: Generate netrc (only for docs-push)
        if: inputs.push
        env:
          GITHUB_PYTORCHBOT_TOKEN: ${{ secrets.GH_PYTORCHBOT_TOKEN }}
        run: |
          # sometimes .netrc exists as a directory even though this is the temp folder
          rm -rf "${RUNNER_TEMP}/.netrc"
          # set credentials for https pushing
          echo "machine github.com" > "${RUNNER_TEMP}/.netrc"
          echo "login pytorchbot" >> "${RUNNER_TEMP}/.netrc"
          echo "password ${GITHUB_PYTORCHBOT_TOKEN}" >> "${RUNNER_TEMP}/.netrc"

      - name: Build ${{ matrix.docs_type }} docs
        # Per-flavour limit taken from the matrix entry above.
        timeout-minutes: ${{ matrix.timeout-minutes }}
        id: build-docs
        env:
          # After https://github.com/pytorch/pytorch/pull/88373, pull workflow can now be run periodically,
          # so using a schedule event to determine if the docs should be pushed or not doesn't hold true
          # anymore
          WITH_PUSH: ${{ inputs.push }}
          DOCKER_IMAGE: ${{ inputs.docker-image }}
          DOCS_TYPE: ${{ matrix.docs_type }}
          RUN_DOXYGEN: ${{ inputs.run-doxygen }}
          BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
        run: |
          set -ex
          # Convert refs/tags/v1.12.0rc3 into 1.12
          if [[ "${GITHUB_REF}" =~ ^refs/tags/v([0-9]+\.[0-9]+)\.* ]]; then
            target="${BASH_REMATCH[1]}"
          else
            target="main"
          fi
          # detached container should get cleaned up by teardown_ec2_linux
          container_name=$(docker run \
            -e BUILD_ENVIRONMENT \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e SHA1="$GITHUB_SHA" \
            -e DOCS_VERSION="${target}" \
            -e DOCS_TYPE \
            -e RUN_DOXYGEN \
            -e WITH_PUSH \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --tty \
            --detach \
            --user jenkins \
            -v "${RUNNER_TEMP}/.netrc":/var/lib/jenkins/.netrc \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${DOCKER_IMAGE}"
          )
          docker exec -t "${container_name}" bash -c "sudo chown -R jenkins . && pip install $(echo dist/*.whl)[opt-einsum] && ./.ci/pytorch/${DOCS_TYPE}_doc_push_script.sh"

      # Runs even when an earlier step failed.
      - name: Chown workspace
        uses: ./.github/actions/chown-workspace
        if: always()

      # Skipped when the caller did not pass a role for uploads.
      - name: configure aws credentials
        if: ${{ inputs.upload-aws-role-to-assume != '' }}
        uses: aws-actions/configure-aws-credentials@v3
        with:
          role-to-assume: ${{ inputs.upload-aws-role-to-assume }}
          role-session-name: gha-linux-test
          aws-region: us-east-1

      # Each preview upload below runs only for pull_request events, only for
      # its own docs_type, and only when the build step succeeded.
      - name: Upload Python Docs Preview
        uses: seemethere/upload-artifact-s3@v5
        if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'python' && steps.build-docs.outcome == 'success' }}
        with:
          retention-days: 14
          s3-bucket: doc-previews
          if-no-files-found: error
          path: pytorch_docs/main/
          s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}

      - name: Upload C++ Docs Preview
        uses: seemethere/upload-artifact-s3@v5
        if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'cpp' && steps.build-docs.outcome == 'success' }}
        with:
          retention-days: 14
          if-no-files-found: error
          s3-bucket: doc-previews
          path: cppdocs/
          s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}/cppdocs

      - name: Upload functorch Docs Preview
        uses: seemethere/upload-artifact-s3@v5
        if: ${{ github.event_name == 'pull_request' && matrix.docs_type == 'functorch' && steps.build-docs.outcome == 'success' }}
        with:
          retention-days: 14
          s3-bucket: doc-previews
          if-no-files-found: error
          path: functorch_ghpages/nightly/
          s3-prefix: pytorch/pytorch/${{ github.event.pull_request.number }}/functorchdocs

      # Runs even when an earlier step failed.
      - name: Teardown Linux
        uses: pytorch/test-infra/.github/actions/teardown-linux@main
        if: always()