# Reusable workflow that builds PyTorch on Linux inside a Docker container and
# uploads the resulting artifacts (to S3, or to GitHub artifacts on s390x).
# Invoked from other workflows via `workflow_call`.
name: linux-build

on:
  workflow_call:
    inputs:
      build-environment:
        required: true
        type: string
        description: Top-level label for what's being built/tested.
      docker-image-name:
        required: true
        type: string
        description: Name of the base docker image to build with.
      build-generates-artifacts:
        required: false
        type: boolean
        default: true
        description: If set, upload generated build artifacts.
      build-with-debug:
        required: false
        type: boolean
        default: false
        description: If set, build in debug mode.
      sync-tag:
        required: false
        type: string
        default: ""
        description: |
          If this is set, our linter will use this to make sure that every
          other job with the same `sync-tag` is identical.
      cuda-arch-list:
        required: false
        type: string
        default: "5.2"
        description: |
          List of CUDA architectures CI build should target.
      runner_prefix:
        required: false
        default: ""
        type: string
        description: Prefix for runner label
      runner:
        required: false
        type: string
        default: "linux.2xlarge"
        description: |
          Label of the runner this job should run on.
      test-matrix:
        required: false
        type: string
        description: |
          An optional JSON description of what test configs to run later on.
          This is moved here from the Linux test workflow so that we can apply
          filter logic using test-config labels earlier and skip unnecessary
          builds
      selected-test-configs:
        description: |
          A comma-separated list of test configurations from the test matrix
          to keep. The empty list means we are going to keep every
          configuration by default.
        required: false
        type: string
        default: ""
      s3-bucket:
        description: S3 bucket to download artifact
        required: false
        type: string
        default: "gha-artifacts"
      aws-role-to-assume:
        description: Role to assume for downloading artifacts
        required: false
        type: string
        default: ""
      use_split_build:
        description: |
          [Experimental] Build a libtorch-only wheel and build pytorch such
          that its binaries are built from the libtorch wheel.
        required: false
        type: boolean
        default: false

    secrets:
      HUGGING_FACE_HUB_TOKEN:
        required: false
        description: |
          HF Auth token to avoid rate limits when downloading models or
          datasets from hub
      SCRIBE_GRAPHQL_ACCESS_TOKEN:
        required: false
        description: |
          FB app token to write to scribe endpoint

    outputs:
      docker-image:
        value: ${{ jobs.build.outputs.docker-image }}
        description: The docker image containing the built PyTorch.
      test-matrix:
        value: ${{ jobs.build.outputs.test-matrix }}
        description: An optional JSON description of what test configs to run later on.

jobs:
  build:
    # Select the scribe environment: protected for main/release branches,
    # the PR variant when the `ci-scribe` label is present, otherwise none.
    environment: ${{ github.ref == 'refs/heads/main' && 'scribe-protected' || startsWith(github.ref, 'refs/heads/release/') && 'scribe-protected' || contains(github.event.pull_request.labels.*.name, 'ci-scribe') && 'scribe-pr' || '' }}
    # Don't run on forked repos
    if: github.repository_owner == 'pytorch'
    runs-on: ${{ inputs.runner_prefix }}${{ inputs.runner }}
    timeout-minutes: 240
    outputs:
      docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}
      test-matrix: ${{ steps.filter.outputs.test-matrix }}
    steps:
      - name: Setup SSH (Click me for login details)
        uses: pytorch/test-infra/.github/actions/setup-ssh@main
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          github-secret: ${{ secrets.GITHUB_TOKEN }}

      # [pytorch repo ref]
      # Use a pytorch/pytorch reference instead of a reference to the local
      # checkout because when we run this action we don't *have* a local
      # checkout. In other cases you should prefer a local checkout.
      - name: Checkout PyTorch
        uses: pytorch/pytorch/.github/actions/checkout-pytorch@main
        with:
          no-sudo: true

      - name: Setup Linux
        uses: ./.github/actions/setup-linux
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'

      - name: configure aws credentials
        uses: aws-actions/configure-aws-credentials@v3
        if: ${{ inputs.aws-role-to-assume != '' && inputs.build-environment != 'linux-s390x-binary-manywheel' }}
        with:
          role-to-assume: ${{ inputs.aws-role-to-assume }}
          role-session-name: gha-linux-build
          aws-region: us-east-1

      - name: Calculate docker image
        id: calculate-docker-image
        uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          docker-image-name: ${{ inputs.docker-image-name }}

      - name: Use following to pull public copy of the image
        id: print-ghcr-mirror
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        env:
          ECR_DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
        shell: bash
        run: |
          tag=${ECR_DOCKER_IMAGE##*/}
          echo "docker pull ghcr.io/pytorch/ci-image:${tag/:/-}"

      - name: Pull docker image
        uses: pytorch/test-infra/.github/actions/pull-docker-image@main
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          docker-image: ${{ steps.calculate-docker-image.outputs.docker-image }}

      - name: Parse ref
        id: parse-ref
        run: .github/scripts/parse_ref.py

      - name: Get workflow job id
        id: get-job-id
        uses: ./.github/actions/get-workflow-job-id
        if: always()
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}

      # Apply the filter logic to the build step too if the test-config label is already there
      - name: Select all requested test configurations (if the test matrix is available)
        id: filter
        uses: ./.github/actions/filter-test-configs
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          test-matrix: ${{ inputs.test-matrix }}
          selected-test-configs: ${{ inputs.selected-test-configs }}
          job-name: ${{ steps.get-job-id.outputs.job-name }}

      - name: Download pytest cache
        uses: ./.github/actions/pytest-cache-download
        continue-on-error: true
        if: inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          cache_dir: .pytest_cache
          job_identifier: ${{ github.workflow }}_${{ inputs.build-environment }}
          s3_bucket: ${{ inputs.s3-bucket }}

      - name: Build
        # Skip the build entirely when the filter step determined that no test
        # config applies (unless no test matrix was provided at all).
        if: steps.filter.outputs.is-test-matrix-empty == 'False' || inputs.test-matrix == ''
        id: build
        env:
          BUILD_ENVIRONMENT: ${{ inputs.build-environment }}
          BRANCH: ${{ steps.parse-ref.outputs.branch }}
          # TODO duplicated
          AWS_DEFAULT_REGION: us-east-1
          PR_NUMBER: ${{ github.event.pull_request.number }}
          SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
          # Do not set SCCACHE_S3_KEY_PREFIX to share the cache between all build jobs
          SCCACHE_BUCKET: ossci-compiler-cache-circleci-v2
          SCCACHE_REGION: us-east-1
          XLA_CLANG_CACHE_S3_BUCKET_NAME: ossci-compiler-clang-cache-circleci-xla
          PR_LABELS: ${{ toJson(github.event.pull_request.labels.*.name) }}
          TORCH_CUDA_ARCH_LIST: ${{ inputs.cuda-arch-list }}
          DOCKER_IMAGE: ${{ steps.calculate-docker-image.outputs.docker-image }}
          DOCKER_IMAGE_S390X: ${{ inputs.docker-image-name }}
          XLA_CUDA: ${{ contains(inputs.build-environment, 'xla') && '0' || '' }}
          DEBUG: ${{ inputs.build-with-debug && '1' || '0' }}
          OUR_GITHUB_JOB_ID: ${{ steps.get-job-id.outputs.job-id }}
          HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}
          SCRIBE_GRAPHQL_ACCESS_TOKEN: ${{ secrets.SCRIBE_GRAPHQL_ACCESS_TOKEN }}
          USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
        run: |
          START_TIME=$(date +%s)
          if [[ ${BUILD_ENVIRONMENT} == *"s390x"* ]]; then
            JENKINS_USER=
            USED_IMAGE="${DOCKER_IMAGE_S390X}"

            # ensure that docker container cleanly exits in 12 hours
            # if for some reason cleanup action doesn't stop container
            # when job is cancelled
            DOCKER_SHELL_CMD="sleep 12h"

            # since some steps are skipped on s390x, if they are necessary, run them here
            env | grep '^GITHUB' >> "/tmp/github_env_${GITHUB_RUN_ID}"
            env | grep '^CI' >> "/tmp/github_env_${GITHUB_RUN_ID}"
          else
            JENKINS_USER="--user jenkins"
            USED_IMAGE="${DOCKER_IMAGE}"
            DOCKER_SHELL_CMD=
          fi

          # Leaving 1GB for the runner and other things
          TOTAL_AVAILABLE_MEMORY_IN_GB=$(awk '/MemTotal/ { printf "%.3f \n", $2/1024/1024 - 1 }' /proc/meminfo)
          # https://docs.docker.com/engine/containers/resource_constraints/#--memory-swap-details, the 3GB swap
          # comes from https://github.com/pytorch/test-infra/pull/6058
          TOTAL_MEMORY_WITH_SWAP=$(("${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}" + 3))

          # detached container should get cleaned up by teardown_ec2_linux
          # Used for JENKINS_USER and DOCKER_SHELL_CMD, which can be empty
          # shellcheck disable=SC2086
          container_name=$(docker run \
            -e BUILD_ENVIRONMENT \
            -e MAX_JOBS="$(nproc --ignore=2)" \
            -e AWS_DEFAULT_REGION \
            -e PR_NUMBER \
            -e SHA1 \
            -e BRANCH \
            -e SCCACHE_BUCKET \
            -e SCCACHE_REGION \
            -e XLA_CUDA \
            -e XLA_CLANG_CACHE_S3_BUCKET_NAME \
            -e SKIP_SCCACHE_INITIALIZATION=1 \
            -e TORCH_CUDA_ARCH_LIST \
            -e PR_LABELS \
            -e OUR_GITHUB_JOB_ID \
            -e HUGGING_FACE_HUB_TOKEN \
            -e SCRIBE_GRAPHQL_ACCESS_TOKEN \
            -e USE_SPLIT_BUILD \
            --memory="${TOTAL_AVAILABLE_MEMORY_IN_GB%.*}g" \
            --memory-swap="${TOTAL_MEMORY_WITH_SWAP}g" \
            --env-file="/tmp/github_env_${GITHUB_RUN_ID}" \
            --security-opt seccomp=unconfined \
            --cap-add=SYS_PTRACE \
            --tty \
            --detach \
            ${JENKINS_USER} \
            -v "${GITHUB_WORKSPACE}:/var/lib/jenkins/workspace" \
            -w /var/lib/jenkins/workspace \
            "${USED_IMAGE}" \
            ${DOCKER_SHELL_CMD}
          )
          docker exec -t "${container_name}" sh -c '.ci/pytorch/build.sh'

          END_TIME=$(date +%s)
          echo "build_time=$((END_TIME - START_TIME))" >> "$GITHUB_OUTPUT"

      - name: Archive artifacts into zip
        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped'
        run: |
          zip -1 -r artifacts.zip dist/ build/custom_test_artifacts build/lib build/bin .additional_ci_files

      - name: Store PyTorch Build Artifacts on S3
        uses: seemethere/upload-artifact-s3@v5
        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          name: ${{ inputs.build-environment }}
          retention-days: 14
          if-no-files-found: error
          path: artifacts.zip
          s3-bucket: ${{ inputs.s3-bucket }}

      - name: Store PyTorch Build Artifacts on S3 for split build
        uses: seemethere/upload-artifact-s3@v5
        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment != 'linux-s390x-binary-manywheel'
        with:
          name: ${{ inputs.build-environment }}-experimental-split-build
          retention-days: 14
          if-no-files-found: error
          path: artifacts.zip
          s3-bucket: ${{ inputs.s3-bucket }}

      # s390x runners cannot use the S3 uploader; fall back to GitHub artifacts.
      - name: Store PyTorch Build Artifacts for s390x
        uses: actions/upload-artifact@v4
        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && !inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
        with:
          name: ${{ inputs.build-environment }}
          retention-days: 14
          if-no-files-found: error
          path: artifacts.zip

      - name: Store PyTorch Build Artifacts for s390x for split build
        uses: actions/upload-artifact@v4
        if: inputs.build-generates-artifacts && steps.build.outcome != 'skipped' && inputs.use_split_build && inputs.build-environment == 'linux-s390x-binary-manywheel'
        with:
          name: ${{ inputs.build-environment }}-experimental-split-build
          retention-days: 14
          if-no-files-found: error
          path: artifacts.zip

      - name: Upload sccache stats
        if: steps.build.outcome != 'skipped' && inputs.build-environment != 'linux-s390x-binary-manywheel'
        uses: ./.github/actions/upload-sccache-stats
        with:
          github-token: ${{ secrets.GITHUB_TOKEN }}
          build-time: ${{ steps.build.outputs.build_time }}

      - name: Teardown Linux
        uses: pytorch/test-infra/.github/actions/teardown-linux@main
        if: always() && inputs.build-environment != 'linux-s390x-binary-manywheel'

      - name: Cleanup docker
        if: always() && inputs.build-environment == 'linux-s390x-binary-manywheel'
        shell: bash
        run: |
          # on s390x stop the container for clean worker stop
          # NOTE(review): plain `docker stop`/`docker kill` accept no -a flag;
          # this presumably relies on the s390x runners providing a podman
          # alias (where -a/--all is valid) — confirm against the runner setup
          docker stop -a || true
          docker kill -a || true