pytorch/.github/workflows/generated-linux-binary-manywheel-master.yml
Nikita Shulga c1c796446c [CI] Fix concurrency for tagged pushes (#81154)
For pull request or tagged pushes there should be only one active
workflow per PR/tag

For branch pushes there could be multiple

Test plan: See [branch push](https://github.com/malfet/deleteme/runs/7260062596?check_suite_focus=true#step:2:5) vs [tagged push](https://github.com/malfet/deleteme/runs/7260075080?check_suite_focus=true#step:2:5) in my toy repo

Fixes  https://github.com/pytorch/pytorch/issues/81148
Pull Request resolved: https://github.com/pytorch/pytorch/pull/81154
Approved by: https://github.com/suo, https://github.com/b0noI
2022-07-09 16:13:17 +00:00

289 lines
11 KiB
YAML
Generated

# @generated DO NOT EDIT MANUALLY
# Template is at: .github/templates/linux_binary_build_workflow.yml.j2
# Generation script: .github/scripts/generate_ci_workflows.py
name: linux-binary-manywheel
on:
push:
branches:
- master
tags:
- 'ciflow/trunk/*'
workflow_dispatch:
env:
# Needed for conda builds
ALPINE_IMAGE: "308535385114.dkr.ecr.us-east-1.amazonaws.com/tool/alpine"
ANACONDA_USER: pytorch
AWS_DEFAULT_REGION: us-east-1
BINARY_ENV_FILE: /tmp/env
BUILD_ENVIRONMENT: linux-binary-manywheel
BUILDER_ROOT: /builder
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
PYTORCH_ROOT: /pytorch
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
SKIP_ALL_TESTS: 1
concurrency:
group: linux-binary-manywheel-${{ github.event.pull_request.number || github.ref_name }}-${{ github.ref_type == 'branch' && github.sha }}-${{ github.event_name == 'workflow_dispatch' }}
cancel-in-progress: true
jobs:
manywheel-py3_7-cuda10_2-build:
if: ${{ github.repository_owner == 'pytorch' }}
runs-on: linux.4xlarge
timeout-minutes: 240
env:
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu102
GPU_ARCH_VERSION: 10.2
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.7"
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@master
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE}"
mkdir "${GITHUB_WORKSPACE}"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
submodules: recursive
repository: pytorch/builder
path: builder
- name: Clean pytorch/builder checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: builder
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Build PyTorch binary
run: |
set -x
mkdir -p artifacts/
container_name=$(docker run \
-e BINARY_ENV_FILE \
-e BUILDER_ROOT \
-e BUILD_ENVIRONMENT \
-e BUILD_SPLIT_CUDA \
-e DESIRED_CUDA \
-e DESIRED_DEVTOOLSET \
-e DESIRED_PYTHON \
-e GITHUB_ACTIONS \
-e GPU_ARCH_TYPE \
-e GPU_ARCH_VERSION \
-e LIBTORCH_VARIANT \
-e PACKAGE_TYPE \
-e PYTORCH_FINAL_PACKAGE_DIR \
-e PYTORCH_ROOT \
-e SKIP_ALL_TESTS \
--tty \
--detach \
-v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
-v "${GITHUB_WORKSPACE}/builder:/builder" \
-v "${RUNNER_TEMP}/artifacts:/artifacts" \
-w / \
"${DOCKER_IMAGE}"
)
docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash /builder/manywheel/build.sh"
- name: Chown artifacts
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "${RUNNER_TEMP}/artifacts:/v" -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- uses: seemethere/upload-artifact-s3@v5
with:
name: manywheel-py3_7-cuda10_2
retention-days: 14
if-no-files-found: error
path:
${{ runner.temp }}/artifacts/*
- name: Hold runner for 2 hours or until ssh sessions have drained
working-directory: pytorch/
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af
manywheel-py3_7-cuda10_2-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs: manywheel-py3_7-cuda10_2-build
runs-on: linux.4xlarge.nvidia.gpu
timeout-minutes: 240
env:
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu102
GPU_ARCH_VERSION: 10.2
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda10.2
SKIP_ALL_TESTS: 1
DESIRED_PYTHON: "3.7"
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@master
- name: Setup Linux
uses: ./.github/actions/setup-linux
- name: Chown workspace
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${ALPINE_IMAGE}"
# Ensure the working directory gets chowned back to the current user
docker run --pull=never --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Clean workspace
run: |
rm -rf "${GITHUB_WORKSPACE}"
mkdir "${GITHUB_WORKSPACE}"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"
uses: seemethere/add-github-ssh-key@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- uses: seemethere/download-artifact-s3@v4
name: Download Build Artifacts
with:
name: manywheel-py3_7-cuda10_2
path: "${{ runner.temp }}/artifacts/"
- name: Checkout PyTorch
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
submodules: recursive
path: pytorch
- name: Clean PyTorch checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: pytorch
- name: Checkout pytorch/builder
uses: zhouzhuojie/checkout@05b13c9a0d21f08f6d5e64a1d5042246d13619d9
with:
ref: main
submodules: recursive
repository: pytorch/builder
path: builder
- name: Clean pytorch/builder checkout
run: |
# Remove any artifacts from the previous checkouts
git clean -fxd
working-directory: builder
- uses: nick-fields/retry@71062288b76e2b6214ebde0e673ce0de1755740a
name: Install nvidia driver, nvidia-docker runtime, set GPU_FLAG
with:
timeout_minutes: 10
max_attempts: 3
command: |
set -ex
pushd pytorch
bash .github/scripts/install_nvidia_utils_linux.sh
echo "GPU_FLAG=--gpus all" >> "${GITHUB_ENV}"
popd
- name: Pull Docker image
run: |
retry () {
"$@" || (sleep 1 && "$@") || (sleep 2 && "$@")
}
retry docker pull "${DOCKER_IMAGE}"
- name: Test PyTorch binary
run: |
set -x
# shellcheck disable=SC2086,SC2090
container_name=$(docker run \
${GPU_FLAG:-} \
-e BINARY_ENV_FILE \
-e BUILDER_ROOT \
-e BUILD_ENVIRONMENT \
-e BUILD_SPLIT_CUDA \
-e DESIRED_CUDA \
-e DESIRED_DEVTOOLSET \
-e DESIRED_PYTHON \
-e GITHUB_ACTIONS \
-e GPU_ARCH_TYPE \
-e GPU_ARCH_VERSION \
-e LIBTORCH_VARIANT \
-e PACKAGE_TYPE \
-e PYTORCH_FINAL_PACKAGE_DIR \
-e PYTORCH_ROOT \
-e SKIP_ALL_TESTS \
--tty \
--detach \
-v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \
-v "${GITHUB_WORKSPACE}/builder:/builder" \
-v "${RUNNER_TEMP}/artifacts:/final_pkgs" \
-w / \
"${DOCKER_IMAGE}"
)
docker exec -t -w "${PYTORCH_ROOT}" "${container_name}" bash -c "bash .circleci/scripts/binary_populate_env.sh"
# Generate test script
docker exec -t -w "${PYTORCH_ROOT}" -e OUTPUT_SCRIPT="/run.sh" "${container_name}" bash -c "bash .circleci/scripts/binary_linux_test.sh"
docker exec -t "${container_name}" bash -c "source ${BINARY_ENV_FILE} && bash -x /run.sh"
- name: Hold runner for 2 hours or until ssh sessions have drained
working-directory: pytorch/
# Always hold for active ssh sessions
if: always()
run: .github/scripts/wait_for_ssh_to_drain.sh
- name: Chown workspace
if: always()
run: |
# Ensure the working directory gets chowned back to the current user
docker run --rm -v "$(pwd)":/v -w /v "${ALPINE_IMAGE}" chown -R "$(id -u):$(id -g)" .
- name: Kill containers, clean up images
if: always()
run: |
# ignore expansion of "docker ps -q" since it could be empty
# shellcheck disable=SC2046
docker stop $(docker ps -q) || true
# Prune all of the docker images
docker system prune -af