[Split Build] Support nightly release (#129011)

This PR adds the split build to our binaries workflow. Validation for the workflow is done using the PR above in conjunction with https://github.com/pytorch/builder/pull/1876.

Test Workflow: Check CI in the workflow above
Pull Request resolved: https://github.com/pytorch/pytorch/pull/129011
Approved by: https://github.com/atalman
This commit is contained in:
PaliC 2024-06-21 16:31:26 -07:00 committed by PyTorch MergeBot
parent b72ef9df0d
commit b0044e2e18
12 changed files with 1192 additions and 3 deletions

View file

@ -97,8 +97,19 @@ if [[ "$PACKAGE_TYPE" == conda ]]; then
)
elif [[ "$PACKAGE_TYPE" != libtorch ]]; then
if [[ "\$BUILD_ENVIRONMENT" != *s390x* ]]; then
pip install "\$pkg" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}"
retry pip install -q numpy protobuf typing-extensions
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
pkg="$(ls -1 /final_pkgs/torch_no_python* | sort |tail -1)"
# todo: after folder is populated use the pypi_pkg channel instead
pip install "\$pkg" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}_pypi_pkg"
retry pip install -q numpy protobuf typing-extensions
pkg="$(ls -1 /final_pkgs/torch-* | sort |tail -1)"
# todo: after folder is populated use the pypi_pkg channel instead
pip install "\$pkg" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}_pypi_pkg"
retry pip install -q numpy protobuf typing-extensions
else
pip install "\$pkg" --index-url "https://download.pytorch.org/whl/\${CHANNEL}/${DESIRED_CUDA}"
retry pip install -q numpy protobuf typing-extensions
fi
else
pip install "\$pkg"
retry pip install -q numpy protobuf typing-extensions

View file

@ -136,6 +136,7 @@ export DESIRED_PYTHON="${DESIRED_PYTHON:-}"
export DESIRED_CUDA="$DESIRED_CUDA"
export LIBTORCH_VARIANT="${LIBTORCH_VARIANT:-}"
export BUILD_PYTHONLESS="${BUILD_PYTHONLESS:-}"
export USE_SPLIT_BUILD="${USE_SPLIT_BUILD:-}"
if [[ "${OSTYPE}" == "msys" ]]; then
export LIBTORCH_CONFIG="${LIBTORCH_CONFIG:-}"
if [[ "${LIBTORCH_CONFIG:-}" == 'debug' ]]; then

View file

@ -25,6 +25,10 @@ if [[ "${DRY_RUN}" = "disabled" ]]; then
AWS_S3_CP="aws s3 cp"
fi
if [[ "$USE_SPLIT_BUILD" == "true" ]]; then
UPLOAD_SUBFOLDER="${UPLOAD_SUBFOLDER}_pypi_pkg"
fi
# Sleep 2 minutes between retries for conda upload
retry () {
"$@" || (sleep 5m && "$@") || (sleep 5m && "$@") || (sleep 5m && "$@") || (sleep 5m && "$@")

View file

@ -26,6 +26,7 @@ runs:
-e PYTORCH_FINAL_PACKAGE_DIR \
-e PYTORCH_ROOT \
-e SKIP_ALL_TESTS \
-e USE_SPLIT_BUILD \
--tty \
--detach \
-v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \

View file

@ -386,6 +386,31 @@ def generate_wheels_matrix(
),
}
)
if arch_version != "cuda-aarch64":
ret.append(
{
"python_version": python_version,
"gpu_arch_type": gpu_arch_type,
"gpu_arch_version": gpu_arch_version,
"desired_cuda": translate_desired_cuda(
gpu_arch_type, gpu_arch_version
),
"use_split_build": "True",
"devtoolset": (
"cxx11-abi" if arch_version == "cuda-aarch64" else ""
),
"container_image": WHEEL_CONTAINER_IMAGES[arch_version],
"package_type": package_type,
"pytorch_extra_install_requirements": (
PYTORCH_EXTRA_INSTALL_REQUIREMENTS[arch_version] # fmt: skip
if os != "linux-aarch64"
else ""
),
"build_name": f"{package_type}-py{python_version}-{gpu_arch_type}{gpu_arch_version}-split".replace( # noqa: B950
".", "_"
),
}
)
else:
ret.append(
{

View file

@ -30,6 +30,9 @@
{%- if config["devtoolset"] %}
DESIRED_DEVTOOLSET: !{{ config["devtoolset"] }}
{%- endif %}
{%- if config.use_split_build is defined %}
use_split_build: !{{ config["use_split_build"] }}
{%- endif %}
{%- endif %}
{%- if config["package_type"] == "libtorch" %}
{%- if config["libtorch_config"] %}
@ -44,6 +47,7 @@
# without this value pip does not get installed for some reason
DESIRED_PYTHON: "3.8"
{%- endif %}
{%- else %}
DESIRED_PYTHON: "!{{ config["python_version"] }}"
{%- endif %}

View file

@ -21,6 +21,13 @@ on:
default: 210
type: number
description: timeout for the job
use_split_build:
description: |
[Experimental] Build a libtorch only wheel and build pytorch such that
are built from the libtorch wheel.
required: false
type: boolean
default: false
ALPINE_IMAGE:
required: false
type: string
@ -110,6 +117,7 @@ jobs:
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
steps:
- name: Make the env permanent during this workflow (but not the secrets)
shell: bash
@ -137,6 +145,7 @@ jobs:
echo "PR_NUMBER=${{ env.PR_NUMBER }}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
echo "SHA1=${{ env.SHA1 }}"
echo "USE_SPLIT_BUILD=${{ env.use_split_build }}"
} >> "${GITHUB_ENV} }}"
- name: List the env
@ -246,6 +255,7 @@ jobs:
-e PYTORCH_ROOT \
-e SKIP_ALL_TESTS \
-e PYTORCH_EXTRA_INSTALL_REQUIREMENTS \
-e USE_SPLIT_BUILD \
--tty \
--detach \
-v "${GITHUB_WORKSPACE}/pytorch:/pytorch" \

View file

@ -63,6 +63,13 @@ on:
required: true
type: string
description: Hardware to run this job on. Valid values are linux.4xlarge, linux.4xlarge.nvidia.gpu, linux.arm64.2xlarge, and linux.rocm.gpu
use_split_build:
description: |
[Experimental] Build a libtorch only wheel and build pytorch such that
are built from the libtorch wheel.
required: false
type: boolean
default: false
secrets:
github-token:
required: true
@ -97,6 +104,7 @@ jobs:
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
steps:
- name: Make the env permanent during this workflow (but not the secrets)
shell: bash
@ -124,6 +132,7 @@ jobs:
echo "PR_NUMBER=${{ env.PR_NUMBER }}"
echo "PYTORCH_FINAL_PACKAGE_DIR=${{ env.PYTORCH_FINAL_PACKAGE_DIR }}"
echo "SHA1=${{ env.SHA1 }}"
echo "USE_SPLIT_BUILD=${{ env.USE_SPLIT_BUILD }}"
} >> "${GITHUB_ENV} }}"
- name: "[FB EMPLOYEES] Enable SSH (Click me for login details)"

View file

@ -55,6 +55,13 @@ on:
required: false
type: string
description: Desired python version
use_split_build:
description: |
[Experimental] Build a libtorch only wheel and build pytorch such that
are built from the libtorch wheel.
required: false
type: boolean
default: false
secrets:
github-token:
required: true
@ -93,6 +100,7 @@ jobs:
PR_NUMBER: ${{ github.event.pull_request.number }}
PYTORCH_FINAL_PACKAGE_DIR: /artifacts
SHA1: ${{ github.event.pull_request.head.sha || github.sha }}
USE_SPLIT_BUILD: ${{ inputs.use_split_build }}
steps:
- name: Checkout PyTorch
uses: pytorch/pytorch/.github/actions/checkout-pytorch@main

View file

@ -72,6 +72,48 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda11_8-split-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda11_8-split
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu11==11.8.89; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu11==11.8.87; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu11==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu11==11.11.3.6; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu11==10.9.0.58; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu11==10.3.0.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu11==11.4.1.48; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu11==11.7.5.86; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu11==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu11==11.8.86; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda11_8-split-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs: manywheel-py3_8-cuda11_8-split-build
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu118
GPU_ARCH_VERSION: 11.8
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda11.8-main
use_split_build: True
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda11_8-split
build_environment: linux-binary-manywheel
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda12_1-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
@ -112,6 +154,48 @@ jobs:
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda12_1-split-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda12_1-split
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.1.3.1; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.0.2.54; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.2.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.4.5.107; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.1.0.106; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.1.105; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda12_1-split-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs: manywheel-py3_8-cuda12_1-split-build
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu121
GPU_ARCH_VERSION: 12.1
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.1-main
use_split_build: True
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda12_1-split
build_environment: linux-binary-manywheel
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda12_4-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
@ -151,3 +235,45 @@ jobs:
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda12_4-split-build:
if: ${{ github.repository_owner == 'pytorch' }}
uses: ./.github/workflows/_binary-build-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda12_4-split
build_environment: linux-binary-manywheel
PYTORCH_EXTRA_INSTALL_REQUIREMENTS: nvidia-cuda-nvrtc-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-runtime-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cuda-cupti-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cudnn-cu12==9.1.0.70; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cublas-cu12==12.4.2.65; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cufft-cu12==11.2.0.44; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-curand-cu12==10.3.5.119; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusolver-cu12==11.6.0.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-cusparse-cu12==12.3.0.142; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nccl-cu12==2.20.5; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvtx-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64' | nvidia-nvjitlink-cu12==12.4.99; platform_system == 'Linux' and platform_machine == 'x86_64'
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}
manywheel-py3_8-cuda12_4-split-test: # Testing
if: ${{ github.repository_owner == 'pytorch' }}
needs: manywheel-py3_8-cuda12_4-split-build
uses: ./.github/workflows/_binary-test-linux.yml
with:
PYTORCH_ROOT: /pytorch
BUILDER_ROOT: /builder
PACKAGE_TYPE: manywheel
# TODO: This is a legacy variable that we eventually want to get rid of in
# favor of GPU_ARCH_VERSION
DESIRED_CUDA: cu124
GPU_ARCH_VERSION: 12.4
GPU_ARCH_TYPE: cuda
DOCKER_IMAGE: pytorch/manylinux-builder:cuda12.4-main
use_split_build: True
DESIRED_PYTHON: "3.8"
build_name: manywheel-py3_8-cuda12_4-split
build_environment: linux-binary-manywheel
runs_on: linux.4xlarge.nvidia.gpu
secrets:
github-token: ${{ secrets.GITHUB_TOKEN }}

File diff suppressed because it is too large Load diff

View file

@ -344,7 +344,7 @@ cmake_python_include_dir = sysconfig.get_path("include")
################################################################################
package_name = os.getenv("TORCH_PACKAGE_NAME", "torch")
LIBTORCH_PKG_NAME = os.getenv("LIBTORCH_PACKAGE_NAME", "libtorch")
LIBTORCH_PKG_NAME = os.getenv("LIBTORCH_PACKAGE_NAME", "torch_no_python")
if BUILD_LIBTORCH_WHL:
package_name = LIBTORCH_PKG_NAME