From 6bcb545d9c31154c28084c96d55e097acf8e9097 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 28 Jan 2025 21:01:55 +0000 Subject: [PATCH] [CI][CUDA][cuSPARSELt] cusparselt 0.6.3 and cu121 related cleanups (#145793) Make ci cusparselt installation be consistent with nightly binary Remove cu121 related docker build jobs and inductor runs Update test failures relating to cu121 Retry of https://github.com/pytorch/pytorch/pull/145696 Pull Request resolved: https://github.com/pytorch/pytorch/pull/145793 Approved by: https://github.com/eqy, https://github.com/tinglvv --- .ci/docker/build.sh | 58 -------------- .ci/docker/common/install_cuda.sh | 75 ------------------- .ci/docker/common/install_cuda_aarch64.sh | 2 +- .ci/docker/common/install_cusparselt.sh | 20 ++--- .github/scripts/test_filter_test_configs.py | 55 -------------- .github/scripts/test_trymerge.py | 4 +- .github/workflows/docker-builds.yml | 3 - .../target-determination-indexer.yml | 2 +- .github/workflows/torchbench.yml | 20 ++--- .github/workflows/trunk.yml | 8 +- test/test_sparse_semi_structured.py | 8 +- 11 files changed, 33 insertions(+), 222 deletions(-) diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh index 57b4681c22c..f857147c364 100755 --- a/.ci/docker/build.sh +++ b/.ci/docker/build.sh @@ -105,20 +105,6 @@ case "$image" in CONDA_CMAKE=yes TRITON=yes ;; - pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9) - CUDA_VERSION=12.1.1 - CUDNN_VERSION=9 - ANACONDA_PYTHON_VERSION=3.10 - GCC_VERSION=9 - PROTOBUF=yes - DB=yes - VISION=yes - KATEX=yes - UCX_COMMIT=${_UCX_COMMIT} - UCC_COMMIT=${_UCC_COMMIT} - CONDA_CMAKE=yes - TRITON=yes - ;; pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks) CUDA_VERSION=12.4.1 CUDNN_VERSION=9 @@ -134,36 +120,6 @@ case "$image" in TRITON=yes INDUCTOR_BENCHMARKS=yes ;; - pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks) - CUDA_VERSION=12.1.1 - CUDNN_VERSION=9 - ANACONDA_PYTHON_VERSION=3.10 - GCC_VERSION=9 - PROTOBUF=yes - DB=yes - VISION=yes - 
KATEX=yes - UCX_COMMIT=${_UCX_COMMIT} - UCC_COMMIT=${_UCC_COMMIT} - CONDA_CMAKE=yes - TRITON=yes - INDUCTOR_BENCHMARKS=yes - ;; - pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks) - CUDA_VERSION=12.1.1 - CUDNN_VERSION=9 - ANACONDA_PYTHON_VERSION=3.12 - GCC_VERSION=9 - PROTOBUF=yes - DB=yes - VISION=yes - KATEX=yes - UCX_COMMIT=${_UCX_COMMIT} - UCC_COMMIT=${_UCC_COMMIT} - CONDA_CMAKE=yes - TRITON=yes - INDUCTOR_BENCHMARKS=yes - ;; pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks) CUDA_VERSION=12.4.1 CUDNN_VERSION=9 @@ -208,20 +164,6 @@ case "$image" in CONDA_CMAKE=yes TRITON=yes ;; - pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9) - CUDA_VERSION=12.1.1 - CUDNN_VERSION=9 - ANACONDA_PYTHON_VERSION=3.10 - GCC_VERSION=9 - PROTOBUF=yes - DB=yes - VISION=yes - KATEX=yes - UCX_COMMIT=${_UCX_COMMIT} - UCC_COMMIT=${_UCC_COMMIT} - CONDA_CMAKE=yes - TRITON=yes - ;; pytorch-linux-focal-py3-clang10-onnx) ANACONDA_PYTHON_VERSION=3.9 CLANG_VERSION=10 diff --git a/.ci/docker/common/install_cuda.sh b/.ci/docker/common/install_cuda.sh index 08351b5303b..563d6a934d0 100644 --- a/.ci/docker/common/install_cuda.sh +++ b/.ci/docker/common/install_cuda.sh @@ -16,17 +16,6 @@ function install_cusparselt_040 { rm -rf tmp_cusparselt } -function install_cusparselt_052 { - # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html - mkdir tmp_cusparselt && pushd tmp_cusparselt - wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz - tar xf libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz - cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/include/* /usr/local/cuda/include/ - cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/ - popd - rm -rf tmp_cusparselt -} - function install_cusparselt_062 { # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html mkdir tmp_cusparselt && pushd tmp_cusparselt @@ 
-83,39 +72,6 @@ function install_118 { ldconfig } -function install_121 { - echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2" - rm -rf /usr/local/cuda-12.1 /usr/local/cuda - # install CUDA 12.1.0 in the same container - wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run - chmod +x cuda_12.1.1_530.30.02_linux.run - ./cuda_12.1.1_530.30.02_linux.run --toolkit --silent - rm -f cuda_12.1.1_530.30.02_linux.run - rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.1 /usr/local/cuda - - # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement - mkdir tmp_cudnn && cd tmp_cudnn - wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz - tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz - cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ - cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ - cd .. - rm -rf tmp_cudnn - - # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses - # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build - git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git - cd nccl && make -j src.build - cp -a build/include/* /usr/local/cuda/include/ - cp -a build/lib/* /usr/local/cuda/lib64/ - cd .. 
- rm -rf nccl - - install_cusparselt_052 - - ldconfig -} - function install_124 { CUDNN_VERSION=9.1.0.70 echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2" @@ -214,37 +170,6 @@ function prune_118 { rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.3.0 $CUDA_BASE/nsight-systems-2022.4.2/ } -function prune_121 { - echo "Pruning CUDA 12.1" - ##################################################################################### - # CUDA 12.1 prune static libs - ##################################################################################### - export NVPRUNE="/usr/local/cuda-12.1/bin/nvprune" - export CUDA_LIB_DIR="/usr/local/cuda-12.1/lib64" - - export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" - export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" - - if [[ -n "$OVERRIDE_GENCODE" ]]; then - export GENCODE=$OVERRIDE_GENCODE - fi - - # all CUDA libs except CuDNN and CuBLAS - ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \ - | xargs -I {} bash -c \ - "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" - - # prune CuDNN and CuBLAS - $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a - $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a - - ##################################################################################### - # CUDA 
12.1 prune visual tools - ##################################################################################### - export CUDA_BASE="/usr/local/cuda-12.1/" - rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2023.1.0 $CUDA_BASE/nsight-systems-2023.1.2/ -} - function prune_124 { echo "Pruning CUDA 12.4" ##################################################################################### diff --git a/.ci/docker/common/install_cuda_aarch64.sh b/.ci/docker/common/install_cuda_aarch64.sh index caede23e6ca..4a7a5c33bf5 100644 --- a/.ci/docker/common/install_cuda_aarch64.sh +++ b/.ci/docker/common/install_cuda_aarch64.sh @@ -57,7 +57,7 @@ function install_124 { cd .. rm -rf nccl - install_cusparselt_062 + install_cusparselt_063 ldconfig } diff --git a/.ci/docker/common/install_cusparselt.sh b/.ci/docker/common/install_cusparselt.sh index c4b3f3e02a7..0603739fb04 100644 --- a/.ci/docker/common/install_cusparselt.sh +++ b/.ci/docker/common/install_cusparselt.sh @@ -5,7 +5,15 @@ set -ex # cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html mkdir tmp_cusparselt && cd tmp_cusparselt -if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then +if [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-8]$ ]]; then + arch_path='sbsa' + export TARGETARCH=${TARGETARCH:-$(uname -m)} + if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then + arch_path='x86_64' + fi + CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.3.2-archive" + curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz +elif [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then arch_path='sbsa' export TARGETARCH=${TARGETARCH:-$(uname -m)} if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then @@ -13,17 +21,11 @@ if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then fi CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.2.3-archive" curl --retry 3 -OLs 
https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz -elif [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then - arch_path='sbsa' - export TARGETARCH=${TARGETARCH:-$(uname -m)} - if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then - arch_path='x86_64' - fi - CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.5.2.1-archive" - curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.4.0.7-archive" curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz +else + echo "Not sure which libcusparselt version to install for this ${CUDA_VERSION}" fi tar xf ${CUSPARSELT_NAME}.tar.xz diff --git a/.github/scripts/test_filter_test_configs.py b/.github/scripts/test_filter_test_configs.py index 2bc30fdc1e2..378f7223760 100755 --- a/.github/scripts/test_filter_test_configs.py +++ b/.github/scripts/test_filter_test_configs.py @@ -102,30 +102,6 @@ MOCKED_DISABLED_UNSTABLE_JOBS = { "manywheel-py3_8-cuda11_8-build", "", ], - "inductor / cuda12.1-py3.10-gcc9-sm86 / test (inductor)": [ - "pytorchbot", - "107079", - "https://github.com/pytorch/pytorch/issues/107079", - "inductor", - "cuda12.1-py3.10-gcc9-sm86", - "test (inductor)", - ], - "inductor / cuda12.1-py3.10-gcc9-sm86 / test (inductor_huggingface)": [ - "pytorchbot", - "109153", - "https://github.com/pytorch/pytorch/issues/109153", - "inductor", - "cuda12.1-py3.10-gcc9-sm86", - "test (inductor_huggingface)", - ], - "inductor / cuda12.1-py3.10-gcc9-sm86 / test (inductor_huggingface_dynamic)": [ - "pytorchbot", - "109154", - "https://github.com/pytorch/pytorch/issues/109154", - "inductor", - "cuda12.1-py3.10-gcc9-sm86", - "test (inductor_huggingface_dynamic)", - ], } MOCKED_PR_INFO = { @@ 
-637,37 +613,6 @@ class TestConfigFilter(TestCase): "expected": '{"include": [{"config": "default", "unstable": "unstable"}]}', "description": "Both binary build and test jobs are unstable", }, - { - "workflow": "inductor", - "job_name": "cuda12.1-py3.10-gcc9-sm86 / build", - "test_matrix": """ - { include: [ - { config: "inductor" }, - { config: "inductor_huggingface", shard: 1 }, - { config: "inductor_huggingface", shard: 2 }, - { config: "inductor_timm", shard: 1 }, - { config: "inductor_timm", shard: 2 }, - { config: "inductor_torchbench" }, - { config: "inductor_huggingface_dynamic" }, - { config: "inductor_torchbench_dynamic" }, - { config: "inductor_distributed" }, - ]} - """, - "expected": """ - { "include": [ - { "config": "inductor", "unstable": "unstable" }, - { "config": "inductor_huggingface", "shard": 1, "unstable": "unstable" }, - { "config": "inductor_huggingface", "shard": 2, "unstable": "unstable" }, - { "config": "inductor_timm", "shard": 1 }, - { "config": "inductor_timm", "shard": 2 }, - { "config": "inductor_torchbench" }, - { "config": "inductor_huggingface_dynamic", "unstable": "unstable" }, - { "config": "inductor_torchbench_dynamic" }, - { "config": "inductor_distributed" } - ]} - """, - "description": "Marking multiple unstable configurations", - }, ] for case in testcases: diff --git a/.github/scripts/test_trymerge.py b/.github/scripts/test_trymerge.py index af41345088d..1a152dc9594 100755 --- a/.github/scripts/test_trymerge.py +++ b/.github/scripts/test_trymerge.py @@ -535,8 +535,8 @@ class TestTryMerge(TestCase): def test_remove_job_name_suffix(self, *args: Any) -> None: test_cases = [ { - "name": "linux-bionic-cuda12.1-py3.10-gcc9-sm86 / test (default, 1, 5, linux.g5.4xlarge.nvidia.gpu)", - "expected": "linux-bionic-cuda12.1-py3.10-gcc9-sm86 / test (default)", + "name": "linux-bionic-cuda12.6-py3.10-gcc9-sm86 / test (default, 1, 5, linux.g5.4xlarge.nvidia.gpu)", + "expected": "linux-bionic-cuda12.6-py3.10-gcc9-sm86 / test (default)", 
}, { "name": "android-emulator-build-test / build-and-test (default, 1, 1, ubuntu-20.04-16x)", diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml index 57897b8524d..b6125c0bd2a 100644 --- a/.github/workflows/docker-builds.yml +++ b/.github/workflows/docker-builds.yml @@ -52,9 +52,6 @@ jobs: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9, pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks, pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks, - pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9, - pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks, - pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks, pytorch-linux-focal-cuda12.4-cudnn9-py3.13-gcc9-inductor-benchmarks, pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9, pytorch-linux-focal-py3.9-clang10, diff --git a/.github/workflows/target-determination-indexer.yml b/.github/workflows/target-determination-indexer.yml index a6fd1da117c..7bc2d9d7ba9 100644 --- a/.github/workflows/target-determination-indexer.yml +++ b/.github/workflows/target-determination-indexer.yml @@ -37,7 +37,7 @@ jobs: id: calculate-docker-image uses: pytorch/test-infra/.github/actions/calculate-docker-image@main with: - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9 + docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9 working-directory: pytorch - name: Use following to pull public copy of the image diff --git a/.github/workflows/torchbench.yml b/.github/workflows/torchbench.yml index 378f71dad96..42e8b64e27c 100644 --- a/.github/workflows/torchbench.yml +++ b/.github/workflows/torchbench.yml @@ -21,15 +21,15 @@ jobs: curr_branch: ${{ github.head_ref || github.ref_name }} curr_ref_type: ${{ github.ref_type }} - linux-focal-cuda12_1-py3_10-gcc9-torchbench-build-gcp: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-torchbench-build-gcp: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-build.yml 
needs: - get-default-label-prefix with: runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 + docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks cuda-arch-list: '8.0' test-matrix: | { include: [ @@ -37,12 +37,12 @@ jobs: ]} secrets: inherit - linux-focal-cuda12_1-py3_10-gcc9-torchbench-test-gcp: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-torchbench-test-gcp: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-test.yml - needs: linux-focal-cuda12_1-py3_10-gcc9-torchbench-build-gcp + needs: linux-focal-cuda12_4-py3_10-gcc9-torchbench-build-gcp with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 - docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-torchbench-build-gcp.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-torchbench-build-gcp.outputs.test-matrix }} + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 + docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-torchbench-build-gcp.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-torchbench-build-gcp.outputs.test-matrix }} secrets: inherit diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 70c7c2dd63e..6e6029da823 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -237,12 +237,12 @@ jobs: secrets: inherit # NB: Keep this in sync with inductor-perf-test-nightly.yml - linux-focal-cuda12_1-py3_10-gcc9-inductor-build: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-inductor-build: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 - 
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks +    build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 +    docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks cuda-arch-list: '8.0' secrets: inherit diff --git a/test/test_sparse_semi_structured.py b/test/test_sparse_semi_structured.py index 532a55f0bfd..687e92b5df9 100644 --- a/test/test_sparse_semi_structured.py +++ b/test/test_sparse_semi_structured.py @@ -1209,12 +1209,12 @@ class TestSparseSemiStructuredCUSPARSELT(TestCase): # CUDA 11.8 has cuSPARSELt v0.4.0 support if version == (11, 8): assert torch.backends.cusparselt.version() == 400 - # CUDA 12.1 has cuSPARSELt v0.5.2 support - elif version == (12, 1): - assert torch.backends.cusparselt.version() == 502 - # CUDA 12.4+ has cuSPARSELt v0.6.2 support - elif version >= (12, 4): - assert torch.backends.cusparselt.version() == 602 + # PyTorch CUDA 12.6+ using cuSPARSELt v0.6.3 (check the newer version first: + # tuple comparison makes any version >= (12, 6) also >= (12, 4), so the + # narrower branch must precede the broader one or it is unreachable) + elif version >= (12, 6): + assert torch.backends.cusparselt.version() == 603 + # PyTorch CUDA 12.4 using cuSPARSELt v0.6.2 + elif version >= (12, 4): + assert torch.backends.cusparselt.version() == 602 else: assert torch.backends.cusparselt.version() is None