From 6bcb545d9c31154c28084c96d55e097acf8e9097 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 28 Jan 2025 21:01:55 +0000 Subject: [PATCH] [CI][CUDA][cuSPARSELt] cusparselt 0.6.3 and cu121 related cleanups (#145793) Make ci cusparselt installation be consistent with nightly binary Remove cu121 related docker build jobs and inductor runs Update test failures relating to cu121 Retry of https://github.com/pytorch/pytorch/pull/145696 Pull Request resolved: https://github.com/pytorch/pytorch/pull/145793 Approved by: https://github.com/eqy, https://github.com/tinglvv --- .ci/docker/build.sh | 58 -------------- .ci/docker/common/install_cuda.sh | 75 ------------------- .ci/docker/common/install_cuda_aarch64.sh | 2 +- .ci/docker/common/install_cusparselt.sh | 20 ++--- .github/scripts/test_filter_test_configs.py | 55 -------------- .github/scripts/test_trymerge.py | 4 +- .github/workflows/docker-builds.yml | 3 - .../target-determination-indexer.yml | 2 +- .github/workflows/torchbench.yml | 20 ++--- .github/workflows/trunk.yml | 8 +- test/test_sparse_semi_structured.py | 8 +- 11 files changed, 33 insertions(+), 222 deletions(-) diff --git a/.ci/docker/build.sh b/.ci/docker/build.sh index 57b4681c22c..f857147c364 100755 --- a/.ci/docker/build.sh +++ b/.ci/docker/build.sh @@ -105,20 +105,6 @@ case "$image" in CONDA_CMAKE=yes TRITON=yes ;; - pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9) - CUDA_VERSION=12.1.1 - CUDNN_VERSION=9 - ANACONDA_PYTHON_VERSION=3.10 - GCC_VERSION=9 - PROTOBUF=yes - DB=yes - VISION=yes - KATEX=yes - UCX_COMMIT=${_UCX_COMMIT} - UCC_COMMIT=${_UCC_COMMIT} - CONDA_CMAKE=yes - TRITON=yes - ;; pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks) CUDA_VERSION=12.4.1 CUDNN_VERSION=9 @@ -134,36 +120,6 @@ case "$image" in TRITON=yes INDUCTOR_BENCHMARKS=yes ;; - pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks) - CUDA_VERSION=12.1.1 - CUDNN_VERSION=9 - ANACONDA_PYTHON_VERSION=3.10 - GCC_VERSION=9 - PROTOBUF=yes - DB=yes - VISION=yes - 
KATEX=yes - UCX_COMMIT=${_UCX_COMMIT} - UCC_COMMIT=${_UCC_COMMIT} - CONDA_CMAKE=yes - TRITON=yes - INDUCTOR_BENCHMARKS=yes - ;; - pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks) - CUDA_VERSION=12.1.1 - CUDNN_VERSION=9 - ANACONDA_PYTHON_VERSION=3.12 - GCC_VERSION=9 - PROTOBUF=yes - DB=yes - VISION=yes - KATEX=yes - UCX_COMMIT=${_UCX_COMMIT} - UCC_COMMIT=${_UCC_COMMIT} - CONDA_CMAKE=yes - TRITON=yes - INDUCTOR_BENCHMARKS=yes - ;; pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks) CUDA_VERSION=12.4.1 CUDNN_VERSION=9 @@ -208,20 +164,6 @@ case "$image" in CONDA_CMAKE=yes TRITON=yes ;; - pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9) - CUDA_VERSION=12.1.1 - CUDNN_VERSION=9 - ANACONDA_PYTHON_VERSION=3.10 - GCC_VERSION=9 - PROTOBUF=yes - DB=yes - VISION=yes - KATEX=yes - UCX_COMMIT=${_UCX_COMMIT} - UCC_COMMIT=${_UCC_COMMIT} - CONDA_CMAKE=yes - TRITON=yes - ;; pytorch-linux-focal-py3-clang10-onnx) ANACONDA_PYTHON_VERSION=3.9 CLANG_VERSION=10 diff --git a/.ci/docker/common/install_cuda.sh b/.ci/docker/common/install_cuda.sh index 08351b5303b..563d6a934d0 100644 --- a/.ci/docker/common/install_cuda.sh +++ b/.ci/docker/common/install_cuda.sh @@ -16,17 +16,6 @@ function install_cusparselt_040 { rm -rf tmp_cusparselt } -function install_cusparselt_052 { - # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html - mkdir tmp_cusparselt && pushd tmp_cusparselt - wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz - tar xf libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz - cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/include/* /usr/local/cuda/include/ - cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/ - popd - rm -rf tmp_cusparselt -} - function install_cusparselt_062 { # cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html mkdir tmp_cusparselt && pushd tmp_cusparselt @@ 
-83,39 +72,6 @@ function install_118 { ldconfig } -function install_121 { - echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2" - rm -rf /usr/local/cuda-12.1 /usr/local/cuda - # install CUDA 12.1.0 in the same container - wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run - chmod +x cuda_12.1.1_530.30.02_linux.run - ./cuda_12.1.1_530.30.02_linux.run --toolkit --silent - rm -f cuda_12.1.1_530.30.02_linux.run - rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.1 /usr/local/cuda - - # cuDNN license: https://developer.nvidia.com/cudnn/license_agreement - mkdir tmp_cudnn && cd tmp_cudnn - wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz - tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz - cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/ - cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/ - cd .. - rm -rf tmp_cudnn - - # NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses - # Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build - git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git - cd nccl && make -j src.build - cp -a build/include/* /usr/local/cuda/include/ - cp -a build/lib/* /usr/local/cuda/lib64/ - cd .. 
- rm -rf nccl - - install_cusparselt_052 - - ldconfig -} - function install_124 { CUDNN_VERSION=9.1.0.70 echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2" @@ -214,37 +170,6 @@ function prune_118 { rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.3.0 $CUDA_BASE/nsight-systems-2022.4.2/ } -function prune_121 { - echo "Pruning CUDA 12.1" - ##################################################################################### - # CUDA 12.1 prune static libs - ##################################################################################### - export NVPRUNE="/usr/local/cuda-12.1/bin/nvprune" - export CUDA_LIB_DIR="/usr/local/cuda-12.1/lib64" - - export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" - export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90" - - if [[ -n "$OVERRIDE_GENCODE" ]]; then - export GENCODE=$OVERRIDE_GENCODE - fi - - # all CUDA libs except CuDNN and CuBLAS - ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \ - | xargs -I {} bash -c \ - "echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}" - - # prune CuDNN and CuBLAS - $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a - $NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a - - ##################################################################################### - # CUDA 
12.1 prune visual tools - ##################################################################################### - export CUDA_BASE="/usr/local/cuda-12.1/" - rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2023.1.0 $CUDA_BASE/nsight-systems-2023.1.2/ -} - function prune_124 { echo "Pruning CUDA 12.4" ##################################################################################### diff --git a/.ci/docker/common/install_cuda_aarch64.sh b/.ci/docker/common/install_cuda_aarch64.sh index caede23e6ca..4a7a5c33bf5 100644 --- a/.ci/docker/common/install_cuda_aarch64.sh +++ b/.ci/docker/common/install_cuda_aarch64.sh @@ -57,7 +57,7 @@ function install_124 { cd .. rm -rf nccl - install_cusparselt_062 + install_cusparselt_063 ldconfig } diff --git a/.ci/docker/common/install_cusparselt.sh b/.ci/docker/common/install_cusparselt.sh index c4b3f3e02a7..0603739fb04 100644 --- a/.ci/docker/common/install_cusparselt.sh +++ b/.ci/docker/common/install_cusparselt.sh @@ -5,7 +5,15 @@ set -ex # cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html mkdir tmp_cusparselt && cd tmp_cusparselt -if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then +if [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-8]$ ]]; then + arch_path='sbsa' + export TARGETARCH=${TARGETARCH:-$(uname -m)} + if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then + arch_path='x86_64' + fi + CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.3.2-archive" + curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz +elif [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then arch_path='sbsa' export TARGETARCH=${TARGETARCH:-$(uname -m)} if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then @@ -13,17 +21,11 @@ if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then fi CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.2.3-archive" curl --retry 3 -OLs 
https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz -elif [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then - arch_path='sbsa' - export TARGETARCH=${TARGETARCH:-$(uname -m)} - if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then - arch_path='x86_64' - fi - CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.5.2.1-archive" - curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.4.0.7-archive" curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz +else + echo "Not sure which libcusparselt version to install for this ${CUDA_VERSION}" fi tar xf ${CUSPARSELT_NAME}.tar.xz diff --git a/.github/scripts/test_filter_test_configs.py b/.github/scripts/test_filter_test_configs.py index 2bc30fdc1e2..378f7223760 100755 --- a/.github/scripts/test_filter_test_configs.py +++ b/.github/scripts/test_filter_test_configs.py @@ -102,30 +102,6 @@ MOCKED_DISABLED_UNSTABLE_JOBS = { "manywheel-py3_8-cuda11_8-build", "", ], - "inductor / cuda12.1-py3.10-gcc9-sm86 / test (inductor)": [ - "pytorchbot", - "107079", - "https://github.com/pytorch/pytorch/issues/107079", - "inductor", - "cuda12.1-py3.10-gcc9-sm86", - "test (inductor)", - ], - "inductor / cuda12.1-py3.10-gcc9-sm86 / test (inductor_huggingface)": [ - "pytorchbot", - "109153", - "https://github.com/pytorch/pytorch/issues/109153", - "inductor", - "cuda12.1-py3.10-gcc9-sm86", - "test (inductor_huggingface)", - ], - "inductor / cuda12.1-py3.10-gcc9-sm86 / test (inductor_huggingface_dynamic)": [ - "pytorchbot", - "109154", - "https://github.com/pytorch/pytorch/issues/109154", - "inductor", - "cuda12.1-py3.10-gcc9-sm86", - "test (inductor_huggingface_dynamic)", - ], } MOCKED_PR_INFO = { @@ 
-637,37 +613,6 @@ class TestConfigFilter(TestCase): "expected": '{"include": [{"config": "default", "unstable": "unstable"}]}', "description": "Both binary build and test jobs are unstable", }, - { - "workflow": "inductor", - "job_name": "cuda12.1-py3.10-gcc9-sm86 / build", - "test_matrix": """ - { include: [ - { config: "inductor" }, - { config: "inductor_huggingface", shard: 1 }, - { config: "inductor_huggingface", shard: 2 }, - { config: "inductor_timm", shard: 1 }, - { config: "inductor_timm", shard: 2 }, - { config: "inductor_torchbench" }, - { config: "inductor_huggingface_dynamic" }, - { config: "inductor_torchbench_dynamic" }, - { config: "inductor_distributed" }, - ]} - """, - "expected": """ - { "include": [ - { "config": "inductor", "unstable": "unstable" }, - { "config": "inductor_huggingface", "shard": 1, "unstable": "unstable" }, - { "config": "inductor_huggingface", "shard": 2, "unstable": "unstable" }, - { "config": "inductor_timm", "shard": 1 }, - { "config": "inductor_timm", "shard": 2 }, - { "config": "inductor_torchbench" }, - { "config": "inductor_huggingface_dynamic", "unstable": "unstable" }, - { "config": "inductor_torchbench_dynamic" }, - { "config": "inductor_distributed" } - ]} - """, - "description": "Marking multiple unstable configurations", - }, ] for case in testcases: diff --git a/.github/scripts/test_trymerge.py b/.github/scripts/test_trymerge.py index af41345088d..1a152dc9594 100755 --- a/.github/scripts/test_trymerge.py +++ b/.github/scripts/test_trymerge.py @@ -535,8 +535,8 @@ class TestTryMerge(TestCase): def test_remove_job_name_suffix(self, *args: Any) -> None: test_cases = [ { - "name": "linux-bionic-cuda12.1-py3.10-gcc9-sm86 / test (default, 1, 5, linux.g5.4xlarge.nvidia.gpu)", - "expected": "linux-bionic-cuda12.1-py3.10-gcc9-sm86 / test (default)", + "name": "linux-bionic-cuda12.6-py3.10-gcc9-sm86 / test (default, 1, 5, linux.g5.4xlarge.nvidia.gpu)", + "expected": "linux-bionic-cuda12.6-py3.10-gcc9-sm86 / test (default)", 
}, { "name": "android-emulator-build-test / build-and-test (default, 1, 1, ubuntu-20.04-16x)", diff --git a/.github/workflows/docker-builds.yml b/.github/workflows/docker-builds.yml index 57897b8524d..b6125c0bd2a 100644 --- a/.github/workflows/docker-builds.yml +++ b/.github/workflows/docker-builds.yml @@ -52,9 +52,6 @@ jobs: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9, pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks, pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks, - pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9, - pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks, - pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks, pytorch-linux-focal-cuda12.4-cudnn9-py3.13-gcc9-inductor-benchmarks, pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9, pytorch-linux-focal-py3.9-clang10, diff --git a/.github/workflows/target-determination-indexer.yml b/.github/workflows/target-determination-indexer.yml index a6fd1da117c..7bc2d9d7ba9 100644 --- a/.github/workflows/target-determination-indexer.yml +++ b/.github/workflows/target-determination-indexer.yml @@ -37,7 +37,7 @@ jobs: id: calculate-docker-image uses: pytorch/test-infra/.github/actions/calculate-docker-image@main with: - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9 + docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9 working-directory: pytorch - name: Use following to pull public copy of the image diff --git a/.github/workflows/torchbench.yml b/.github/workflows/torchbench.yml index 378f71dad96..42e8b64e27c 100644 --- a/.github/workflows/torchbench.yml +++ b/.github/workflows/torchbench.yml @@ -21,15 +21,15 @@ jobs: curr_branch: ${{ github.head_ref || github.ref_name }} curr_ref_type: ${{ github.ref_type }} - linux-focal-cuda12_1-py3_10-gcc9-torchbench-build-gcp: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-torchbench-build-gcp: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-build.yml 
needs: - get-default-label-prefix with: runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}" - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 - docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 + docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks cuda-arch-list: '8.0' test-matrix: | { include: [ @@ -37,12 +37,12 @@ jobs: ]} secrets: inherit - linux-focal-cuda12_1-py3_10-gcc9-torchbench-test-gcp: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-torchbench-test-gcp: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-test.yml - needs: linux-focal-cuda12_1-py3_10-gcc9-torchbench-build-gcp + needs: linux-focal-cuda12_4-py3_10-gcc9-torchbench-build-gcp with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 - docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-torchbench-build-gcp.outputs.docker-image }} - test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-torchbench-build-gcp.outputs.test-matrix }} + build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 + docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-torchbench-build-gcp.outputs.docker-image }} + test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-torchbench-build-gcp.outputs.test-matrix }} secrets: inherit diff --git a/.github/workflows/trunk.yml b/.github/workflows/trunk.yml index 70c7c2dd63e..6e6029da823 100644 --- a/.github/workflows/trunk.yml +++ b/.github/workflows/trunk.yml @@ -237,12 +237,12 @@ jobs: secrets: inherit # NB: Keep this in sync with inductor-perf-test-nightly.yml - linux-focal-cuda12_1-py3_10-gcc9-inductor-build: - name: cuda12.1-py3.10-gcc9-sm80 + linux-focal-cuda12_4-py3_10-gcc9-inductor-build: + name: cuda12.4-py3.10-gcc9-sm80 uses: ./.github/workflows/_linux-build.yml needs: get-label-type with: - build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80 - 
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks +    build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80 +    docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks cuda-arch-list: '8.0' secrets: inherit diff --git a/test/test_sparse_semi_structured.py b/test/test_sparse_semi_structured.py index 532a55f0bfd..687e92b5df9 100644 --- a/test/test_sparse_semi_structured.py +++ b/test/test_sparse_semi_structured.py @@ -1209,12 +1209,12 @@ class TestSparseSemiStructuredCUSPARSELT(TestCase): # CUDA 11.8 has cuSPARSELt v0.4.0 support if version == (11, 8): assert torch.backends.cusparselt.version() == 400 - # CUDA 12.1 has cuSPARSELt v0.5.2 support - elif version == (12, 1): - assert torch.backends.cusparselt.version() == 502 - # CUDA 12.4+ has cuSPARSELt v0.6.2 support - elif version >= (12, 4): - assert torch.backends.cusparselt.version() == 602 + # PyTorch CUDA 12.6+ using cuSPARSELt v0.6.3 (check the newer version first: + # tuple comparison makes any version >= (12, 6) also >= (12, 4), so the + # narrower branch must precede the broader one or it is unreachable) + elif version >= (12, 6): + assert torch.backends.cusparselt.version() == 603 + # PyTorch CUDA 12.4 using cuSPARSELt v0.6.2 + elif version >= (12, 4): + assert torch.backends.cusparselt.version() == 602 else: assert torch.backends.cusparselt.version() is None