[ROCm] fix test_cublas_workspace_explicit_allocation for gfx12 (#145227)

gfx12 passes the condition `torch.cuda.get_device_capability() >= (9, 4)` and uses `default_workspace_size=128MB`, but that size is required only for MI300
Fix the condition to use `("gfx94" in gcn_arch)` (from `torch.cuda.get_device_properties().gcnArchName`) instead of `torch.cuda.get_device_capability() >= (9, 4)` to detect MI300.
Now `default_workspace_size=32MB` is used for gfx12 and the test passes

Pull Request resolved: https://github.com/pytorch/pytorch/pull/145227
Approved by: https://github.com/jeffdaily, https://github.com/eqy
This commit is contained in:
Dmitry Nikolaev 2025-01-28 16:19:27 +00:00 committed by PyTorch MergeBot
parent 80a0412b76
commit 6967ef1b07
2 changed files with 6 additions and 2 deletions

View file

@@ -125,7 +125,8 @@ size_t parseChosenWorkspaceSize() {
}
/* 32MiB default, 128MiB for MI300 */
cudaDeviceProp* properties = at::cuda::getCurrentDeviceProperties();
const bool gfx94 = properties != nullptr && properties->major == 9 && properties->minor == 4;
std::string device_arch = properties->gcnArchName;
const bool gfx94 = device_arch.find("gfx94") != std::string::npos;
const size_t default_size = gfx94 ? 1024 * 128 * 1024 : 1024 * 32 * 1024;
#else
/* :4096:2:16:8 default, 32MiB for Hopper */

View file

@@ -454,7 +454,10 @@ class TestCuda(TestCase):
if torch.version.hip:
default_workspace_size = 1024 * 32 * 1024 # :1024:32 32MiB
# different size (128 MiB) expected on MI300 GPU
if torch.cuda.get_device_capability() >= (9, 4):
gcn_arch = str(
torch.cuda.get_device_properties(0).gcnArchName.split(":", 1)[0]
)
if "gfx94" in gcn_arch:
default_workspace_size = 1024 * 128 * 1024 # :1024:128
else:
default_workspace_size = (