From d35a600b74dc8cb4ddcc39e07aa309325affaab9 Mon Sep 17 00:00:00 2001 From: Shuqiang Zhang Date: Tue, 5 Nov 2024 15:16:35 -0800 Subject: [PATCH] [pgnccl] skip restart test for rocm (#139809) Summary: PG restart test is flaky in rocm: https://github.com/pytorch/pytorch/pull/139809, skip the AMD/ROCM test for now Test Plan: CI Tags: Pull Request resolved: https://github.com/pytorch/pytorch/pull/139809 Approved by: https://github.com/kwen2501 --- test/distributed/test_c10d_nccl.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/distributed/test_c10d_nccl.py b/test/distributed/test_c10d_nccl.py index 0dd46749b7c..f5f971c6ae7 100644 --- a/test/distributed/test_c10d_nccl.py +++ b/test/distributed/test_c10d_nccl.py @@ -348,6 +348,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase): dist.all_reduce(t) @requires_nccl() + @skip_if_rocm_multiprocess @skip_but_pass_in_sandcastle_if(not TEST_MULTIGPU, "NCCL test requires 2+ GPUs") def test_restart_pg(self): # Note: restart test passes steadily only for blocking mode for now.