mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
[c10d] Test needs abort; otherwise will hang (#141509)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/141509 Approved by: https://github.com/wz337, https://github.com/fduwjj
This commit is contained in:
parent
5accae4197
commit
9e299b883b
1 changed file with 3 additions and 0 deletions
|
|
@@ -2913,6 +2913,9 @@ class NcclErrorHandlingTest(MultiProcessTestCase):
|
|||
# nccl error happening before rank 0 times out
|
||||
time.sleep(4)
|
||||
|
||||
# Mimicking all ranks sensing the timeout, abort
|
||||
process_group.abort()
|
||||
|
||||
if prev_nccl_async_error_handling is not None:
|
||||
os.environ[
|
||||
"TORCH_NCCL_ASYNC_ERROR_HANDLING"
|
||||
|
|
|
|||
Loading…
Reference in a new issue