fixed c10d test (#10557)

Summary:
Fixed the NCCL test, which is not currently run in CI. We should enable it in CI soon.
```
~/new_pytorch/pytorch/test$ python test_c10d.py
...............
----------------------------------------------------------------------
Ran 15 tests in 13.099s

OK
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/10557

Reviewed By: ailzhang

Differential Revision: D9353286

Pulled By: teng-li

fbshipit-source-id: 5a722975beaa601203f51c723522cc881f2d2090
This commit is contained in:
Teng Li 2018-08-15 17:15:19 -07:00 committed by Facebook Github Bot
parent 0a809fc8b1
commit 05dcf00644

View file

@@ -432,7 +432,7 @@ class ProcessGroupNCCLTest(TestCase):
def test_reduce_ops(self):
store = c10d.FileStore(self.file.name)
pg = c10d.ProcessGroupNCCL(store, self.rank, self.size)
pg = c10d.ProcessGroupNCCL(store, self.rank, self.world_size)
def reduce(xs, rootRank, rootTensor):
opts = c10d.ReduceOptions()
@@ -455,7 +455,7 @@ class ProcessGroupNCCLTest(TestCase):
def test_allgather_ops(self):
store = c10d.FileStore(self.file.name)
pg = c10d.ProcessGroupNCCL(store, self.rank, self.size)
pg = c10d.ProcessGroupNCCL(store, self.rank, self.world_size)
def allgather(output_ts, input_ts):
work = pg.allgather(output_ts, input_ts)
@@ -465,7 +465,7 @@ class ProcessGroupNCCLTest(TestCase):
output_ts = [[] for _ in range(self.num_gpus)]
for idx, ls in enumerate(output_ts):
for _ in range(self.size):
for _ in range(self.world_size):
ls.append(torch.Tensor([0]).cuda(idx))
for i in range(self.num_gpus):