Enable ProcessGroupGlooTest in CI (take 2) (#42086)

Summary:
Fixes https://github.com/pytorch/pytorch/issues/42073

Pull Request resolved: https://github.com/pytorch/pytorch/pull/42086

Reviewed By: ngimel

Differential Revision: D22765777

Pulled By: malfet

fbshipit-source-id: ebbcd44f448a1e7f9a3d18fa9967461129dd1dcd
Commit fbdaa555a2 (parent 96aaa311c0)
Authored by Nikita Shulga on 2020-07-27 10:20:07 -07:00, committed by Facebook GitHub Bot
3 changed files with 13 additions and 3 deletions


@@ -228,6 +228,15 @@ test_libtorch() {
   fi
 }
 
+test_distributed() {
+  if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
+    echo "Testing distributed C++ tests"
+    mkdir -p test/test-reports/cpp-distributed
+    build/bin/ProcessGroupGlooTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupGlooTest.xml
+    build/bin/ProcessGroupNCCLErrorsTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupNCCLErrorsTest.xml
+  fi
+}
+
 test_custom_backend() {
   if [[ "$BUILD_ENVIRONMENT" != *rocm* ]] && [[ "$BUILD_ENVIRONMENT" != *asan* ]] ; then
     echo "Testing custom backends"
@@ -371,4 +380,5 @@ else
   test_custom_script_ops
   test_custom_backend
   test_torch_function_benchmark
+  test_distributed
 fi


@@ -21,7 +21,7 @@ void cudaSleep(at::cuda::CUDAStream& stream, uint64_t clocks) {
 
 int cudaNumDevices() {
   int n = 0;
-  AT_CUDA_CHECK(cudaGetDeviceCount(&n));
+  C10_CUDA_CHECK_WARN(cudaGetDeviceCount(&n));
   return n;
 }
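
This change swaps a throwing error check for a warning-only one: AT_CUDA_CHECK raises an exception when the CUDA call fails (for instance on a machine with no usable CUDA driver), which aborts the test binary, whereas C10_CUDA_CHECK_WARN only logs the failure, so cudaNumDevices() falls back to reporting zero devices and the CUDA-only test paths are skipped. A minimal standalone sketch of the two behaviors, using the raw CUDA runtime API rather than the actual c10 macros (function names here are illustrative, not PyTorch's):

#include <cuda_runtime.h>
#include <iostream>
#include <stdexcept>

// Roughly the old behavior: any CUDA error becomes a thrown exception,
// which crashes a test binary running on a CUDA-less machine.
int numDevicesOrThrow() {
  int n = 0;
  cudaError_t err = cudaGetDeviceCount(&n);
  if (err != cudaSuccess) {
    throw std::runtime_error(cudaGetErrorString(err));
  }
  return n;
}

// Roughly the new behavior: log the failure and keep going, so callers
// see 0 devices and simply skip the CUDA-specific tests.
int numDevicesOrWarn() {
  int n = 0;
  cudaError_t err = cudaGetDeviceCount(&n);
  if (err != cudaSuccess) {
    std::cerr << "WARNING: cudaGetDeviceCount failed: "
              << cudaGetErrorString(err) << std::endl;
    n = 0;
  }
  return n;
}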


@@ -224,7 +224,7 @@ void testBroadcast(const std::string& path, const at::DeviceType b) {
   std::vector<std::vector<at::Tensor>> inputs(size);
 
-  // Try every permutation of root rank and root tensoro
+  // Try every permutation of root rank and root tensor
   for (auto i = 0; i < size; i++) {
     for (auto j = 0; j < stride; j++) {
       // Initialize inputs
@@ -548,7 +548,7 @@ TEST(ProcessGroupGlooTest, testAllReduceCUDA) {
 
 TEST(ProcessGroupGlooTest, testBroadcastCUDA) {
   {
-    if (torch::cuda::is_available()) {
+    if (torch::cuda::device_count() > 1) {
       TemporaryFile file;
       testBroadcast(file.path, at::DeviceType::CUDA);
     }
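
The last hunk tightens the guard from torch::cuda::is_available() to torch::cuda::device_count() > 1, presumably because the CUDA broadcast path spreads tensors across more than one device; on a single-GPU machine the test body is now silently skipped instead of failing. A hypothetical standalone sketch of the same guard written as its own gtest case, using GTEST_SKIP to make the skip visible in the test report (the test name and body are invented for illustration):

#include <gtest/gtest.h>
#include <torch/cuda.h>

// Hypothetical test showing the multi-GPU guard pattern: skip, rather
// than fail, when fewer than two CUDA devices are visible.
TEST(ExampleGuardTest, NeedsTwoGpus) {
  if (torch::cuda::device_count() <= 1) {
    GTEST_SKIP() << "requires at least two CUDA devices";
  }
  // Multi-device assertions would go here.
  EXPECT_GE(torch::cuda::device_count(), 2u);
}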