diff --git a/.jenkins/pytorch/test.sh b/.jenkins/pytorch/test.sh
index 1625ce40834..72a2ea2cc10 100755
--- a/.jenkins/pytorch/test.sh
+++ b/.jenkins/pytorch/test.sh
@@ -228,6 +228,15 @@ test_libtorch() {
   fi
 }
 
+test_distributed() {
+  if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
+    echo "Testing distributed C++ tests"
+    mkdir -p test/test-reports/cpp-distributed
+    build/bin/ProcessGroupGlooTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupGlooTest.xml
+    build/bin/ProcessGroupNCCLErrorsTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupNCCLErrorsTest.xml
+  fi
+}
+
 test_custom_backend() {
   if [[ "$BUILD_ENVIRONMENT" != *rocm* ]] && [[ "$BUILD_ENVIRONMENT" != *asan* ]] ; then
     echo "Testing custom backends"
@@ -371,4 +380,5 @@ else
   test_custom_script_ops
   test_custom_backend
   test_torch_function_benchmark
+  test_distributed
 fi
diff --git a/torch/lib/c10d/test/CUDATest.cu b/torch/lib/c10d/test/CUDATest.cu
index 870c54cd665..c47b29ea536 100644
--- a/torch/lib/c10d/test/CUDATest.cu
+++ b/torch/lib/c10d/test/CUDATest.cu
@@ -21,7 +21,7 @@ void cudaSleep(at::cuda::CUDAStream& stream, uint64_t clocks) {
 
 int cudaNumDevices() {
   int n = 0;
-  AT_CUDA_CHECK(cudaGetDeviceCount(&n));
+  C10_CUDA_CHECK_WARN(cudaGetDeviceCount(&n));
   return n;
 }
 
diff --git a/torch/lib/c10d/test/ProcessGroupGlooTest.cpp b/torch/lib/c10d/test/ProcessGroupGlooTest.cpp
index 1f2ee1c2e30..bdca745cbd8 100644
--- a/torch/lib/c10d/test/ProcessGroupGlooTest.cpp
+++ b/torch/lib/c10d/test/ProcessGroupGlooTest.cpp
@@ -224,7 +224,7 @@ void testBroadcast(const std::string& path, const at::DeviceType b) {
 
   std::vector<std::vector<at::Tensor>> inputs(size);
 
-  // Try every permutation of root rank and root tensoro
+  // Try every permutation of root rank and root tensor
   for (auto i = 0; i < size; i++) {
     for (auto j = 0; j < stride; j++) {
       // Initialize inputs
@@ -548,7 +548,7 @@ TEST(ProcessGroupGlooTest, testAllReduceCUDA) {
 
 TEST(ProcessGroupGlooTest, testBroadcastCUDA) {
   {
-    if (torch::cuda::is_available()) {
+    if (torch::cuda::device_count() > 1) {
       TemporaryFile file;
       testBroadcast(file.path, at::DeviceType::CUDA);
     }