mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Enable ProcessGroupGlooTest in CI (take 2) (#42086)
Summary: Fixes https://github.com/pytorch/pytorch/issues/42073
Pull Request resolved: https://github.com/pytorch/pytorch/pull/42086
Reviewed By: ngimel
Differential Revision: D22765777
Pulled By: malfet
fbshipit-source-id: ebbcd44f448a1e7f9a3d18fa9967461129dd1dcd
This commit is contained in:
parent
96aaa311c0
commit
fbdaa555a2
3 changed files with 13 additions and 3 deletions
|
|
@ -228,6 +228,15 @@ test_libtorch() {
|
|||
fi
|
||||
}
|
||||
|
||||
# Runs the distributed C++ test binaries and writes their gtest XML reports
# under test/test-reports/cpp-distributed. Does nothing unless
# $BUILD_ENVIRONMENT contains the substring "cuda".
test_distributed() {
|
||||
# Gate: only builds whose environment name matches *cuda* run these tests.
if [[ "$BUILD_ENVIRONMENT" == *cuda* ]]; then
|
||||
echo "Testing distributed C++ tests"
|
||||
# Create the report directory up front (-p: no error if it already exists).
mkdir -p test/test-reports/cpp-distributed
|
||||
# Each binary is invoked with --gtest_output=xml:<path> naming its own report file.
build/bin/ProcessGroupGlooTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupGlooTest.xml
|
||||
build/bin/ProcessGroupNCCLErrorsTest --gtest_output=xml:test/test-reports/cpp-distributed/ProcessGroupNCCLErrorsTest.xml
|
||||
fi
|
||||
}
|
||||
|
||||
test_custom_backend() {
|
||||
if [[ "$BUILD_ENVIRONMENT" != *rocm* ]] && [[ "$BUILD_ENVIRONMENT" != *asan* ]] ; then
|
||||
echo "Testing custom backends"
|
||||
|
|
@ -371,4 +380,5 @@ else
|
|||
test_custom_script_ops
|
||||
test_custom_backend
|
||||
test_torch_function_benchmark
|
||||
test_distributed
|
||||
fi
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ void cudaSleep(at::cuda::CUDAStream& stream, uint64_t clocks) {
|
|||
|
||||
// Returns the device count reported by cudaGetDeviceCount().
// The counter starts at 0, so 0 is returned if the call leaves it untouched.
// NOTE(review): this text is a rendered diff with its +/- markers stripped; the
// two consecutive check lines below appear to be the removed (AT_CUDA_CHECK) and
// added (C10_CUDA_CHECK_WARN) versions of a single statement -- confirm against
// the repository before treating both as live code.
int cudaNumDevices() {
|
||||
int n = 0;
|
||||
AT_CUDA_CHECK(cudaGetDeviceCount(&n));
|
||||
// NOTE(review): presumably the warning variant reports a failure instead of
// throwing -- verify against the macro's definition.
C10_CUDA_CHECK_WARN(cudaGetDeviceCount(&n));
|
||||
return n;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -224,7 +224,7 @@ void testBroadcast(const std::string& path, const at::DeviceType b) {
|
|||
|
||||
std::vector<std::vector<at::Tensor>> inputs(size);
|
||||
|
||||
// Try every permutation of root rank and root tensoro
|
||||
// Try every permutation of root rank and root tensor
|
||||
for (auto i = 0; i < size; i++) {
|
||||
for (auto j = 0; j < stride; j++) {
|
||||
// Initialize inputs
|
||||
|
|
@ -548,7 +548,7 @@ TEST(ProcessGroupGlooTest, testAllReduceCUDA) {
|
|||
|
||||
TEST(ProcessGroupGlooTest, testBroadcastCUDA) {
|
||||
{
|
||||
if (torch::cuda::is_available()) {
|
||||
if (torch::cuda::device_count() > 1) {
|
||||
TemporaryFile file;
|
||||
testBroadcast(file.path, at::DeviceType::CUDA);
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in a new issue