mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/17623 Despite its generic-sounding name, caffe2::DeviceGuard actually only worked on CUDA devices. Rename it to something that more clearly spells out its applicability. I'm not sure if it's the right call, but in this patch I added 'using CUDAGuard = c10::cuda::CUDAGuard', as this seems to be more in line with how the Caffe2 codebase is currently written. More idiomatic c10 namespace style would be to say cuda::CUDAGuard. Willing to change this if people shout. This is a respin of D13156470 (#14284) Reviewed By: dzhulgakov Differential Revision: D14285504 fbshipit-source-id: 93b8ab938b064572b3b010c307e1261fde0fff3d
161 lines
5.2 KiB
C++
161 lines
5.2 KiB
C++
#include <chrono>
|
|
#include <future>
|
|
#include <random>
|
|
#include <thread>
|
|
#include <array>
|
|
|
|
#include "caffe2/core/context_gpu.h"
|
|
#include <gtest/gtest.h>
|
|
|
|
namespace caffe2 {
|
|
|
|
// Sanity check: this test binary must have been built and linked with the
// CUDA runtime available.
TEST(CUDATest, HasCudaRuntime) {
  const bool runtime_present = HasCudaRuntime();
  EXPECT_TRUE(runtime_present);
}
|
|
|
|
// Allocating through CUDAContext::New should return a non-null owning
// pointer; deallocation happens automatically when the owner is destroyed.
TEST(CUDAContextTest, TestAllocDealloc) {
  if (!HasCudaGPU()) {
    return;
  }
  CUDAContext ctx(0);
  ctx.SwitchToDevice();
  auto buffer = CUDAContext::New(10 * sizeof(float));
  EXPECT_NE(buffer.get(), nullptr);
}
|
|
|
|
TEST(CUDAContextTest, TestSetGetDeviceWithoutCaffeMode) {
  // For a while, set full device control to be true.
  // Walk the devices forward and then backward, checking after each set
  // that the getter reports the device we just selected.
  const int device_count = NumCudaDevices();
  for (int dev = 0; dev < device_count; ++dev) {
    CaffeCudaSetDevice(dev);
    EXPECT_EQ(CaffeCudaGetDevice(), dev);
  }
  for (int dev = device_count - 1; dev >= 0; --dev) {
    CaffeCudaSetDevice(dev);
    EXPECT_EQ(CaffeCudaGetDevice(), dev);
  }
}
|
|
|
|
// Exercises the CUDA caching memory pool on every visible device: a freed
// chunk of the same size should be handed back at the same address, while a
// larger request must come from a different chunk.
TEST(CUDAContextTest, MemoryPoolAllocateDealloc) {
  if (!HasCudaGPU())
    return;
  if (GetCudaMemoryPoolType() == CudaMemoryPoolType::NONE) {
    LOG(ERROR) << "Choose a memory type that is not none to test memory pool.";
    return;
  }
  const int nbytes = 1048576;
  // Hoisted: NumCudaDevices() is loop-invariant, no need to re-query it on
  // every iteration (and in the log message).
  const int num_devices = NumCudaDevices();
  for (int i = 0; i < num_devices; ++i) {
    LOG(INFO) << "Device " << i << " of " << num_devices;
    CUDAGuard guard(i);
    auto allocated = CUDAContext::New(nbytes);
    // Fixed for consistency: compare the raw pointer (.get()) against
    // nullptr, as the other allocation checks in this file do, instead of
    // comparing the owning wrapper object itself.
    EXPECT_NE(allocated.get(), nullptr);
    cudaPointerAttributes attr;
    CUDA_ENFORCE(cudaPointerGetAttributes(&attr, allocated.get()));
    // CAFFE2_CUDA_PTRATTR_MEMTYPE abstracts over the attribute field rename
    // across CUDA versions.
    EXPECT_EQ(attr.CAFFE2_CUDA_PTRATTR_MEMTYPE, cudaMemoryTypeDevice);
    EXPECT_EQ(attr.device, i);
    void* prev_allocated = allocated.get();
    allocated.clear();
    auto new_allocated = CUDAContext::New(nbytes);
    // With a pool, the above allocation should yield the same address.
    EXPECT_EQ(new_allocated.get(), prev_allocated);
    // But, if we are allocating something larger, we will have a different
    // chunk of memory.
    auto larger_allocated = CUDAContext::New(nbytes * 2);
    EXPECT_NE(larger_allocated.get(), prev_allocated);
  }
}
|
|
|
|
// Returns the CUDA stream currently bound to a cuBLAS handle. Both the
// cuBLAS query and the non-null result are enforced, so callers can rely
// on getting a valid stream back.
cudaStream_t getStreamForHandle(cublasHandle_t handle) {
  cudaStream_t bound_stream = nullptr;
  CUBLAS_ENFORCE(cublasGetStream(handle, &bound_stream));
  CHECK_NOTNULL(bound_stream);
  return bound_stream;
}
|
|
|
|
// Two contexts created on the same thread for the same device share the
// thread-local stream and cuBLAS handle, but each owns its own cuRAND
// generator.
TEST(CUDAContextTest, TestSameThreadSameObject) {
  if (!HasCudaGPU()) {
    return;
  }
  CUDAContext first(0);
  CUDAContext second(0);
  EXPECT_EQ(first.cuda_stream(), second.cuda_stream());
  EXPECT_EQ(first.cublas_handle(), second.cublas_handle());
  // The cuBLAS handle must be bound to the shared stream.
  EXPECT_EQ(first.cuda_stream(), getStreamForHandle(second.cublas_handle()));
  // CuRAND generators are context-local.
  EXPECT_NE(first.curand_generator(), second.curand_generator());
}
|
|
|
|
// Checks that the current stream is a thread-local property shared by all
// contexts on this thread: switching to another device and back restores the
// same stream, a default-constructed context picks it up, and overriding the
// logical stream id invalidates the streams seen by earlier contexts.
TEST(CUDAContextTest, TestSameThreadTempObject) {
  if (!HasCudaGPU())
    return;
  CUDAContext context_outer(0); // gpu id
  context_outer.SwitchToDevice();

  // The rest of the test needs a second GPU to switch away to.
  if (NumCudaDevices() >= 2) {
    auto before_stream = context_outer.cuda_stream();

    // try to mess up current device
    CUDAContext context_different_device(1);
    context_different_device.SwitchToDevice(10);

    // go back
    context_outer.SwitchToDevice();
    EXPECT_EQ(context_outer.cuda_stream(), before_stream);

    // do nothing - infers the current device and stream
    CUDAContext context_noop;
    EXPECT_EQ(context_outer.cuda_stream(), before_stream);
    EXPECT_EQ(context_noop.cuda_stream(), before_stream);

    // override stream - the previous context is not valid any more until
    // SwitchToDevice is called again (needs to be refactored into proper guard)
    CUDAContext context_override;
    context_override.SwitchToDevice(1); // logical stream id
    EXPECT_NE(context_override.cuda_stream(), before_stream);
    // note, that accessing streams from context_outer and context_noop is not
    // semantically valid any more
  }
}
|
|
|
|
// Contexts bound to different devices must not share streams, cuBLAS
// handles, or cuRAND generators, even on the same thread.
// (The "Differnt" typo in the test name is preserved: gtest test names are
// externally filterable identifiers.)
TEST(CUDAContextTest, TestSameThreadDifferntObjectIfDifferentDevices) {
  // Guard clause: nothing to compare with fewer than two GPUs.
  if (NumCudaDevices() <= 1) {
    return;
  }
  CUDAContext first(0);
  CUDAContext second(1);
  EXPECT_NE(first.cuda_stream(), second.cuda_stream());
  EXPECT_NE(first.cublas_handle(), second.cublas_handle());
  EXPECT_NE(first.cuda_stream(), getStreamForHandle(second.cublas_handle()));
  EXPECT_NE(first.curand_generator(), second.curand_generator());
}
|
|
|
|
namespace {
|
|
// A test function to return a stream address from a temp CUDA context. You
|
|
// should not use that stream though, because the actual stream is destroyed
|
|
// after thread exit.
|
|
void TEST_GetStreamAddress(cudaStream_t* ptr) {
|
|
CUDAContext context(0);
|
|
context.SwitchToDevice();
|
|
*ptr = context.cuda_stream();
|
|
// Sleep for a while so we have concurrent thread executions
|
|
std::this_thread::sleep_for(std::chrono::seconds(1));
|
|
}
|
|
} // namespace
|
|
|
|
// Streams are per-thread: two calls on one thread observe the same stream,
// while two concurrent threads each get a distinct one.
// (The typos in the test name are preserved: gtest test names are
// externally filterable identifiers.)
TEST(CUDAContextTest, TestDifferntThreadDifferentobject) {
  if (!HasCudaGPU()) {
    return;
  }
  std::array<cudaStream_t, 2> streams = {0};
  // Same thread: both slots must end up with the identical stream.
  TEST_GetStreamAddress(&streams[0]);
  TEST_GetStreamAddress(&streams[1]);
  EXPECT_TRUE(streams[0] != nullptr);
  EXPECT_TRUE(streams[1] != nullptr);
  EXPECT_EQ(streams[0], streams[1]);
  // Different threads: each must receive its own stream.
  std::thread worker_a(TEST_GetStreamAddress, &streams[0]);
  std::thread worker_b(TEST_GetStreamAddress, &streams[1]);
  worker_a.join();
  worker_b.join();
  EXPECT_TRUE(streams[0] != nullptr);
  EXPECT_TRUE(streams[1] != nullptr);
  EXPECT_NE(streams[0], streams[1]);
}
|
|
|
|
} // namespace caffe2
|