Mirror of https://github.com/saymrwulf/pytorch.git, synced 2026-05-15 21:00:47 +00:00.
Summary: The shared im2col buffer needs a mutex only to protect it from ops within the same workspace, since the shared buffer is created per workspace. The current implementation has a global mutex, which hurts performance when running multiple nets in parallel. I don't feel great about adding a mutex for this in Workspace — let me know if anyone has a better suggestion. Reviewed By: akyrola. Differential Revision: D5341476. fbshipit-source-id: 1c9a92ef488ffb0c0013a7656bcb3d530bc7208b
27 lines · 906 B · C++
#include "caffe2/core/context_gpu.h"
|
|
#include "conv_op_shared.h"
|
|
|
|
namespace caffe2 {
|
|
|
|
template <>
|
|
void createSharedBuffer<CUDAContext>(Workspace* ws) {
|
|
auto* mutexPtr = ws->CreateBlob("__CAFFE2_SHARED_CONV_BUFFER_CUDA_MUTEX__")
|
|
->GetMutable<std::unique_ptr<std::mutex>>();
|
|
mutexPtr->reset(new std::mutex());
|
|
ws->CreateBlob("__CAFFE2_SHARED_CONV_BUFFER_CUDA__");
|
|
}
|
|
|
|
template <>
|
|
void runWithSharedBuffer(
|
|
Workspace* ws,
|
|
std::function<void(Tensor<CUDAContext>* buffer)> f) {
|
|
auto* mutexBlob = ws->GetBlob("__CAFFE2_SHARED_CONV_BUFFER_CUDA_MUTEX__");
|
|
CAFFE_ENFORCE(mutexBlob, "Must call createSharedBuffer() first");
|
|
|
|
auto* mutexPtr = mutexBlob->GetMutable<std::unique_ptr<std::mutex>>();
|
|
std::lock_guard<std::mutex> g(**mutexPtr);
|
|
auto* buffer = ws->GetBlob("__CAFFE2_SHARED_CONV_BUFFER_CUDA__")
|
|
->GetMutable<TensorCUDA>();
|
|
f(buffer);
|
|
}
|
|
}
|