mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-14 20:57:59 +00:00
Back out "Revert D10123245: Back out "codemod cuda_gpu_id to device_id"" (#12232)
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/12232 Original commit changeset: fca91fea58b7. This adds proper modifications to the DeviceType <-> DeviceOption conversion code added in D10033396. Reviewed By: jerryzh168 Differential Revision: D10132473 fbshipit-source-id: 801ef777e2950982cb47b48051b1471a0a91e64b
This commit is contained in:
parent
696498d9e4
commit
ff608a9ff3
41 changed files with 163 additions and 121 deletions
|
|
@ -11,7 +11,7 @@ nccl::NCCLExecution getNCCLElements(
|
|||
// We either do an N-N op, or an N-1 op.
|
||||
CAFFE_ENFORCE(op->InputSize() == op->OutputSize() || op->OutputSize() == 1);
|
||||
nccl::NCCLExecution ex;
|
||||
ex.stream_gpu_id = context.device_id();
|
||||
ex.stream_gpu_id = context.cuda_gpu_id();
|
||||
ex.stream = context.cuda_stream();
|
||||
ex.root = op->template GetSingleArgument<int>("root", 0);
|
||||
ex.elements.resize(op->InputSize());
|
||||
|
|
@ -204,7 +204,7 @@ std::pair<std::vector<DeviceOption>, std::vector<DeviceOption>> ncclOpDevInfer(
|
|||
for (int i = 0; i < def.input().size(); ++i) {
|
||||
DeviceOption dev;
|
||||
dev.set_device_type(1);
|
||||
dev.set_device_id(i);
|
||||
dev.set_cuda_gpu_id(i);
|
||||
opt.push_back(dev);
|
||||
}
|
||||
return std::make_pair(opt, opt);
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ dyndep.InitOpsLibrary('@/caffe2/caffe2/contrib/nccl:nccl_ops')
|
|||
def gpu_device(i):
|
||||
device_option = caffe2_pb2.DeviceOption()
|
||||
device_option.device_type = caffe2_pb2.CUDA
|
||||
device_option.device_id = i
|
||||
device_option.cuda_gpu_id = i
|
||||
return device_option
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -33,9 +33,9 @@ void ProfDAGNet::ValidateOpTensorDevices() {
|
|||
had_mismatches = true;
|
||||
LOG(INFO) << "== PERFORMANCE WARNING == \n"
|
||||
<< " Operator " << node.operator_->debug_def().type()
|
||||
<< " expects GPU " << mismatch.second.first.device_id()
|
||||
<< " expects GPU " << mismatch.second.first.cuda_gpu_id()
|
||||
<< " but tensor [" << mismatch.first << "] is on GPU "
|
||||
<< mismatch.second.second.device_id();
|
||||
<< mismatch.second.second.cuda_gpu_id();
|
||||
}
|
||||
}
|
||||
if (!had_mismatches) {
|
||||
|
|
|
|||
|
|
@ -177,7 +177,7 @@ def _tf_device(device_option):
|
|||
if device_option.device_type == caffe2_pb2.CPU:
|
||||
return "/cpu:*"
|
||||
if device_option.device_type == caffe2_pb2.CUDA:
|
||||
return "/gpu:{}".format(device_option.device_id)
|
||||
return "/gpu:{}".format(device_option.cuda_gpu_id)
|
||||
raise Exception("Unhandled device", device_option)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -79,11 +79,11 @@ class CTCOpsTest(test_util.TestCase):
|
|||
def test_ctc_cost_gpu(self):
|
||||
self.verify_cost(
|
||||
caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA,
|
||||
device_id=0),
|
||||
cuda_gpu_id=0),
|
||||
is_test=False)
|
||||
self.verify_cost(
|
||||
caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA,
|
||||
device_id=0),
|
||||
cuda_gpu_id=0),
|
||||
is_test=False,
|
||||
skip_input_lengths=True)
|
||||
|
||||
|
|
@ -99,10 +99,10 @@ class CTCOpsTest(test_util.TestCase):
|
|||
def test_ctc_forward_only_gpu(self):
|
||||
self.verify_cost(
|
||||
caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA,
|
||||
device_id=0),
|
||||
cuda_gpu_id=0),
|
||||
is_test=True)
|
||||
self.verify_cost(
|
||||
caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA,
|
||||
device_id=0),
|
||||
cuda_gpu_id=0),
|
||||
is_test=True,
|
||||
skip_input_lengths=True)
|
||||
|
|
|
|||
|
|
@ -195,7 +195,7 @@ TEST(TensorTest, TensorSerializationMultiDevices) {
|
|||
}
|
||||
EXPECT_TRUE(tensor_proto.has_device_detail());
|
||||
EXPECT_EQ(tensor_proto.device_detail().device_type(), PROTO_CUDA);
|
||||
EXPECT_EQ(tensor_proto.device_detail().device_id(), gpu_id);
|
||||
EXPECT_EQ(tensor_proto.device_detail().cuda_gpu_id(), gpu_id);
|
||||
// Test if the restored blob is still of the same device.
|
||||
blob.Reset();
|
||||
EXPECT_NO_THROW(DeserializeBlob(serialized, &blob));
|
||||
|
|
@ -205,7 +205,7 @@ TEST(TensorTest, TensorSerializationMultiDevices) {
|
|||
// Test if we force the restored blob on a different device, we
|
||||
// can still get so.
|
||||
blob.Reset();
|
||||
proto.mutable_tensor()->mutable_device_detail()->set_device_id(0);
|
||||
proto.mutable_tensor()->mutable_device_detail()->set_cuda_gpu_id(0);
|
||||
EXPECT_NO_THROW(DeserializeBlob(proto.SerializeAsString(), &blob));
|
||||
EXPECT_TRUE(BlobIsTensorType(blob, CUDA));
|
||||
EXPECT_EQ(GetGPUIDForPointer(blob.Get<TensorCUDA>().data<float>()), 0);
|
||||
|
|
|
|||
|
|
@ -256,7 +256,7 @@ CUDAContext::CUDAContext(const int gpu_id)
|
|||
|
||||
CUDAContext::CUDAContext(const DeviceOption& option)
|
||||
: gpu_id_(
|
||||
option.has_device_id() ? RectifyGPUID(option.device_id())
|
||||
option.has_cuda_gpu_id() ? RectifyGPUID(option.cuda_gpu_id())
|
||||
: CaffeCudaGetDevice()),
|
||||
random_seed_(
|
||||
option.has_random_seed() ? option.random_seed()
|
||||
|
|
|
|||
|
|
@ -184,7 +184,7 @@ class CAFFE2_CUDA_API CUDAContext final : public BaseContext {
|
|||
}
|
||||
}
|
||||
|
||||
inline int device_id() const {
|
||||
inline int cuda_gpu_id() const {
|
||||
return gpu_id_;
|
||||
}
|
||||
|
||||
|
|
@ -283,7 +283,7 @@ class CAFFE2_CUDA_API CUDAContext final : public BaseContext {
|
|||
}
|
||||
|
||||
static bool IsStreamFree(const DeviceOption& option, int stream_id) {
|
||||
auto stream = CUDAContext::cuda_stream(option.device_id(), stream_id);
|
||||
auto stream = CUDAContext::cuda_stream(option.cuda_gpu_id(), stream_id);
|
||||
return cudaStreamQuery(stream) == cudaSuccess;
|
||||
}
|
||||
|
||||
|
|
@ -393,7 +393,7 @@ class CAFFE2_CUDA_API CUDAStaticContext final : public BaseStaticContext {
|
|||
|
||||
void ExtractDeviceOption(DeviceOption* device, const void* data) override {
|
||||
device->set_device_type(TypeToProto(GetDeviceType()));
|
||||
device->set_device_id(GetGPUIDForPointer(data));
|
||||
device->set_cuda_gpu_id(GetGPUIDForPointer(data));
|
||||
}
|
||||
|
||||
protected:
|
||||
|
|
|
|||
|
|
@ -122,9 +122,9 @@ class CuDNNWrapper {
|
|||
void with_cudnn_state(size_t state_idx, F&& f) {
|
||||
CAFFE_ENFORCE(
|
||||
state_idx < CAFFE2_COMPILE_TIME_MAX_CUDNN_STATES, "Invalid state_idx");
|
||||
auto& sync_state = cudnn_states()[context_->device_id()][state_idx];
|
||||
auto& sync_state = cudnn_states()[context_->cuda_gpu_id()][state_idx];
|
||||
|
||||
DeviceGuard dg(context_->device_id());
|
||||
DeviceGuard dg(context_->cuda_gpu_id());
|
||||
|
||||
// We need to serialize execution on the CuDNNState as we can't
|
||||
// allow multiple threads to race through the cudaEventRecord
|
||||
|
|
@ -132,7 +132,7 @@ class CuDNNWrapper {
|
|||
// execution)
|
||||
std::lock_guard<std::mutex> g(sync_state.mutex);
|
||||
if (!sync_state.state.get()) {
|
||||
sync_state.state.reset(new CuDNNState(context_->device_id()));
|
||||
sync_state.state.reset(new CuDNNState(context_->cuda_gpu_id()));
|
||||
}
|
||||
CHECK_NOTNULL(sync_state.state.get())->execute(context_->cuda_stream(), f);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,21 +9,21 @@ namespace caffe2 {
|
|||
struct CudaEventWrapper {
|
||||
explicit CudaEventWrapper(const DeviceOption& option)
|
||||
: cuda_stream_(nullptr),
|
||||
device_id_(option.device_id()),
|
||||
cuda_gpu_id_(option.cuda_gpu_id()),
|
||||
status_(EventStatus::EVENT_INITIALIZED) {
|
||||
CAFFE_ENFORCE(option.device_type(), PROTO_CUDA);
|
||||
DeviceGuard g(device_id_);
|
||||
DeviceGuard g(cuda_gpu_id_);
|
||||
CUDA_ENFORCE(cudaEventCreate(
|
||||
&cuda_event_, cudaEventDefault | cudaEventDisableTiming));
|
||||
}
|
||||
~CudaEventWrapper() {
|
||||
DeviceGuard g(device_id_);
|
||||
DeviceGuard g(cuda_gpu_id_);
|
||||
CUDA_CHECK(cudaEventDestroy(cuda_event_));
|
||||
}
|
||||
|
||||
cudaEvent_t cuda_event_;
|
||||
cudaStream_t cuda_stream_;
|
||||
int device_id_;
|
||||
int cuda_gpu_id_;
|
||||
|
||||
std::atomic<int> status_;
|
||||
std::mutex mutex_recorded_;
|
||||
|
|
@ -65,12 +65,12 @@ void EventRecordCUDA(Event* event, const void* context, const char* err_msg) {
|
|||
const auto& current_device = CaffeCudaGetDevice();
|
||||
CAFFE_ENFORCE_EQ(
|
||||
current_device,
|
||||
wrapper->device_id_,
|
||||
wrapper->cuda_gpu_id_,
|
||||
"When you call EventRecordCUDA, your current device should be the same "
|
||||
"as the device specified by the event.");
|
||||
CAFFE_ENFORCE_EQ(
|
||||
current_device,
|
||||
static_cast<const CUDAContext*>(context)->device_id());
|
||||
static_cast<const CUDAContext*>(context)->cuda_gpu_id());
|
||||
CUDA_ENFORCE(cudaEventRecord(
|
||||
wrapper->cuda_event_,
|
||||
static_cast<const CUDAContext*>(context)->cuda_stream()));
|
||||
|
|
@ -96,7 +96,7 @@ void EventFinishCUDA(const Event* event) {
|
|||
|
||||
if (wrapper->status_ == EventStatus::EVENT_SCHEDULED) {
|
||||
// ok, even if event is already completed and status was not yet updated
|
||||
DeviceGuard g(wrapper->device_id_);
|
||||
DeviceGuard g(wrapper->cuda_gpu_id_);
|
||||
auto cudaResult = cudaEventSynchronize(wrapper->cuda_event_);
|
||||
if (cudaResult == cudaSuccess) {
|
||||
wrapper->status_ = EventStatus::EVENT_SUCCESS;
|
||||
|
|
@ -127,7 +127,7 @@ void EventWaitCUDACUDA(const Event* event, void* context) {
|
|||
if (context_stream != event_stream) {
|
||||
// CAFFE_ENFORCE_EQ(
|
||||
// CaffeCudaGetDevice(),
|
||||
// static_cast<const CUDAContext*>(context)->device_id());
|
||||
// static_cast<const CUDAContext*>(context)->cuda_gpu_id());
|
||||
CUDA_CHECK(cudaStreamWaitEvent(context_stream, wrapper->cuda_event_, 0));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -138,7 +138,7 @@ void EventWaitHIPHIP(const Event* event, void* context)
|
|||
{
|
||||
// CAFFE_ENFORCE_EQ(
|
||||
// CaffeCudaGetDevice(),
|
||||
// static_cast<const CUDAContext*>(context)->device_id());
|
||||
// static_cast<const CUDAContext*>(context)->cuda_gpu_id());
|
||||
HIP_CHECK(hipStreamWaitEvent(context_stream, wrapper->hip_event_, 0));
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -176,7 +176,7 @@ class ComputeBlobRecyclingForDag {
|
|||
// cuda device option but whose inputs/outputs are on CPU
|
||||
if (net.op(op_index).type() == "CopyGPUToCPU") {
|
||||
blob_device_[output].set_device_type(0);
|
||||
blob_device_[output].set_device_id(0);
|
||||
blob_device_[output].set_cuda_gpu_id(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -478,7 +478,7 @@ class ComputeBlobRecyclingForDag {
|
|||
const DeviceOption& device_option) {
|
||||
const DeviceOption& blob_device = blob_device_[blob_name];
|
||||
if (device_option.device_type() != blob_device.device_type() ||
|
||||
device_option.device_id() != blob_device.device_id()) {
|
||||
device_option.cuda_gpu_id() != blob_device.cuda_gpu_id()) {
|
||||
return false;
|
||||
}
|
||||
for (const int token : req_tokens_[blob_name]) {
|
||||
|
|
|
|||
|
|
@ -157,7 +157,7 @@ TaskThreadPool* AsyncNetBase::pool(const DeviceOption& device_option) {
|
|||
numa_node_id);
|
||||
return poolGetter(cpu_pools_, PROTO_CPU, numa_node_id, num_workers_);
|
||||
} else if (device_option.device_type() == PROTO_CUDA) {
|
||||
auto gpu_id = device_option.device_id();
|
||||
auto gpu_id = device_option.cuda_gpu_id();
|
||||
CAFFE_ENFORCE(
|
||||
gpu_id >= 0 && gpu_id < FLAGS_caffe2_net_async_max_gpus,
|
||||
"Invalid GPU id: " + caffe2::to_string(gpu_id));
|
||||
|
|
@ -173,7 +173,7 @@ int AsyncNetBase::stream(int task_id) {
|
|||
const auto& device_option = event(task_id).GetDeviceOption();
|
||||
int stream_id = 0;
|
||||
if (device_option.device_type() == PROTO_CUDA) {
|
||||
int gpu_id = device_option.device_id();
|
||||
int gpu_id = device_option.cuda_gpu_id();
|
||||
CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + caffe2::to_string(gpu_id));
|
||||
if ((unsigned)gpu_id >= getStreamCounters().size()) {
|
||||
getStreamCounters().resize(gpu_id + 1, 0);
|
||||
|
|
|
|||
|
|
@ -112,7 +112,7 @@ AsyncDAGNet::AsyncDAGNet(
|
|||
int AsyncDAGNet::stream(const DeviceOption& device_option) {
|
||||
int stream_id = 0;
|
||||
if (device_option.device_type() == PROTO_CUDA) {
|
||||
int gpu_id = device_option.device_id();
|
||||
int gpu_id = device_option.cuda_gpu_id();
|
||||
CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + caffe2::to_string(gpu_id));
|
||||
if ((unsigned)gpu_id >= stream_counters_.size()) {
|
||||
stream_counters_.resize(gpu_id + 1, 0);
|
||||
|
|
|
|||
|
|
@ -124,7 +124,7 @@ TEST(NetTest, DISABLED_ChainingForDifferentDevices) {
|
|||
type: "NetTestDummy"
|
||||
device_option {
|
||||
device_type: 1
|
||||
device_id: 1
|
||||
cuda_gpu_id: 1
|
||||
}
|
||||
}
|
||||
)DOC";
|
||||
|
|
|
|||
|
|
@ -649,7 +649,7 @@ std::map<string, std::pair<DeviceOption, DeviceOption>> ValidateTensorDevices(
|
|||
&blob_device);
|
||||
|
||||
if (blob_device.device_type() == PROTO_CUDA &&
|
||||
blob_device.device_id() != op_device.device_id()) {
|
||||
blob_device.cuda_gpu_id() != op_device.cuda_gpu_id()) {
|
||||
mismatches[blob_name] = std::make_pair(op_device, blob_device);
|
||||
} else if (
|
||||
blob_device.device_type() == PROTO_HIP &&
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ static vector<int64_t> GetMKLTensorInfo(
|
|||
const mkl::MKLMemory<T>* tc = static_cast<const mkl::MKLMemory<T>*>(c);
|
||||
*capacity = tc->size() * sizeof(T);
|
||||
device->set_device_type(PROTO_MKLDNN);
|
||||
device->set_device_id(0);
|
||||
device->set_cuda_gpu_id(0);
|
||||
return tc->dims();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ void ProfileOperatorObserver::Start() {
|
|||
int device;
|
||||
cudaGetDevice(&device);
|
||||
|
||||
cudaSetDevice(context->device_id());
|
||||
cudaSetDevice(context->cuda_gpu_id());
|
||||
cudaEventCreate(&start_);
|
||||
cudaEventRecord(start_, context->cuda_stream());
|
||||
|
||||
|
|
@ -92,7 +92,7 @@ void ProfileOperatorObserver::Stop() {
|
|||
int device;
|
||||
cudaGetDevice(&device);
|
||||
|
||||
cudaSetDevice(context->device_id());
|
||||
cudaSetDevice(context->cuda_gpu_id());
|
||||
cudaEventCreate(&stop_);
|
||||
cudaEventRecord(stop_, context->cuda_stream());
|
||||
cudaEventSynchronize(stop_);
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ caffe2::DeviceOption GetDeviceOption(const Device& onnx_device) {
|
|||
{DeviceType::CUDA, caffe2::DeviceType::CUDA}};
|
||||
caffe2::DeviceOption d;
|
||||
d.set_device_type(static_cast<int32_t>(m.at(onnx_device.type)));
|
||||
d.set_device_id(onnx_device.device_id);
|
||||
d.set_cuda_gpu_id(onnx_device.device_id);
|
||||
return d;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ void LoadOp<CUDAContext>::SetCurrentDevice(BlobProto* proto) {
|
|||
proto->mutable_tensor()->clear_device_detail();
|
||||
auto* device_detail = proto->mutable_tensor()->mutable_device_detail();
|
||||
device_detail->set_device_type(PROTO_CUDA);
|
||||
device_detail->set_device_id(CaffeCudaGetDevice());
|
||||
device_detail->set_cuda_gpu_id(CaffeCudaGetDevice());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -72,11 +72,11 @@ void CUDARecurrentNetworkExecutor::_ExecRange(int from, int to) {
|
|||
if (gpu_id == -1 &&
|
||||
rnn_op.op->device_option().device_type() ==
|
||||
DeviceTypeProto::PROTO_CUDA) {
|
||||
gpu_id = rnn_op.op->device_option().device_id();
|
||||
gpu_id = rnn_op.op->device_option().cuda_gpu_id();
|
||||
} else {
|
||||
CAFFE_ENFORCE(
|
||||
rnn_op.op->device_option().device_type() == 0 ||
|
||||
rnn_op.op->device_option().device_id() == gpu_id,
|
||||
rnn_op.op->device_option().cuda_gpu_id() == gpu_id,
|
||||
"RNN Executor only supports ops on one GPU");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -183,7 +183,7 @@ message DeviceOption {
|
|||
// optional DeviceType device_type = 1 [ default = CPU ];
|
||||
optional int32 device_type = 1 [ default = 0 ]; // 0 is CPU.
|
||||
// [CUDA specific] the cuda gpu id.
|
||||
optional int32 device_id = 2;
|
||||
optional int32 cuda_gpu_id = 2;
|
||||
// [general] The random seed to start the device random number generator with.
|
||||
optional uint32 random_seed = 3;
|
||||
// [general] What node this op should execute on.
|
||||
|
|
|
|||
|
|
@ -86,12 +86,54 @@ inline CAFFE2_API caffe2::DeviceOption DeviceToOption(
|
|||
caffe2::DeviceOption option;
|
||||
auto type = device.type();
|
||||
option.set_device_type(TypeToProto(type));
|
||||
option.set_device_id(device.index());
|
||||
|
||||
switch (type) {
|
||||
case DeviceType::CPU:
|
||||
if (device.index() != -1) {
|
||||
option.set_numa_node_id(device.index());
|
||||
}
|
||||
break;
|
||||
case DeviceType::CUDA:
|
||||
option.set_cuda_gpu_id(device.index());
|
||||
break;
|
||||
case DeviceType::HIP:
|
||||
option.set_hip_gpu_id(device.index());
|
||||
break;
|
||||
case DeviceType::OPENGL:
|
||||
case DeviceType::OPENCL:
|
||||
case DeviceType::MKLDNN:
|
||||
case DeviceType::IDEEP:
|
||||
case DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES:
|
||||
case DeviceType::ONLY_FOR_TEST:
|
||||
break;
|
||||
default:
|
||||
AT_ERROR(
|
||||
"Unknown device:",
|
||||
static_cast<int32_t>(type),
|
||||
". If you have recently updated the caffe2.proto file to add a new "
|
||||
"device type, did you forget to update the ProtoToType() and TypeToProto"
|
||||
"function to reflect such recent changes?");
|
||||
}
|
||||
return option;
|
||||
}
|
||||
|
||||
inline CAFFE2_API at::Device OptionToDevice(const caffe2::DeviceOption option) {
|
||||
return at::Device(ProtoToType(option.device_type()), option.device_id());
|
||||
auto type = option.device_type();
|
||||
int32_t id = -1;
|
||||
switch (type) {
|
||||
case caffe2::PROTO_CPU:
|
||||
if (option.has_numa_node_id()) {
|
||||
id = option.numa_node_id();
|
||||
}
|
||||
break;
|
||||
case caffe2::PROTO_CUDA:
|
||||
id = option.cuda_gpu_id();
|
||||
break;
|
||||
case caffe2::PROTO_HIP:
|
||||
id = option.hip_gpu_id();
|
||||
break;
|
||||
}
|
||||
return at::Device(ProtoToType(type), id);
|
||||
}
|
||||
|
||||
} // namespace caffe2
|
||||
|
|
|
|||
|
|
@ -236,5 +236,5 @@ class CNNModelHelper(ModelHelper):
|
|||
def GPU(self, gpu_id=0):
|
||||
device_option = caffe2_pb2.DeviceOption()
|
||||
device_option.device_type = caffe2_pb2.CUDA
|
||||
device_option.device_id = gpu_id
|
||||
device_option.cuda_gpu_id = gpu_id
|
||||
return device_option
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ def IsOperatorWithEngine(op_type, engine):
|
|||
|
||||
def DeviceOption(
|
||||
device_type,
|
||||
device_id=0,
|
||||
cuda_gpu_id=0,
|
||||
random_seed=None,
|
||||
node_name=None,
|
||||
numa_node_id=None,
|
||||
|
|
@ -92,7 +92,7 @@ def DeviceOption(
|
|||
):
|
||||
option = caffe2_pb2.DeviceOption()
|
||||
option.device_type = device_type
|
||||
option.device_id = device_id
|
||||
option.cuda_gpu_id = cuda_gpu_id
|
||||
if node_name is not None:
|
||||
option.node_name = node_name
|
||||
if random_seed is not None:
|
||||
|
|
@ -115,7 +115,7 @@ def device_option_equal(opt1, opt2, ignore_node_name=True, ignore_random_seed=Tr
|
|||
if not opt1.device_type or not opt2.device_type:
|
||||
# At least one option is for CPU, check if both are for CPU.
|
||||
return not opt1.device_type and not opt2.device_type
|
||||
return opt1.device_id == opt2.device_id
|
||||
return opt1.cuda_gpu_id == opt2.cuda_gpu_id
|
||||
|
||||
|
||||
def InferBlobDevices(net):
|
||||
|
|
@ -2111,7 +2111,7 @@ class Net(object):
|
|||
"""A convenient function to run everything on the GPU."""
|
||||
device_option = caffe2_pb2.DeviceOption()
|
||||
device_option.device_type = caffe2_pb2.CUDA
|
||||
device_option.device_id = gpu_id
|
||||
device_option.cuda_gpu_id = gpu_id
|
||||
self._net.device_option.CopyFrom(device_option)
|
||||
if use_cudnn:
|
||||
for op in self._net.op:
|
||||
|
|
@ -2286,7 +2286,7 @@ def copy_func_between_devices(src, dst):
|
|||
return None
|
||||
|
||||
if src.device_type == CUDA and dst.device_type == CUDA:
|
||||
if src.device_id == dst.device_id:
|
||||
if src.cuda_gpu_id == dst.cuda_gpu_id:
|
||||
return None
|
||||
else:
|
||||
def fun(net, *args, **kw):
|
||||
|
|
@ -2312,10 +2312,10 @@ def copy_func_between_devices(src, dst):
|
|||
def device_equal(src, dst):
|
||||
'''
|
||||
We are using this function instead of == operator because optional-value
|
||||
comparison between empty device_options and {device_type:0, device_id:0}
|
||||
comparison between empty device_options and {device_type:0, cuda_gpu_id:0}
|
||||
returns not equal in some cases.
|
||||
'''
|
||||
return src.device_type == dst.device_type and src.device_id == dst.device_id
|
||||
return src.device_type == dst.device_type and src.cuda_gpu_id == dst.cuda_gpu_id
|
||||
|
||||
|
||||
def update_placeholder_op_output(op, blob_to_device):
|
||||
|
|
@ -2429,7 +2429,7 @@ def InjectCrossDeviceCopies(net, blob_to_device=None, blob_remap=None,
|
|||
if device_option.device_type == CPU:
|
||||
suffix = '_cpu'
|
||||
elif device_option.device_type == CUDA:
|
||||
suffix = '_cuda_' + str(device_option.device_id)
|
||||
suffix = '_cuda_' + str(device_option.cuda_gpu_id)
|
||||
else:
|
||||
raise RuntimeError(
|
||||
"Unknown device type: {}".
|
||||
|
|
|
|||
|
|
@ -83,17 +83,17 @@ class TestScopes(test_util.TestCase):
|
|||
# explicitly setting a device
|
||||
device_option = caffe2_pb2.DeviceOption()
|
||||
device_option.device_type = caffe2_pb2.CUDA
|
||||
device_option.device_id = 1
|
||||
device_option.cuda_gpu_id = 1
|
||||
op = core.CreateOperator("Relu", "x", "y", device_option=device_option)
|
||||
self.assertTrue(op.HasField('device_option'))
|
||||
self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA)
|
||||
self.assertEqual(op.device_option.device_id, 1)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 1)
|
||||
with core.DeviceScope(device_option):
|
||||
# from device scope
|
||||
op = core.CreateOperator("Relu", "x", "y")
|
||||
self.assertTrue(op.HasField('device_option'))
|
||||
self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA)
|
||||
self.assertEqual(op.device_option.device_id, 1)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 1)
|
||||
# from an overridden device option
|
||||
override_device = caffe2_pb2.DeviceOption()
|
||||
override_device.device_type = caffe2_pb2.CPU
|
||||
|
|
@ -109,13 +109,13 @@ class TestScopes(test_util.TestCase):
|
|||
def testNameAndDeviceScopeTogether(self):
|
||||
device_option = caffe2_pb2.DeviceOption()
|
||||
device_option.device_type = caffe2_pb2.CUDA
|
||||
device_option.device_id = 1
|
||||
device_option.cuda_gpu_id = 1
|
||||
with core.DeviceScope(device_option):
|
||||
with core.NameScope("foo"):
|
||||
op = core.CreateOperator("Relu", "x", "y")
|
||||
self.assertTrue(op.HasField('device_option'))
|
||||
self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA)
|
||||
self.assertEqual(op.device_option.device_id, 1)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 1)
|
||||
self.assertEqual(len(op.input), 1)
|
||||
self.assertEqual(op.input[0], "foo/x")
|
||||
self.assertEqual(len(op.output), 1)
|
||||
|
|
@ -255,7 +255,7 @@ class TestCreateOperator(test_util.TestCase):
|
|||
def testCreate(self):
|
||||
device_option = caffe2_pb2.DeviceOption()
|
||||
device_option.device_type = caffe2_pb2.CUDA
|
||||
device_option.device_id = 1
|
||||
device_option.cuda_gpu_id = 1
|
||||
op = core.CreateOperator(
|
||||
"Ludicrous", "x", "y", name="ludicrous",
|
||||
control_input="z", device_option=device_option,
|
||||
|
|
@ -271,7 +271,7 @@ class TestCreateOperator(test_util.TestCase):
|
|||
self.assertEqual(op.control_input[0], "z")
|
||||
self.assertTrue(op.HasField('device_option'))
|
||||
self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA)
|
||||
self.assertEqual(op.device_option.device_id, 1)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 1)
|
||||
self.assertTrue(len(op.arg), 3)
|
||||
|
||||
# can't guarantee ordering of kwargs, so generate a set of args
|
||||
|
|
@ -574,7 +574,7 @@ class TestDeviceOption(test_util.TestCase):
|
|||
opt2 = caffe2_pb2.DeviceOption()
|
||||
opt1.device_type = 0
|
||||
self.assertTrue(core.device_option_equal(opt1, opt2))
|
||||
opt1.device_id = 5
|
||||
opt1.cuda_gpu_id = 5
|
||||
# opt1 still is on CPU, so the options should be equal
|
||||
self.assertTrue(core.device_option_equal(opt1, opt2))
|
||||
opt2.device_type = 0
|
||||
|
|
@ -649,7 +649,7 @@ class TestInferDevice(test_util.TestCase):
|
|||
def setUp(self):
|
||||
device_option = caffe2_pb2.DeviceOption()
|
||||
device_option.device_type = caffe2_pb2.CUDA
|
||||
device_option.device_id = 1
|
||||
device_option.cuda_gpu_id = 1
|
||||
self.cuda_option = device_option
|
||||
self.cpu_option = caffe2_pb2.DeviceOption()
|
||||
|
||||
|
|
@ -748,7 +748,7 @@ class TestInferDevice(test_util.TestCase):
|
|||
init_net = core.Net("init")
|
||||
device_option = caffe2_pb2.DeviceOption()
|
||||
device_option.device_type = caffe2_pb2.CUDA
|
||||
device_option.device_id = 1
|
||||
device_option.cuda_gpu_id = 1
|
||||
weight = init_net.XavierFill([], 'fc_w', shape=[10, 100])
|
||||
bias = init_net.ConstantFill([], 'fc_b', shape=[10, ])
|
||||
|
||||
|
|
@ -765,7 +765,7 @@ class TestInferDevice(test_util.TestCase):
|
|||
self.assertEqual(op.input[1], "fc_w_cuda_1")
|
||||
self.assertEqual(op.input[2], "fc_b_cuda_1")
|
||||
self.assertEqual(op.device_option.device_type, 1)
|
||||
self.assertEqual(op.device_option.device_id, 1)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 1)
|
||||
self.assertEqual(new_net._net.op[-2].type, "CopyCPUToGPU")
|
||||
self.assertEqual(new_net._net.op[0].type, "CopyCPUToGPU")
|
||||
self.assertNotEqual(blob_to_device["fc_w"], device_option)
|
||||
|
|
@ -775,7 +775,7 @@ class TestInferDevice(test_util.TestCase):
|
|||
init_net = core.Net("init")
|
||||
device_option = caffe2_pb2.DeviceOption()
|
||||
device_option.device_type = caffe2_pb2.CUDA
|
||||
device_option.device_id = 1
|
||||
device_option.cuda_gpu_id = 1
|
||||
weight = init_net.XavierFill([], 'fc_w', shape=[10, 100])
|
||||
bias = init_net.ConstantFill([], 'fc_b', shape=[10, ])
|
||||
const = init_net.ConstantFill([], 'const', shape=[], value=1.)
|
||||
|
|
@ -791,12 +791,12 @@ class TestInferDevice(test_util.TestCase):
|
|||
op = nets[1]._net.op[0]
|
||||
self.assertEqual(op.type, "CopyCPUToGPU")
|
||||
self.assertEqual(op.device_option.device_type, 1)
|
||||
self.assertEqual(op.device_option.device_id, 1)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 1)
|
||||
self.assertEqual(op.output[0], "fc_w_cuda_1")
|
||||
op = nets[1]._net.op[1]
|
||||
self.assertEqual(op.type, "CopyCPUToGPU")
|
||||
self.assertEqual(op.device_option.device_type, 1)
|
||||
self.assertEqual(op.device_option.device_id, 1)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 1)
|
||||
self.assertEqual(op.output[0], "fc_b_cuda_1")
|
||||
op = nets[1]._net.op[2]
|
||||
self.assertEqual(op.type, "FC")
|
||||
|
|
@ -804,7 +804,7 @@ class TestInferDevice(test_util.TestCase):
|
|||
self.assertEqual(op.input[1], "fc_w_cuda_1")
|
||||
self.assertEqual(op.input[2], "fc_b_cuda_1")
|
||||
self.assertEqual(op.device_option.device_type, 1)
|
||||
self.assertEqual(op.device_option.device_id, 1)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 1)
|
||||
op = nets[1]._net.op[3]
|
||||
self.assertEqual(op.type, "Add")
|
||||
self.assertEqual(op.input[0], "fc1")
|
||||
|
|
@ -822,7 +822,7 @@ op {
|
|||
type: "CopyCPUToGPU"
|
||||
device_option {
|
||||
device_type: 1
|
||||
device_id: 1
|
||||
cuda_gpu_id: 1
|
||||
}
|
||||
}
|
||||
op {
|
||||
|
|
@ -832,7 +832,7 @@ op {
|
|||
type: "CopyCPUToGPU"
|
||||
device_option {
|
||||
device_type: 1
|
||||
device_id: 1
|
||||
cuda_gpu_id: 1
|
||||
}
|
||||
}
|
||||
op {
|
||||
|
|
@ -844,7 +844,7 @@ op {
|
|||
type: "FC"
|
||||
device_option {
|
||||
device_type: 1
|
||||
device_id: 1
|
||||
cuda_gpu_id: 1
|
||||
}
|
||||
}
|
||||
op {
|
||||
|
|
@ -855,7 +855,7 @@ op {
|
|||
type: "Add"
|
||||
device_option {
|
||||
device_type: 1
|
||||
device_id: 1
|
||||
cuda_gpu_id: 1
|
||||
}
|
||||
}
|
||||
external_input: "data"
|
||||
|
|
@ -870,7 +870,7 @@ external_input: "const_cuda_1"
|
|||
init_net = core.Net("init")
|
||||
device_option = caffe2_pb2.DeviceOption()
|
||||
device_option.device_type = caffe2_pb2.CUDA
|
||||
device_option.device_id = 1
|
||||
device_option.cuda_gpu_id = 1
|
||||
|
||||
with core.DeviceScope(device_option):
|
||||
weight = init_net.XavierFill([], 'fc_w', shape=[10, 100])
|
||||
|
|
@ -887,7 +887,7 @@ external_input: "const_cuda_1"
|
|||
self.assertEqual(op.input[1], "fc_w")
|
||||
self.assertEqual(op.input[2], "fc_b")
|
||||
self.assertEqual(op.device_option.device_type, 1)
|
||||
self.assertEqual(op.device_option.device_id, 1)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 1)
|
||||
"""
|
||||
For reference, net.Proto() should be like:
|
||||
name: ""
|
||||
|
|
@ -900,7 +900,7 @@ op {
|
|||
type: "FC"
|
||||
device_option {
|
||||
device_type: 1
|
||||
device_id: 1
|
||||
cuda_gpu_id: 1
|
||||
}
|
||||
}
|
||||
external_input: "data"
|
||||
|
|
@ -912,7 +912,7 @@ external_input: "fc_b"
|
|||
net = core.Net("test")
|
||||
device_option = caffe2_pb2.DeviceOption()
|
||||
device_option.device_type = caffe2_pb2.CUDA
|
||||
device_option.device_id = 1
|
||||
device_option.cuda_gpu_id = 1
|
||||
|
||||
with core.DeviceScope(device_option):
|
||||
net.Relu("data", "relu1")
|
||||
|
|
@ -920,10 +920,10 @@ external_input: "fc_b"
|
|||
with core.DeviceScope(device_option):
|
||||
net.Relu("data", "relu3")
|
||||
net.Relu("data", "relu4")
|
||||
device_option.device_id = 0
|
||||
device_option.cuda_gpu_id = 0
|
||||
with core.DeviceScope(device_option):
|
||||
net.Relu("data", "relu5")
|
||||
device_option.device_id = 1
|
||||
device_option.cuda_gpu_id = 1
|
||||
with core.DeviceScope(device_option):
|
||||
net.Relu("data", "relu6")
|
||||
|
||||
|
|
@ -931,12 +931,12 @@ external_input: "fc_b"
|
|||
op = new_net._net.op[0]
|
||||
self.assertEqual(op.type, "CopyCPUToGPU")
|
||||
self.assertEqual(op.device_option.device_type, 1)
|
||||
self.assertEqual(op.device_option.device_id, 1)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 1)
|
||||
self.assertEqual(op.output[0], "data_cuda_1")
|
||||
op = new_net._net.op[1]
|
||||
self.assertEqual(op.type, "Relu")
|
||||
self.assertEqual(op.device_option.device_type, 1)
|
||||
self.assertEqual(op.device_option.device_id, 1)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 1)
|
||||
self.assertEqual(op.output[0], "relu1")
|
||||
op = new_net._net.op[2]
|
||||
self.assertEqual(op.type, "Relu")
|
||||
|
|
@ -945,7 +945,7 @@ external_input: "fc_b"
|
|||
op = new_net._net.op[3]
|
||||
self.assertEqual(op.type, "Relu")
|
||||
self.assertEqual(op.device_option.device_type, 1)
|
||||
self.assertEqual(op.device_option.device_id, 1)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 1)
|
||||
self.assertEqual(op.input[0], "data_cuda_1")
|
||||
self.assertEqual(op.output[0], "relu3")
|
||||
op = new_net._net.op[4]
|
||||
|
|
@ -955,18 +955,18 @@ external_input: "fc_b"
|
|||
op = new_net._net.op[5]
|
||||
self.assertEqual(op.type, "CopyCPUToGPU")
|
||||
self.assertEqual(op.device_option.device_type, 1)
|
||||
self.assertEqual(op.device_option.device_id, 0)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 0)
|
||||
self.assertEqual(op.output[0], "data_cuda_0")
|
||||
op = new_net._net.op[6]
|
||||
self.assertEqual(op.type, "Relu")
|
||||
self.assertEqual(op.device_option.device_type, 1)
|
||||
self.assertEqual(op.device_option.device_id, 0)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 0)
|
||||
self.assertEqual(op.input[0], "data_cuda_0")
|
||||
self.assertEqual(op.output[0], "relu5")
|
||||
op = new_net._net.op[7]
|
||||
self.assertEqual(op.type, "Relu")
|
||||
self.assertEqual(op.device_option.device_type, 1)
|
||||
self.assertEqual(op.device_option.device_id, 1)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 1)
|
||||
self.assertEqual(op.input[0], "data_cuda_1")
|
||||
self.assertEqual(op.output[0], "relu6")
|
||||
"""
|
||||
|
|
@ -979,7 +979,7 @@ op {
|
|||
type: "CopyCPUToGPU"
|
||||
device_option {
|
||||
device_type: 1
|
||||
device_id: 1
|
||||
cuda_gpu_id: 1
|
||||
}
|
||||
}
|
||||
op {
|
||||
|
|
@ -989,7 +989,7 @@ op {
|
|||
type: "Relu"
|
||||
device_option {
|
||||
device_type: 1
|
||||
device_id: 1
|
||||
cuda_gpu_id: 1
|
||||
}
|
||||
}
|
||||
op {
|
||||
|
|
@ -1005,7 +1005,7 @@ op {
|
|||
type: "Relu"
|
||||
device_option {
|
||||
device_type: 1
|
||||
device_id: 1
|
||||
cuda_gpu_id: 1
|
||||
}
|
||||
}
|
||||
op {
|
||||
|
|
@ -1021,7 +1021,7 @@ op {
|
|||
type: "CopyCPUToGPU"
|
||||
device_option {
|
||||
device_type: 1
|
||||
device_id: 0
|
||||
cuda_gpu_id: 0
|
||||
}
|
||||
}
|
||||
op {
|
||||
|
|
@ -1031,7 +1031,7 @@ op {
|
|||
type: "Relu"
|
||||
device_option {
|
||||
device_type: 1
|
||||
device_id: 0
|
||||
cuda_gpu_id: 0
|
||||
}
|
||||
}
|
||||
op {
|
||||
|
|
@ -1041,7 +1041,7 @@ op {
|
|||
type: "Relu"
|
||||
device_option {
|
||||
device_type: 1
|
||||
device_id: 1
|
||||
cuda_gpu_id: 1
|
||||
}
|
||||
}
|
||||
external_input: "data"
|
||||
|
|
@ -1060,7 +1060,7 @@ external_input: "data"
|
|||
cpu_device[i].node_name = 'node:' + str(i)
|
||||
gpu_device.append(caffe2_pb2.DeviceOption())
|
||||
gpu_device[i].device_type = caffe2_pb2.CUDA
|
||||
gpu_device[i].device_id = 0
|
||||
gpu_device[i].cuda_gpu_id = 0
|
||||
gpu_device[i].node_name = 'node:' + str(i)
|
||||
send_node = 'node:0'
|
||||
recv_node = 'node:1'
|
||||
|
|
@ -1100,12 +1100,12 @@ external_input: "data"
|
|||
op = init_net._net.op[2]
|
||||
self.assertEqual(op.type, "CopyGPUToCPU")
|
||||
self.assertEqual(op.device_option.device_type, 1)
|
||||
self.assertEqual(op.device_option.device_id, 0)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 0)
|
||||
self.assertEqual(op.output[0], "fc_w_cpu")
|
||||
op = init_net._net.op[3]
|
||||
self.assertEqual(op.type, "CopyGPUToCPU")
|
||||
self.assertEqual(op.device_option.device_type, 1)
|
||||
self.assertEqual(op.device_option.device_id, 0)
|
||||
self.assertEqual(op.device_option.cuda_gpu_id, 0)
|
||||
self.assertEqual(op.output[0], "fc_b_cpu")
|
||||
op = init_net._net.op[4]
|
||||
self.assertEqual(op.type, placeholder_send)
|
||||
|
|
@ -1128,7 +1128,7 @@ external_input: "data"
|
|||
net = core.Net("test")
|
||||
device_option = caffe2_pb2.DeviceOption()
|
||||
device_option.device_type = caffe2_pb2.CUDA
|
||||
device_option.device_id = 1
|
||||
device_option.cuda_gpu_id = 1
|
||||
|
||||
net.Adagrad(['param', 'moment', 'grad', 'lr'], ['param', 'moment'])
|
||||
with core.DeviceScope(device_option):
|
||||
|
|
|
|||
|
|
@ -813,7 +813,7 @@ def ConvertNetForDevice(net, device=None):
|
|||
|
||||
device_prefix = "gpu" if device.device_type == caffe2_pb2.CUDA else "cpu"
|
||||
|
||||
namescope = "{}_{}/".format(device_prefix, device.device_id)
|
||||
namescope = "{}_{}/".format(device_prefix, device.cuda_gpu_id)
|
||||
for op in mnet.Proto().op:
|
||||
if "RecurrentNetwork" in op.type:
|
||||
raise("RecurrentNetwork conversion not yet supported")
|
||||
|
|
@ -1540,7 +1540,7 @@ def _AnalyzeOperators(model):
|
|||
continue
|
||||
|
||||
op_dev = op.device_option
|
||||
op_gpu = op_dev.device_id
|
||||
op_gpu = op_dev.cuda_gpu_id
|
||||
|
||||
# This avoids failing on operators that are only for CPU
|
||||
if op_dev.device_type != caffe2_pb2.CUDA:
|
||||
|
|
@ -1904,7 +1904,7 @@ def _InterleaveOps(model):
|
|||
new_ops = []
|
||||
ops = {d: [] for d in range(num_devices)}
|
||||
for op in orig_ops:
|
||||
ops[op.device_option.device_id].append(op)
|
||||
ops[op.device_option.cuda_gpu_id].append(op)
|
||||
|
||||
for j in range(num_ops_per_dev):
|
||||
tp = None
|
||||
|
|
|
|||
|
|
@ -259,7 +259,7 @@ device_options = _device_options_no_hip + ([hip_do] if workspace.has_hip_support
|
|||
|
||||
# Include device option for each GPU
|
||||
expanded_device_options = [cpu_do] + (
|
||||
[caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA, device_id=i)
|
||||
[caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA, cuda_gpu_id=i)
|
||||
for i in range(workspace.NumCudaDevices())]
|
||||
if workspace.has_gpu_support else [])
|
||||
|
||||
|
|
|
|||
|
|
@ -596,7 +596,7 @@ def ExtractPredictorNet(
|
|||
rename_list(step_op.output)
|
||||
if device is not None:
|
||||
step_op.device_option.device_type = device.device_type
|
||||
step_op.device_option.device_id = device.device_id
|
||||
step_op.device_option.cuda_gpu_id = device.cuda_gpu_id
|
||||
|
||||
rename_list(arg.n.external_input)
|
||||
rename_list(arg.n.external_output)
|
||||
|
|
@ -610,7 +610,7 @@ def ExtractPredictorNet(
|
|||
|
||||
if device is not None:
|
||||
op.device_option.device_type = device.device_type
|
||||
op.device_option.device_id = device.device_id
|
||||
op.device_option.cuda_gpu_id = device.cuda_gpu_id
|
||||
validate_op(op)
|
||||
predict_proto.op.extend([op])
|
||||
known_blobs.update(op.output)
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ def OnGPU(gpu_id):
|
|||
"""
|
||||
device_option = caffe2_pb2.DeviceOption()
|
||||
device_option.device_type = caffe2_pb2.CUDA
|
||||
device_option.device_id = gpu_id
|
||||
device_option.cuda_gpu_id = gpu_id
|
||||
return device_option
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -268,11 +268,11 @@ def call(op, inputs=None, outputs=None, factor_prefixes=False):
|
|||
|
||||
def format_device_option(dev_opt):
|
||||
if not dev_opt or not (
|
||||
dev_opt.device_type or dev_opt.device_id or dev_opt.node_name):
|
||||
dev_opt.device_type or dev_opt.cuda_gpu_id or dev_opt.node_name):
|
||||
return None
|
||||
return call(
|
||||
'DeviceOption',
|
||||
[dev_opt.device_type, dev_opt.device_id, "'%s'" % dev_opt.node_name])
|
||||
[dev_opt.device_type, dev_opt.cuda_gpu_id, "'%s'" % dev_opt.node_name])
|
||||
|
||||
|
||||
@Printer.register(OperatorDef)
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ def build_test_net(net_name):
|
|||
|
||||
gpu_device_option = caffe2_pb2.DeviceOption()
|
||||
gpu_device_option.device_type = caffe2_pb2.CUDA
|
||||
gpu_device_option.device_id = 0
|
||||
gpu_device_option.cuda_gpu_id = 0
|
||||
|
||||
net.CopyCPUToGPU("output_blob_0", "output_blob_0_gpu",
|
||||
device_option=gpu_device_option)
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ class Caffe2Rep(BackendRep):
|
|||
@property
|
||||
def _name_scope(self):
|
||||
if self.predict_net.device_option.device_type == caffe2_pb2.CUDA:
|
||||
return 'gpu_{}'.format(self.predict_net.device_option.device_id)
|
||||
return 'gpu_{}'.format(self.predict_net.device_option.cuda_gpu_id)
|
||||
return ''
|
||||
|
||||
def run(self, inputs, **kwargs):
|
||||
|
|
|
|||
|
|
@ -91,7 +91,7 @@ class TestLoadSaveBase(test_util.TestCase):
|
|||
self.assertEqual(proto.tensor.device_detail.device_type,
|
||||
device_type)
|
||||
if device_type == caffe2_pb2.CUDA:
|
||||
self.assertEqual(proto.tensor.device_detail.device_id,
|
||||
self.assertEqual(proto.tensor.device_detail.cuda_gpu_id,
|
||||
gpu_id)
|
||||
|
||||
blobs = [str(i) for i in range(len(arrays))]
|
||||
|
|
|
|||
|
|
@ -1216,7 +1216,7 @@ class RNNCellTest(hu.HypothesisTestCase):
|
|||
if arg.name == "step_net":
|
||||
for step_op in arg.n.op:
|
||||
self.assertEqual(0, step_op.device_option.device_type)
|
||||
self.assertEqual(1, step_op.device_option.device_id)
|
||||
self.assertEqual(1, step_op.device_option.cuda_gpu_id)
|
||||
elif arg.name == 'backward_step_net':
|
||||
self.assertEqual(caffe2_pb2.NetDef(), arg.n)
|
||||
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ class Optimizer(object):
|
|||
|
||||
if current_scope.device_type == caffe2_pb2.CUDA:
|
||||
return self.get_gpu_blob_name(
|
||||
base_str, current_scope.device_id, current_scope.node_name
|
||||
base_str, current_scope.cuda_gpu_id, current_scope.node_name
|
||||
)
|
||||
else:
|
||||
return self.get_cpu_blob_name(base_str, current_scope.node_name)
|
||||
|
|
@ -279,7 +279,7 @@ class SgdOptimizer(Optimizer):
|
|||
# to include device information.
|
||||
ONE = param_init_net.ConstantFill(
|
||||
[],
|
||||
"ONE_{}_{}{}".format(dev.device_type, dev.device_id, dev.node_name),
|
||||
"ONE_{}_{}{}".format(dev.device_type, dev.cuda_gpu_id, dev.node_name),
|
||||
shape=[1],
|
||||
value=1.0
|
||||
)
|
||||
|
|
@ -488,12 +488,12 @@ class WeightDecayBuilder(Optimizer):
|
|||
|
||||
ONE = param_init_net.ConstantFill(
|
||||
[],
|
||||
"ONE_{}_{}".format(dev.device_type, dev.device_id),
|
||||
"ONE_{}_{}".format(dev.device_type, dev.cuda_gpu_id),
|
||||
shape=[1],
|
||||
value=1.0
|
||||
)
|
||||
WD = param_init_net.ConstantFill(
|
||||
[], "wd_{}_{}".format(dev.device_type, dev.device_id),
|
||||
[], "wd_{}_{}".format(dev.device_type, dev.cuda_gpu_id),
|
||||
shape=[1], value=self.weight_decay
|
||||
)
|
||||
|
||||
|
|
@ -1160,7 +1160,7 @@ class RmsPropOptimizer(Optimizer):
|
|||
|
||||
ONE = param_init_net.ConstantFill(
|
||||
[],
|
||||
"ONE_{}_{}".format(dev.device_type, dev.device_id),
|
||||
"ONE_{}_{}".format(dev.device_type, dev.cuda_gpu_id),
|
||||
shape=[1],
|
||||
value=1.0
|
||||
)
|
||||
|
|
|
|||
|
|
@ -193,7 +193,7 @@ class PredictorExporterTest(unittest.TestCase):
|
|||
|
||||
# check device options
|
||||
for op in list(init_net.Proto().op) + list(predict_init_net.Proto().op):
|
||||
self.assertEqual(1, op.device_option.device_id)
|
||||
self.assertEqual(1, op.device_option.cuda_gpu_id)
|
||||
self.assertEqual(caffe2_pb2.CPU, op.device_option.device_type)
|
||||
|
||||
def test_db_fails_without_params(self):
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ class DLPackWrapper {
|
|||
"Unsupported device type: ",
|
||||
device_option.device_type());
|
||||
tensor_context.device_type = *device_type_ptr;
|
||||
tensor_context.device_id = device_option.device_id();
|
||||
tensor_context.device_id = device_option.cuda_gpu_id();
|
||||
|
||||
if (tensor->size() <= 0) {
|
||||
tensor->Resize(0);
|
||||
|
|
@ -87,7 +87,7 @@ class DLPackWrapper {
|
|||
int dlpack_device_id = dlTensor->ctx.device_id;
|
||||
CAFFE_ENFORCE_EQ(
|
||||
dlpack_device_id,
|
||||
device_option.device_id(),
|
||||
device_option.cuda_gpu_id(),
|
||||
"Expected same device id for DLPack and C2 tensors");
|
||||
|
||||
std::vector<int64_t> dims;
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ C10_EXPORT int DeviceId(const DeviceOption& option) {
|
|||
case PROTO_CPU:
|
||||
return option.numa_node_id();
|
||||
case PROTO_CUDA:
|
||||
return option.device_id();
|
||||
return option.cuda_gpu_id();
|
||||
case PROTO_MKLDNN:
|
||||
return option.numa_node_id();
|
||||
case PROTO_HIP:
|
||||
|
|
@ -43,7 +43,7 @@ C10_EXPORT int DeviceId(const DeviceOption& option) {
|
|||
C10_EXPORT bool IsSameDevice(const DeviceOption& lhs, const DeviceOption& rhs) {
|
||||
return (
|
||||
lhs.device_type() == rhs.device_type() &&
|
||||
lhs.device_id() == rhs.device_id() &&
|
||||
lhs.cuda_gpu_id() == rhs.cuda_gpu_id() &&
|
||||
lhs.hip_gpu_id() == rhs.hip_gpu_id() &&
|
||||
lhs.node_name() == rhs.node_name() &&
|
||||
lhs.numa_node_id() == rhs.numa_node_id());
|
||||
|
|
|
|||
|
|
@ -11,9 +11,9 @@ TEST(ProtoUtilsTest, IsSameDevice) {
|
|||
EXPECT_FALSE(IsSameDevice(a, b));
|
||||
b.set_node_name("my_node");
|
||||
EXPECT_TRUE(IsSameDevice(a, b));
|
||||
b.set_device_id(2);
|
||||
b.set_cuda_gpu_id(2);
|
||||
EXPECT_FALSE(IsSameDevice(a, b));
|
||||
a.set_device_id(2);
|
||||
a.set_cuda_gpu_id(2);
|
||||
EXPECT_TRUE(IsSameDevice(a, b));
|
||||
a.set_device_type(DeviceTypeProto::PROTO_CUDA);
|
||||
b.set_device_type(DeviceTypeProto::PROTO_CPU);
|
||||
|
|
|
|||
|
|
@ -2216,7 +2216,7 @@ CAFFE2_SPECIFIC_MAPPINGS = {
|
|||
"CURAND_ENFORCE" :("HIPRAND_ENFORCE", API_CAFFE2),
|
||||
"curandGenerateUniform" : ("hiprandGenerateUniform", API_CAFFE2),
|
||||
"curand_generator" : ("hiprand_generator", API_CAFFE2),
|
||||
"device_id" : ("hip_gpu_id", API_CAFFE2),
|
||||
"cuda_gpu_id" : ("hip_gpu_id", API_CAFFE2),
|
||||
"CaffeCudaGetDevice" : ("CaffeHipGetDevice", API_CAFFE2),
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue