From ff608a9ff3edded33764c8631427e92c7288bafb Mon Sep 17 00:00:00 2001 From: Junjie Bai Date: Mon, 1 Oct 2018 21:44:08 -0700 Subject: [PATCH] Back out "Revert D10123245: Back out "codemod cuda_gpu_id to device_id"" (#12232) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/12232 Original commit changeset: fca91fea58b7 This adds proper modifications to the DeviceType <->DeviceOption conversion code added in D10033396 Reviewed By: jerryzh168 Differential Revision: D10132473 fbshipit-source-id: 801ef777e2950982cb47b48051b1471a0a91e64b --- caffe2/contrib/nccl/cuda_nccl_op_gpu.cc | 4 +- caffe2/contrib/nccl/nccl_ops_test.py | 2 +- caffe2/contrib/prof/prof_dag_net.cc | 4 +- .../tensorboard/tensorboard_exporter.py | 2 +- caffe2/contrib/warpctc/ctc_ops_test.py | 8 +- caffe2/core/blob_gpu_test.cc | 4 +- caffe2/core/context_gpu.cu | 2 +- caffe2/core/context_gpu.h | 6 +- caffe2/core/cudnn_wrappers.h | 6 +- caffe2/core/event_gpu.cc | 16 ++-- caffe2/core/hip/event_hip.cc | 2 +- caffe2/core/memonger.cc | 4 +- caffe2/core/net_async_base.cc | 4 +- caffe2/core/net_async_dag_gpu.cc | 2 +- caffe2/core/net_gpu_test.cc | 2 +- caffe2/core/operator.cc | 2 +- caffe2/mkl/utils/mkl_memory.cc | 2 +- caffe2/observers/profile_observer_gpu.cc | 4 +- caffe2/onnx/backend.cc | 2 +- caffe2/operators/load_save_op_gpu.cc | 2 +- .../rnn/recurrent_network_executor_gpu.cc | 4 +- caffe2/proto/caffe2.proto | 2 +- caffe2/proto/caffe2_pb.h | 46 ++++++++++- caffe2/python/cnn.py | 2 +- caffe2/python/core.py | 16 ++-- caffe2/python/core_test.py | 82 +++++++++---------- caffe2/python/data_parallel_model.py | 6 +- caffe2/python/hypothesis_test_util.py | 2 +- caffe2/python/model_helper.py | 4 +- caffe2/python/muji.py | 2 +- caffe2/python/net_printer.py | 4 +- caffe2/python/numa_test.py | 2 +- caffe2/python/onnx/backend_rep.py | 2 +- caffe2/python/operator_test/load_save_test.py | 2 +- caffe2/python/operator_test/rnn_cell_test.py | 2 +- caffe2/python/optimizer.py | 10 +-- .../predictor/predictor_exporter_test.py | 2 +- caffe2/python/pybind_state_dlpack.h | 4 +- caffe2/utils/proto_utils.cc | 4 +- caffe2/utils/proto_utils_test.cc | 4 +- .../pyHIPIFY/cuda_to_hip_mappings.py | 2 +- 41 files changed, 163 insertions(+), 121 deletions(-) diff --git a/caffe2/contrib/nccl/cuda_nccl_op_gpu.cc b/caffe2/contrib/nccl/cuda_nccl_op_gpu.cc index 4c5313ff4b3..ea8b3494c6a 100644 --- a/caffe2/contrib/nccl/cuda_nccl_op_gpu.cc +++ b/caffe2/contrib/nccl/cuda_nccl_op_gpu.cc @@ -11,7 +11,7 @@ nccl::NCCLExecution getNCCLElements( // We either do an N-N op, or an N-1 op. CAFFE_ENFORCE(op->InputSize() == op->OutputSize() || op->OutputSize() == 1); nccl::NCCLExecution ex; - ex.stream_gpu_id = context.device_id(); + ex.stream_gpu_id = context.cuda_gpu_id(); ex.stream = context.cuda_stream(); ex.root = op->template GetSingleArgument("root", 0); ex.elements.resize(op->InputSize()); @@ -204,7 +204,7 @@ std::pair, std::vector> ncclOpDevInfer( for (int i = 0; i < def.input().size(); ++i) { DeviceOption dev; dev.set_device_type(1); - dev.set_device_id(i); + dev.set_cuda_gpu_id(i); opt.push_back(dev); } return std::make_pair(opt, opt); diff --git a/caffe2/contrib/nccl/nccl_ops_test.py b/caffe2/contrib/nccl/nccl_ops_test.py index f6c22a7d750..7e8a61e9de2 100644 --- a/caffe2/contrib/nccl/nccl_ops_test.py +++ b/caffe2/contrib/nccl/nccl_ops_test.py @@ -21,7 +21,7 @@ dyndep.InitOpsLibrary('@/caffe2/caffe2/contrib/nccl:nccl_ops') def gpu_device(i): device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA - device_option.device_id = i + device_option.cuda_gpu_id = i return device_option diff --git a/caffe2/contrib/prof/prof_dag_net.cc b/caffe2/contrib/prof/prof_dag_net.cc index c8678652c31..16917ddc154 100644 --- a/caffe2/contrib/prof/prof_dag_net.cc +++ b/caffe2/contrib/prof/prof_dag_net.cc @@ -33,9 +33,9 @@ void ProfDAGNet::ValidateOpTensorDevices() { had_mismatches = true; LOG(INFO) << "== PERFORMANCE WARNING == \n" << " Operator " << node.operator_->debug_def().type() - << " expects GPU " << mismatch.second.first.device_id() + << " expects GPU " << mismatch.second.first.cuda_gpu_id() << " but tensor [" << mismatch.first << "] is on GPU " - << mismatch.second.second.device_id(); + << mismatch.second.second.cuda_gpu_id(); } } if (!had_mismatches) { diff --git a/caffe2/contrib/tensorboard/tensorboard_exporter.py b/caffe2/contrib/tensorboard/tensorboard_exporter.py index cc2c3d85c96..93ade48e7d2 100644 --- a/caffe2/contrib/tensorboard/tensorboard_exporter.py +++ b/caffe2/contrib/tensorboard/tensorboard_exporter.py @@ -177,7 +177,7 @@ def _tf_device(device_option): if device_option.device_type == caffe2_pb2.CPU: return "/cpu:*" if device_option.device_type == caffe2_pb2.CUDA: - return "/gpu:{}".format(device_option.device_id) + return "/gpu:{}".format(device_option.cuda_gpu_id) raise Exception("Unhandled device", device_option) diff --git a/caffe2/contrib/warpctc/ctc_ops_test.py b/caffe2/contrib/warpctc/ctc_ops_test.py index 3b21c8b6674..25bb0a39e3a 100644 --- a/caffe2/contrib/warpctc/ctc_ops_test.py +++ b/caffe2/contrib/warpctc/ctc_ops_test.py @@ -79,11 +79,11 @@ class CTCOpsTest(test_util.TestCase): def test_ctc_cost_gpu(self): self.verify_cost( caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA, - device_id=0), + cuda_gpu_id=0), is_test=False) self.verify_cost( caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA, - device_id=0), + cuda_gpu_id=0), is_test=False, skip_input_lengths=True) @@ -99,10 +99,10 @@ class CTCOpsTest(test_util.TestCase): def test_ctc_forward_only_gpu(self): self.verify_cost( caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA, - device_id=0), + cuda_gpu_id=0), is_test=True) self.verify_cost( caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA, - device_id=0), + cuda_gpu_id=0), is_test=True, skip_input_lengths=True) diff --git a/caffe2/core/blob_gpu_test.cc b/caffe2/core/blob_gpu_test.cc index 8b4127e403a..55eafdede72 100644 --- a/caffe2/core/blob_gpu_test.cc +++ b/caffe2/core/blob_gpu_test.cc @@ -195,7 +195,7 @@ TEST(TensorTest, TensorSerializationMultiDevices) { } EXPECT_TRUE(tensor_proto.has_device_detail()); EXPECT_EQ(tensor_proto.device_detail().device_type(), PROTO_CUDA); - EXPECT_EQ(tensor_proto.device_detail().device_id(), gpu_id); + EXPECT_EQ(tensor_proto.device_detail().cuda_gpu_id(), gpu_id); // Test if the restored blob is still of the same device. blob.Reset(); EXPECT_NO_THROW(DeserializeBlob(serialized, &blob)); @@ -205,7 +205,7 @@ TEST(TensorTest, TensorSerializationMultiDevices) { // Test if we force the restored blob on a different device, we // can still get so. blob.Reset(); - proto.mutable_tensor()->mutable_device_detail()->set_device_id(0); + proto.mutable_tensor()->mutable_device_detail()->set_cuda_gpu_id(0); EXPECT_NO_THROW(DeserializeBlob(proto.SerializeAsString(), &blob)); EXPECT_TRUE(BlobIsTensorType(blob, CUDA)); EXPECT_EQ(GetGPUIDForPointer(blob.Get().data()), 0); diff --git a/caffe2/core/context_gpu.cu b/caffe2/core/context_gpu.cu index f10fe067ac7..0d9e2686212 100644 --- a/caffe2/core/context_gpu.cu +++ b/caffe2/core/context_gpu.cu @@ -256,7 +256,7 @@ CUDAContext::CUDAContext(const int gpu_id) CUDAContext::CUDAContext(const DeviceOption& option) : gpu_id_( - option.has_device_id() ? RectifyGPUID(option.device_id()) + option.has_cuda_gpu_id() ? RectifyGPUID(option.cuda_gpu_id()) : CaffeCudaGetDevice()), random_seed_( option.has_random_seed() ? option.random_seed() diff --git a/caffe2/core/context_gpu.h b/caffe2/core/context_gpu.h index 65ba4a006a9..ce73f5f9428 100644 --- a/caffe2/core/context_gpu.h +++ b/caffe2/core/context_gpu.h @@ -184,7 +184,7 @@ class CAFFE2_CUDA_API CUDAContext final : public BaseContext { } } - inline int device_id() const { + inline int cuda_gpu_id() const { return gpu_id_; } @@ -283,7 +283,7 @@ class CAFFE2_CUDA_API CUDAContext final : public BaseContext { } static bool IsStreamFree(const DeviceOption& option, int stream_id) { - auto stream = CUDAContext::cuda_stream(option.device_id(), stream_id); + auto stream = CUDAContext::cuda_stream(option.cuda_gpu_id(), stream_id); return cudaStreamQuery(stream) == cudaSuccess; } @@ -393,7 +393,7 @@ class CAFFE2_CUDA_API CUDAStaticContext final : public BaseStaticContext { void ExtractDeviceOption(DeviceOption* device, const void* data) override { device->set_device_type(TypeToProto(GetDeviceType())); - device->set_device_id(GetGPUIDForPointer(data)); + device->set_cuda_gpu_id(GetGPUIDForPointer(data)); } protected: diff --git a/caffe2/core/cudnn_wrappers.h b/caffe2/core/cudnn_wrappers.h index dea138e9ad5..1bd39fa62a3 100644 --- a/caffe2/core/cudnn_wrappers.h +++ b/caffe2/core/cudnn_wrappers.h @@ -122,9 +122,9 @@ class CuDNNWrapper { void with_cudnn_state(size_t state_idx, F&& f) { CAFFE_ENFORCE( state_idx < CAFFE2_COMPILE_TIME_MAX_CUDNN_STATES, "Invalid state_idx"); - auto& sync_state = cudnn_states()[context_->device_id()][state_idx]; + auto& sync_state = cudnn_states()[context_->cuda_gpu_id()][state_idx]; - DeviceGuard dg(context_->device_id()); + DeviceGuard dg(context_->cuda_gpu_id()); // We need to serialize execution on the CuDNNState as we can't // allow multiple threads to race through the cudaEventRecord @@ -132,7 +132,7 @@ class CuDNNWrapper { // execution) std::lock_guard g(sync_state.mutex); if (!sync_state.state.get()) { - sync_state.state.reset(new CuDNNState(context_->device_id())); + sync_state.state.reset(new CuDNNState(context_->cuda_gpu_id())); } CHECK_NOTNULL(sync_state.state.get())->execute(context_->cuda_stream(), f); } diff --git a/caffe2/core/event_gpu.cc b/caffe2/core/event_gpu.cc index 44aec8d3f2b..6253ca19c9a 100644 --- a/caffe2/core/event_gpu.cc +++ b/caffe2/core/event_gpu.cc @@ -9,21 +9,21 @@ namespace caffe2 { struct CudaEventWrapper { explicit CudaEventWrapper(const DeviceOption& option) : cuda_stream_(nullptr), - device_id_(option.device_id()), + cuda_gpu_id_(option.cuda_gpu_id()), status_(EventStatus::EVENT_INITIALIZED) { CAFFE_ENFORCE(option.device_type(), PROTO_CUDA); - DeviceGuard g(device_id_); + DeviceGuard g(cuda_gpu_id_); CUDA_ENFORCE(cudaEventCreate( &cuda_event_, cudaEventDefault | cudaEventDisableTiming)); } ~CudaEventWrapper() { - DeviceGuard g(device_id_); + DeviceGuard g(cuda_gpu_id_); CUDA_CHECK(cudaEventDestroy(cuda_event_)); } cudaEvent_t cuda_event_; cudaStream_t cuda_stream_; - int device_id_; + int cuda_gpu_id_; std::atomic status_; std::mutex mutex_recorded_; @@ -65,12 +65,12 @@ void EventRecordCUDA(Event* event, const void* context, const char* err_msg) { const auto& current_device = CaffeCudaGetDevice(); CAFFE_ENFORCE_EQ( current_device, - wrapper->device_id_, + wrapper->cuda_gpu_id_, "When you call EventRecordCUDA, your current device should be the same " "as the device specified by the event."); CAFFE_ENFORCE_EQ( current_device, - static_cast(context)->device_id()); + static_cast(context)->cuda_gpu_id()); CUDA_ENFORCE(cudaEventRecord( wrapper->cuda_event_, static_cast(context)->cuda_stream())); @@ -96,7 +96,7 @@ void EventFinishCUDA(const Event* event) { if (wrapper->status_ == EventStatus::EVENT_SCHEDULED) { // ok, even if event is already completed and status was not yet updated - DeviceGuard g(wrapper->device_id_); + DeviceGuard g(wrapper->cuda_gpu_id_); auto cudaResult = cudaEventSynchronize(wrapper->cuda_event_); if (cudaResult == cudaSuccess) { wrapper->status_ = EventStatus::EVENT_SUCCESS; @@ -127,7 +127,7 @@ void EventWaitCUDACUDA(const Event* event, void* context) { if (context_stream != event_stream) { // CAFFE_ENFORCE_EQ( // CaffeCudaGetDevice(), - // static_cast(context)->device_id()); + // static_cast(context)->cuda_gpu_id()); CUDA_CHECK(cudaStreamWaitEvent(context_stream, wrapper->cuda_event_, 0)); } } diff --git a/caffe2/core/hip/event_hip.cc b/caffe2/core/hip/event_hip.cc index ebec9c593e6..6f0db4642dd 100644 --- a/caffe2/core/hip/event_hip.cc +++ b/caffe2/core/hip/event_hip.cc @@ -138,7 +138,7 @@ void EventWaitHIPHIP(const Event* event, void* context) { // CAFFE_ENFORCE_EQ( // CaffeCudaGetDevice(), - // static_cast(context)->device_id()); + // static_cast(context)->cuda_gpu_id()); HIP_CHECK(hipStreamWaitEvent(context_stream, wrapper->hip_event_, 0)); } } diff --git a/caffe2/core/memonger.cc b/caffe2/core/memonger.cc index 87633fadebe..d9816e787ba 100644 --- a/caffe2/core/memonger.cc +++ b/caffe2/core/memonger.cc @@ -176,7 +176,7 @@ class ComputeBlobRecyclingForDag { // cuda device option but whose inputs/outputs are on CPU if (net.op(op_index).type() == "CopyGPUToCPU") { blob_device_[output].set_device_type(0); - blob_device_[output].set_device_id(0); + blob_device_[output].set_cuda_gpu_id(0); } } } @@ -478,7 +478,7 @@ class ComputeBlobRecyclingForDag { const DeviceOption& device_option) { const DeviceOption& blob_device = blob_device_[blob_name]; if (device_option.device_type() != blob_device.device_type() || - device_option.device_id() != blob_device.device_id()) { + device_option.cuda_gpu_id() != blob_device.cuda_gpu_id()) { return false; } for (const int token : req_tokens_[blob_name]) { diff --git a/caffe2/core/net_async_base.cc b/caffe2/core/net_async_base.cc index acc30e56517..fe4b57cd332 100644 --- a/caffe2/core/net_async_base.cc +++ b/caffe2/core/net_async_base.cc @@ -157,7 +157,7 @@ TaskThreadPool* AsyncNetBase::pool(const DeviceOption& device_option) { numa_node_id); return poolGetter(cpu_pools_, PROTO_CPU, numa_node_id, num_workers_); } else if (device_option.device_type() == PROTO_CUDA) { - auto gpu_id = device_option.device_id(); + auto gpu_id = device_option.cuda_gpu_id(); CAFFE_ENFORCE( gpu_id >= 0 && gpu_id < FLAGS_caffe2_net_async_max_gpus, "Invalid GPU id: " + caffe2::to_string(gpu_id)); @@ -173,7 +173,7 @@ int AsyncNetBase::stream(int task_id) { const auto& device_option = event(task_id).GetDeviceOption(); int stream_id = 0; if (device_option.device_type() == PROTO_CUDA) { - int gpu_id = device_option.device_id(); + int gpu_id = device_option.cuda_gpu_id(); CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + caffe2::to_string(gpu_id)); if ((unsigned)gpu_id >= getStreamCounters().size()) { getStreamCounters().resize(gpu_id + 1, 0); diff --git a/caffe2/core/net_async_dag_gpu.cc b/caffe2/core/net_async_dag_gpu.cc index 86d0b4d1d27..550a760826e 100644 --- a/caffe2/core/net_async_dag_gpu.cc +++ b/caffe2/core/net_async_dag_gpu.cc @@ -112,7 +112,7 @@ AsyncDAGNet::AsyncDAGNet( int AsyncDAGNet::stream(const DeviceOption& device_option) { int stream_id = 0; if (device_option.device_type() == PROTO_CUDA) { - int gpu_id = device_option.device_id(); + int gpu_id = device_option.cuda_gpu_id(); CAFFE_ENFORCE_GE(gpu_id, 0, "Invalid gpu id: " + caffe2::to_string(gpu_id)); if ((unsigned)gpu_id >= stream_counters_.size()) { stream_counters_.resize(gpu_id + 1, 0); diff --git a/caffe2/core/net_gpu_test.cc b/caffe2/core/net_gpu_test.cc index fab56112ec2..eaea9377f9b 100644 --- a/caffe2/core/net_gpu_test.cc +++ b/caffe2/core/net_gpu_test.cc @@ -124,7 +124,7 @@ TEST(NetTest, DISABLED_ChainingForDifferentDevices) { type: "NetTestDummy" device_option { device_type: 1 - device_id: 1 + cuda_gpu_id: 1 } } )DOC"; diff --git a/caffe2/core/operator.cc b/caffe2/core/operator.cc index 8115ae3aab6..79be08c03b2 100644 --- a/caffe2/core/operator.cc +++ b/caffe2/core/operator.cc @@ -649,7 +649,7 @@ std::map> ValidateTensorDevices( &blob_device); if (blob_device.device_type() == PROTO_CUDA && - blob_device.device_id() != op_device.device_id()) { + blob_device.cuda_gpu_id() != op_device.cuda_gpu_id()) { mismatches[blob_name] = std::make_pair(op_device, blob_device); } else if ( blob_device.device_type() == PROTO_HIP && diff --git a/caffe2/mkl/utils/mkl_memory.cc b/caffe2/mkl/utils/mkl_memory.cc index 9d4f347a13c..3f05f9c5d24 100644 --- a/caffe2/mkl/utils/mkl_memory.cc +++ b/caffe2/mkl/utils/mkl_memory.cc @@ -26,7 +26,7 @@ static vector GetMKLTensorInfo( const mkl::MKLMemory* tc = static_cast*>(c); *capacity = tc->size() * sizeof(T); device->set_device_type(PROTO_MKLDNN); - device->set_device_id(0); + device->set_cuda_gpu_id(0); return tc->dims(); } diff --git a/caffe2/observers/profile_observer_gpu.cc b/caffe2/observers/profile_observer_gpu.cc index 5bd9b0a11b0..bf4e20b7904 100644 --- a/caffe2/observers/profile_observer_gpu.cc +++ b/caffe2/observers/profile_observer_gpu.cc @@ -70,7 +70,7 @@ void ProfileOperatorObserver::Start() { int device; cudaGetDevice(&device); - cudaSetDevice(context->device_id()); + cudaSetDevice(context->cuda_gpu_id()); cudaEventCreate(&start_); cudaEventRecord(start_, context->cuda_stream()); @@ -92,7 +92,7 @@ void ProfileOperatorObserver::Stop() { int device; cudaGetDevice(&device); - cudaSetDevice(context->device_id()); + cudaSetDevice(context->cuda_gpu_id()); cudaEventCreate(&stop_); cudaEventRecord(stop_, context->cuda_stream()); cudaEventSynchronize(stop_); diff --git a/caffe2/onnx/backend.cc b/caffe2/onnx/backend.cc index 8a21fa0acf6..2350910febf 100644 --- a/caffe2/onnx/backend.cc +++ b/caffe2/onnx/backend.cc @@ -65,7 +65,7 @@ caffe2::DeviceOption GetDeviceOption(const Device& onnx_device) { {DeviceType::CUDA, caffe2::DeviceType::CUDA}}; caffe2::DeviceOption d; d.set_device_type(static_cast(m.at(onnx_device.type))); - d.set_device_id(onnx_device.device_id); + d.set_cuda_gpu_id(onnx_device.device_id); return d; } diff --git a/caffe2/operators/load_save_op_gpu.cc b/caffe2/operators/load_save_op_gpu.cc index f81b7789699..eaa90b3dcdb 100644 --- a/caffe2/operators/load_save_op_gpu.cc +++ b/caffe2/operators/load_save_op_gpu.cc @@ -9,7 +9,7 @@ void LoadOp::SetCurrentDevice(BlobProto* proto) { proto->mutable_tensor()->clear_device_detail(); auto* device_detail = proto->mutable_tensor()->mutable_device_detail(); device_detail->set_device_type(PROTO_CUDA); - device_detail->set_device_id(CaffeCudaGetDevice()); + device_detail->set_cuda_gpu_id(CaffeCudaGetDevice()); } } diff --git a/caffe2/operators/rnn/recurrent_network_executor_gpu.cc b/caffe2/operators/rnn/recurrent_network_executor_gpu.cc index 061f54d3a4c..e16e2073f7f 100644 --- a/caffe2/operators/rnn/recurrent_network_executor_gpu.cc +++ b/caffe2/operators/rnn/recurrent_network_executor_gpu.cc @@ -72,11 +72,11 @@ void CUDARecurrentNetworkExecutor::_ExecRange(int from, int to) { if (gpu_id == -1 && rnn_op.op->device_option().device_type() == DeviceTypeProto::PROTO_CUDA) { - gpu_id = rnn_op.op->device_option().device_id(); + gpu_id = rnn_op.op->device_option().cuda_gpu_id(); } else { CAFFE_ENFORCE( rnn_op.op->device_option().device_type() == 0 || - rnn_op.op->device_option().device_id() == gpu_id, + rnn_op.op->device_option().cuda_gpu_id() == gpu_id, "RNN Executor only supports ops on one GPU"); } diff --git a/caffe2/proto/caffe2.proto b/caffe2/proto/caffe2.proto index 63a2a256ded..9dc745edbdf 100644 --- a/caffe2/proto/caffe2.proto +++ b/caffe2/proto/caffe2.proto @@ -183,7 +183,7 @@ message DeviceOption { // optional DeviceType device_type = 1 [ default = CPU ]; optional int32 device_type = 1 [ default = 0 ]; // 0 is CPU. // [CUDA specific] the cuda gpu id. - optional int32 device_id = 2; + optional int32 cuda_gpu_id = 2; // [general] The random seed to start the device random number generator with. optional uint32 random_seed = 3; // [general] What node this op should execute on. diff --git a/caffe2/proto/caffe2_pb.h b/caffe2/proto/caffe2_pb.h index e0eb8e8dcdc..ded59d52b21 100644 --- a/caffe2/proto/caffe2_pb.h +++ b/caffe2/proto/caffe2_pb.h @@ -86,12 +86,54 @@ inline CAFFE2_API caffe2::DeviceOption DeviceToOption( caffe2::DeviceOption option; auto type = device.type(); option.set_device_type(TypeToProto(type)); - option.set_device_id(device.index()); + + switch (type) { + case DeviceType::CPU: + if (device.index() != -1) { + option.set_numa_node_id(device.index()); + } + break; + case DeviceType::CUDA: + option.set_cuda_gpu_id(device.index()); + break; + case DeviceType::HIP: + option.set_hip_gpu_id(device.index()); + break; + case DeviceType::OPENGL: + case DeviceType::OPENCL: + case DeviceType::MKLDNN: + case DeviceType::IDEEP: + case DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES: + case DeviceType::ONLY_FOR_TEST: + break; + default: + AT_ERROR( + "Unknown device:", + static_cast(type), + ". If you have recently updated the caffe2.proto file to add a new " + "device type, did you forget to update the ProtoToType() and TypeToProto" + "function to reflect such recent changes?"); + } return option; } inline CAFFE2_API at::Device OptionToDevice(const caffe2::DeviceOption option) { - return at::Device(ProtoToType(option.device_type()), option.device_id()); + auto type = option.device_type(); + int32_t id = -1; + switch (type) { + case caffe2::PROTO_CPU: + if (option.has_numa_node_id()) { + id = option.numa_node_id(); + } + break; + case caffe2::PROTO_CUDA: + id = option.cuda_gpu_id(); + break; + case caffe2::PROTO_HIP: + id = option.hip_gpu_id(); + break; + } + return at::Device(ProtoToType(type), id); } } // namespace caffe2 diff --git a/caffe2/python/cnn.py b/caffe2/python/cnn.py index f9ccf92d750..f927020e6ae 100644 --- a/caffe2/python/cnn.py +++ b/caffe2/python/cnn.py @@ -236,5 +236,5 @@ class CNNModelHelper(ModelHelper): def GPU(self, gpu_id=0): device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA - device_option.device_id = gpu_id + device_option.cuda_gpu_id = gpu_id return device_option diff --git a/caffe2/python/core.py b/caffe2/python/core.py index 4f683daa368..6850c02fc13 100644 --- a/caffe2/python/core.py +++ b/caffe2/python/core.py @@ -84,7 +84,7 @@ def IsOperatorWithEngine(op_type, engine): def DeviceOption( device_type, - device_id=0, + cuda_gpu_id=0, random_seed=None, node_name=None, numa_node_id=None, @@ -92,7 +92,7 @@ def DeviceOption( ): option = caffe2_pb2.DeviceOption() option.device_type = device_type - option.device_id = device_id + option.cuda_gpu_id = cuda_gpu_id if node_name is not None: option.node_name = node_name if random_seed is not None: @@ -115,7 +115,7 @@ def device_option_equal(opt1, opt2, ignore_node_name=True, ignore_random_seed=Tr if not opt1.device_type or not opt2.device_type: # At least one option is for CPU, check if both are for CPU. return not opt1.device_type and not opt2.device_type - return opt1.device_id == opt2.device_id + return opt1.cuda_gpu_id == opt2.cuda_gpu_id def InferBlobDevices(net): @@ -2111,7 +2111,7 @@ class Net(object): """A convenient function to run everything on the GPU.""" device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA - device_option.device_id = gpu_id + device_option.cuda_gpu_id = gpu_id self._net.device_option.CopyFrom(device_option) if use_cudnn: for op in self._net.op: @@ -2286,7 +2286,7 @@ def copy_func_between_devices(src, dst): return None if src.device_type == CUDA and dst.device_type == CUDA: - if src.device_id == dst.device_id: + if src.cuda_gpu_id == dst.cuda_gpu_id: return None else: def fun(net, *args, **kw): @@ -2312,10 +2312,10 @@ def copy_func_between_devices(src, dst): def device_equal(src, dst): ''' We are using this fucntion instead of == operator because optional-value - comparison between empty device_options and {device_type:0, device_id:0} + comparison between empty device_options and {device_type:0, cuda_gpu_id:0} returns not equal in some cases. ''' - return src.device_type == dst.device_type and src.device_id == dst.device_id + return src.device_type == dst.device_type and src.cuda_gpu_id == dst.cuda_gpu_id def update_placeholder_op_output(op, blob_to_device): @@ -2429,7 +2429,7 @@ def InjectCrossDeviceCopies(net, blob_to_device=None, blob_remap=None, if device_option.device_type == CPU: suffix = '_cpu' elif device_option.device_type == CUDA: - suffix = '_cuda_' + str(device_option.device_id) + suffix = '_cuda_' + str(device_option.cuda_gpu_id) else: raise RuntimeError( "Unknown device type: {}". diff --git a/caffe2/python/core_test.py b/caffe2/python/core_test.py index 2f6dedbfd80..7120843f331 100644 --- a/caffe2/python/core_test.py +++ b/caffe2/python/core_test.py @@ -83,17 +83,17 @@ class TestScopes(test_util.TestCase): # explicitly setting a device device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA - device_option.device_id = 1 + device_option.cuda_gpu_id = 1 op = core.CreateOperator("Relu", "x", "y", device_option=device_option) self.assertTrue(op.HasField('device_option')) self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA) - self.assertEqual(op.device_option.device_id, 1) + self.assertEqual(op.device_option.cuda_gpu_id, 1) with core.DeviceScope(device_option): # from device scope op = core.CreateOperator("Relu", "x", "y") self.assertTrue(op.HasField('device_option')) self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA) - self.assertEqual(op.device_option.device_id, 1) + self.assertEqual(op.device_option.cuda_gpu_id, 1) # from an overridden device option override_device = caffe2_pb2.DeviceOption() override_device.device_type = caffe2_pb2.CPU @@ -109,13 +109,13 @@ class TestScopes(test_util.TestCase): def testNameAndDeviceScopeTogether(self): device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA - device_option.device_id = 1 + device_option.cuda_gpu_id = 1 with core.DeviceScope(device_option): with core.NameScope("foo"): op = core.CreateOperator("Relu", "x", "y") self.assertTrue(op.HasField('device_option')) self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA) - self.assertEqual(op.device_option.device_id, 1) + self.assertEqual(op.device_option.cuda_gpu_id, 1) self.assertEqual(len(op.input), 1) self.assertEqual(op.input[0], "foo/x") self.assertEqual(len(op.output), 1) @@ -255,7 +255,7 @@ class TestCreateOperator(test_util.TestCase): def testCreate(self): device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA - device_option.device_id = 1 + device_option.cuda_gpu_id = 1 op = core.CreateOperator( "Ludicrous", "x", "y", name="ludicrous", control_input="z", device_option=device_option, @@ -271,7 +271,7 @@ class TestCreateOperator(test_util.TestCase): self.assertEqual(op.control_input[0], "z") self.assertTrue(op.HasField('device_option')) self.assertEqual(op.device_option.device_type, caffe2_pb2.CUDA) - self.assertEqual(op.device_option.device_id, 1) + self.assertEqual(op.device_option.cuda_gpu_id, 1) self.assertTrue(len(op.arg), 3) # can't guarantee ordering of kwargs, so generate a set of args @@ -574,7 +574,7 @@ class TestDeviceOption(test_util.TestCase): opt2 = caffe2_pb2.DeviceOption() opt1.device_type = 0 self.assertTrue(core.device_option_equal(opt1, opt2)) - opt1.device_id = 5 + opt1.cuda_gpu_id = 5 # opt1 still is on CPU, so the options should be equal self.assertTrue(core.device_option_equal(opt1, opt2)) opt2.device_type = 0 @@ -649,7 +649,7 @@ class TestInferDevice(test_util.TestCase): def setUp(self): device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA - device_option.device_id = 1 + device_option.cuda_gpu_id = 1 self.cuda_option = device_option self.cpu_option = caffe2_pb2.DeviceOption() @@ -748,7 +748,7 @@ class TestInferDevice(test_util.TestCase): init_net = core.Net("init") device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA - device_option.device_id = 1 + device_option.cuda_gpu_id = 1 weight = init_net.XavierFill([], 'fc_w', shape=[10, 100]) bias = init_net.ConstantFill([], 'fc_b', shape=[10, ]) @@ -765,7 +765,7 @@ class TestInferDevice(test_util.TestCase): self.assertEqual(op.input[1], "fc_w_cuda_1") self.assertEqual(op.input[2], "fc_b_cuda_1") self.assertEqual(op.device_option.device_type, 1) - self.assertEqual(op.device_option.device_id, 1) + self.assertEqual(op.device_option.cuda_gpu_id, 1) self.assertEqual(new_net._net.op[-2].type, "CopyCPUToGPU") self.assertEqual(new_net._net.op[0].type, "CopyCPUToGPU") self.assertNotEqual(blob_to_device["fc_w"], device_option) @@ -775,7 +775,7 @@ class TestInferDevice(test_util.TestCase): init_net = core.Net("init") device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA - device_option.device_id = 1 + device_option.cuda_gpu_id = 1 weight = init_net.XavierFill([], 'fc_w', shape=[10, 100]) bias = init_net.ConstantFill([], 'fc_b', shape=[10, ]) const = init_net.ConstantFill([], 'const', shape=[], value=1.) @@ -791,12 +791,12 @@ class TestInferDevice(test_util.TestCase): op = nets[1]._net.op[0] self.assertEqual(op.type, "CopyCPUToGPU") self.assertEqual(op.device_option.device_type, 1) - self.assertEqual(op.device_option.device_id, 1) + self.assertEqual(op.device_option.cuda_gpu_id, 1) self.assertEqual(op.output[0], "fc_w_cuda_1") op = nets[1]._net.op[1] self.assertEqual(op.type, "CopyCPUToGPU") self.assertEqual(op.device_option.device_type, 1) - self.assertEqual(op.device_option.device_id, 1) + self.assertEqual(op.device_option.cuda_gpu_id, 1) self.assertEqual(op.output[0], "fc_b_cuda_1") op = nets[1]._net.op[2] self.assertEqual(op.type, "FC") @@ -804,7 +804,7 @@ class TestInferDevice(test_util.TestCase): self.assertEqual(op.input[1], "fc_w_cuda_1") self.assertEqual(op.input[2], "fc_b_cuda_1") self.assertEqual(op.device_option.device_type, 1) - self.assertEqual(op.device_option.device_id, 1) + self.assertEqual(op.device_option.cuda_gpu_id, 1) op = nets[1]._net.op[3] self.assertEqual(op.type, "Add") self.assertEqual(op.input[0], "fc1") @@ -822,7 +822,7 @@ op { type: "CopyCPUToGPU" device_option { device_type: 1 - device_id: 1 + cuda_gpu_id: 1 } } op { @@ -832,7 +832,7 @@ op { type: "CopyCPUToGPU" device_option { device_type: 1 - device_id: 1 + cuda_gpu_id: 1 } } op { @@ -844,7 +844,7 @@ op { type: "FC" device_option { device_type: 1 - device_id: 1 + cuda_gpu_id: 1 } } op { @@ -855,7 +855,7 @@ op { type: "Add" device_option { device_type: 1 - device_id: 1 + cuda_gpu_id: 1 } } external_input: "data" @@ -870,7 +870,7 @@ external_input: "const_cuda_1" init_net = core.Net("init") device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA - device_option.device_id = 1 + device_option.cuda_gpu_id = 1 with core.DeviceScope(device_option): weight = init_net.XavierFill([], 'fc_w', shape=[10, 100]) @@ -887,7 +887,7 @@ external_input: "const_cuda_1" self.assertEqual(op.input[1], "fc_w") self.assertEqual(op.input[2], "fc_b") self.assertEqual(op.device_option.device_type, 1) - self.assertEqual(op.device_option.device_id, 1) + self.assertEqual(op.device_option.cuda_gpu_id, 1) """ For reference, net.Proto() should be like: name: "" @@ -900,7 +900,7 @@ op { type: "FC" device_option { device_type: 1 - device_id: 1 + cuda_gpu_id: 1 } } external_input: "data" @@ -912,7 +912,7 @@ external_input: "fc_b" net = core.Net("test") device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA - device_option.device_id = 1 + device_option.cuda_gpu_id = 1 with core.DeviceScope(device_option): net.Relu("data", "relu1") @@ -920,10 +920,10 @@ external_input: "fc_b" with core.DeviceScope(device_option): net.Relu("data", "relu3") net.Relu("data", "relu4") - device_option.device_id = 0 + device_option.cuda_gpu_id = 0 with core.DeviceScope(device_option): net.Relu("data", "relu5") - device_option.device_id = 1 + device_option.cuda_gpu_id = 1 with core.DeviceScope(device_option): net.Relu("data", "relu6") @@ -931,12 +931,12 @@ external_input: "fc_b" op = new_net._net.op[0] self.assertEqual(op.type, "CopyCPUToGPU") self.assertEqual(op.device_option.device_type, 1) - self.assertEqual(op.device_option.device_id, 1) + self.assertEqual(op.device_option.cuda_gpu_id, 1) self.assertEqual(op.output[0], "data_cuda_1") op = new_net._net.op[1] self.assertEqual(op.type, "Relu") self.assertEqual(op.device_option.device_type, 1) - self.assertEqual(op.device_option.device_id, 1) + self.assertEqual(op.device_option.cuda_gpu_id, 1) self.assertEqual(op.output[0], "relu1") op = new_net._net.op[2] self.assertEqual(op.type, "Relu") @@ -945,7 +945,7 @@ external_input: "fc_b" op = new_net._net.op[3] self.assertEqual(op.type, "Relu") self.assertEqual(op.device_option.device_type, 1) - self.assertEqual(op.device_option.device_id, 1) + self.assertEqual(op.device_option.cuda_gpu_id, 1) self.assertEqual(op.input[0], "data_cuda_1") self.assertEqual(op.output[0], "relu3") op = new_net._net.op[4] @@ -955,18 +955,18 @@ external_input: "fc_b" op = new_net._net.op[5] self.assertEqual(op.type, "CopyCPUToGPU") self.assertEqual(op.device_option.device_type, 1) - self.assertEqual(op.device_option.device_id, 0) + self.assertEqual(op.device_option.cuda_gpu_id, 0) self.assertEqual(op.output[0], "data_cuda_0") op = new_net._net.op[6] self.assertEqual(op.type, "Relu") self.assertEqual(op.device_option.device_type, 1) - self.assertEqual(op.device_option.device_id, 0) + self.assertEqual(op.device_option.cuda_gpu_id, 0) self.assertEqual(op.input[0], "data_cuda_0") self.assertEqual(op.output[0], "relu5") op = new_net._net.op[7] self.assertEqual(op.type, "Relu") self.assertEqual(op.device_option.device_type, 1) - self.assertEqual(op.device_option.device_id, 1) + self.assertEqual(op.device_option.cuda_gpu_id, 1) self.assertEqual(op.input[0], "data_cuda_1") self.assertEqual(op.output[0], "relu6") """ @@ -979,7 +979,7 @@ op { type: "CopyCPUToGPU" device_option { device_type: 1 - device_id: 1 + cuda_gpu_id: 1 } } op { @@ -989,7 +989,7 @@ op { type: "Relu" device_option { device_type: 1 - device_id: 1 + cuda_gpu_id: 1 } } op { @@ -1005,7 +1005,7 @@ op { type: "Relu" device_option { device_type: 1 - device_id: 1 + cuda_gpu_id: 1 } } op { @@ -1021,7 +1021,7 @@ op { type: "CopyCPUToGPU" device_option { device_type: 1 - device_id: 0 + cuda_gpu_id: 0 } } op { @@ -1031,7 +1031,7 @@ op { type: "Relu" device_option { device_type: 1 - device_id: 0 + cuda_gpu_id: 0 } } op { @@ -1041,7 +1041,7 @@ op { type: "Relu" device_option { device_type: 1 - device_id: 1 + cuda_gpu_id: 1 } } external_input: "data" @@ -1060,7 +1060,7 @@ external_input: "data" cpu_device[i].node_name = 'node:' + str(i) gpu_device.append(caffe2_pb2.DeviceOption()) gpu_device[i].device_type = caffe2_pb2.CUDA - gpu_device[i].device_id = 0 + gpu_device[i].cuda_gpu_id = 0 gpu_device[i].node_name = 'node:' + str(i) send_node = 'node:0' recv_node = 'node:1' @@ -1100,12 +1100,12 @@ external_input: "data" op = init_net._net.op[2] self.assertEqual(op.type, "CopyGPUToCPU") self.assertEqual(op.device_option.device_type, 1) - self.assertEqual(op.device_option.device_id, 0) + self.assertEqual(op.device_option.cuda_gpu_id, 0) self.assertEqual(op.output[0], "fc_w_cpu") op = init_net._net.op[3] self.assertEqual(op.type, "CopyGPUToCPU") self.assertEqual(op.device_option.device_type, 1) - self.assertEqual(op.device_option.device_id, 0) + self.assertEqual(op.device_option.cuda_gpu_id, 0) self.assertEqual(op.output[0], "fc_b_cpu") op = init_net._net.op[4] self.assertEqual(op.type, placeholder_send) @@ -1128,7 +1128,7 @@ external_input: "data" net = core.Net("test") device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA - device_option.device_id = 1 + device_option.cuda_gpu_id = 1 net.Adagrad(['param', 'moment', 'grad', 'lr'], ['param', 'moment']) with core.DeviceScope(device_option): diff --git a/caffe2/python/data_parallel_model.py b/caffe2/python/data_parallel_model.py index 749c8b12c93..89770dc6ea7 100644 --- a/caffe2/python/data_parallel_model.py +++ b/caffe2/python/data_parallel_model.py @@ -813,7 +813,7 @@ def ConvertNetForDevice(net, device=None): device_prefix = "gpu" if device.device_type == caffe2_pb2.CUDA else "cpu" - namescope = "{}_{}/".format(device_prefix, device.device_id) + namescope = "{}_{}/".format(device_prefix, device.cuda_gpu_id) for op in mnet.Proto().op: if "RecurrentNetwork" in op.type: raise("RecurrentNetwork conversion not yet supported") @@ -1540,7 +1540,7 @@ def _AnalyzeOperators(model): continue op_dev = op.device_option - op_gpu = op_dev.device_id + op_gpu = op_dev.cuda_gpu_id # This avoids failing on operators that are only for CPU if op_dev.device_type != caffe2_pb2.CUDA: @@ -1904,7 +1904,7 @@ def _InterleaveOps(model): new_ops = [] ops = {d: [] for d in range(num_devices)} for op in orig_ops: - ops[op.device_option.device_id].append(op) + ops[op.device_option.cuda_gpu_id].append(op) for j in range(num_ops_per_dev): tp = None diff --git a/caffe2/python/hypothesis_test_util.py b/caffe2/python/hypothesis_test_util.py index 8470df15887..5cc18f99bd9 100644 --- a/caffe2/python/hypothesis_test_util.py +++ b/caffe2/python/hypothesis_test_util.py @@ -259,7 +259,7 @@ device_options = _device_options_no_hip + ([hip_do] if workspace.has_hip_support # Include device option for each GPU expanded_device_options = [cpu_do] + ( - [caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA, device_id=i) + [caffe2_pb2.DeviceOption(device_type=caffe2_pb2.CUDA, cuda_gpu_id=i) for i in range(workspace.NumCudaDevices())] if workspace.has_gpu_support else []) diff --git a/caffe2/python/model_helper.py b/caffe2/python/model_helper.py index 1e881d27f49..f8e3f32bb2c 100644 --- a/caffe2/python/model_helper.py +++ b/caffe2/python/model_helper.py @@ -596,7 +596,7 @@ def ExtractPredictorNet( rename_list(step_op.output) if device is not None: step_op.device_option.device_type = device.device_type - step_op.device_option.device_id = device.device_id + step_op.device_option.cuda_gpu_id = device.cuda_gpu_id rename_list(arg.n.external_input) rename_list(arg.n.external_output) @@ -610,7 +610,7 @@ def ExtractPredictorNet( if device is not None: op.device_option.device_type = device.device_type - op.device_option.device_id = device.device_id + op.device_option.cuda_gpu_id = device.cuda_gpu_id validate_op(op) predict_proto.op.extend([op]) known_blobs.update(op.output) diff --git a/caffe2/python/muji.py b/caffe2/python/muji.py index 2f2b5aced66..b407f96d239 100644 --- a/caffe2/python/muji.py +++ b/caffe2/python/muji.py @@ -26,7 +26,7 @@ def OnGPU(gpu_id): """ device_option = caffe2_pb2.DeviceOption() device_option.device_type = caffe2_pb2.CUDA - device_option.device_id = gpu_id + device_option.cuda_gpu_id = gpu_id return device_option diff --git a/caffe2/python/net_printer.py b/caffe2/python/net_printer.py index 7583f863b1f..4b5cddb61d2 100644 --- a/caffe2/python/net_printer.py +++ b/caffe2/python/net_printer.py @@ -268,11 +268,11 @@ def call(op, inputs=None, outputs=None, factor_prefixes=False): def format_device_option(dev_opt): if not dev_opt or not ( - dev_opt.device_type or dev_opt.device_id or dev_opt.node_name): + dev_opt.device_type or dev_opt.cuda_gpu_id or dev_opt.node_name): return None return call( 'DeviceOption', - [dev_opt.device_type, dev_opt.device_id, "'%s'" % dev_opt.node_name]) + [dev_opt.device_type, dev_opt.cuda_gpu_id, "'%s'" % dev_opt.node_name]) @Printer.register(OperatorDef) diff --git a/caffe2/python/numa_test.py b/caffe2/python/numa_test.py index 3178345cf46..8d3a362dcdf 100644 --- a/caffe2/python/numa_test.py +++ b/caffe2/python/numa_test.py @@ -27,7 +27,7 @@ def build_test_net(net_name): gpu_device_option = caffe2_pb2.DeviceOption() gpu_device_option.device_type = caffe2_pb2.CUDA - gpu_device_option.device_id = 0 + gpu_device_option.cuda_gpu_id = 0 net.CopyCPUToGPU("output_blob_0", "output_blob_0_gpu", device_option=gpu_device_option) diff --git a/caffe2/python/onnx/backend_rep.py b/caffe2/python/onnx/backend_rep.py index 5802e49de52..8cc3f9e2fa9 100644 --- a/caffe2/python/onnx/backend_rep.py +++ b/caffe2/python/onnx/backend_rep.py @@ -24,7 +24,7 @@ class Caffe2Rep(BackendRep): @property def _name_scope(self): if self.predict_net.device_option.device_type == caffe2_pb2.CUDA: - return 'gpu_{}'.format(self.predict_net.device_option.device_id) + return 'gpu_{}'.format(self.predict_net.device_option.cuda_gpu_id) return '' def run(self, inputs, **kwargs): diff --git a/caffe2/python/operator_test/load_save_test.py b/caffe2/python/operator_test/load_save_test.py index 8e3817034d4..2d53027a0a0 100644 --- a/caffe2/python/operator_test/load_save_test.py +++ b/caffe2/python/operator_test/load_save_test.py @@ -91,7 +91,7 @@ class TestLoadSaveBase(test_util.TestCase): self.assertEqual(proto.tensor.device_detail.device_type, device_type) if device_type == caffe2_pb2.CUDA: - self.assertEqual(proto.tensor.device_detail.device_id, + self.assertEqual(proto.tensor.device_detail.cuda_gpu_id, gpu_id) blobs = [str(i) for i in range(len(arrays))] diff --git a/caffe2/python/operator_test/rnn_cell_test.py b/caffe2/python/operator_test/rnn_cell_test.py index 66ac07dbdca..9d9bb38e178 100644 --- a/caffe2/python/operator_test/rnn_cell_test.py +++ b/caffe2/python/operator_test/rnn_cell_test.py @@ -1216,7 +1216,7 @@ class RNNCellTest(hu.HypothesisTestCase): if arg.name == "step_net": for step_op in arg.n.op: self.assertEqual(0, step_op.device_option.device_type) - self.assertEqual(1, step_op.device_option.device_id) + self.assertEqual(1, step_op.device_option.cuda_gpu_id) elif arg.name == 'backward_step_net': self.assertEqual(caffe2_pb2.NetDef(), arg.n) diff --git a/caffe2/python/optimizer.py b/caffe2/python/optimizer.py index ddd5871f7d4..0c5b18b0b6a 100644 --- a/caffe2/python/optimizer.py +++ b/caffe2/python/optimizer.py @@ -83,7 +83,7 @@ class Optimizer(object): if current_scope.device_type == caffe2_pb2.CUDA: return self.get_gpu_blob_name( - base_str, current_scope.device_id, current_scope.node_name + base_str, current_scope.cuda_gpu_id, current_scope.node_name ) else: return self.get_cpu_blob_name(base_str, current_scope.node_name) @@ -279,7 +279,7 @@ class SgdOptimizer(Optimizer): # to include device information. ONE = param_init_net.ConstantFill( [], - "ONE_{}_{}{}".format(dev.device_type, dev.device_id, dev.node_name), + "ONE_{}_{}{}".format(dev.device_type, dev.cuda_gpu_id, dev.node_name), shape=[1], value=1.0 ) @@ -488,12 +488,12 @@ class WeightDecayBuilder(Optimizer): ONE = param_init_net.ConstantFill( [], - "ONE_{}_{}".format(dev.device_type, dev.device_id), + "ONE_{}_{}".format(dev.device_type, dev.cuda_gpu_id), shape=[1], value=1.0 ) WD = param_init_net.ConstantFill( - [], "wd_{}_{}".format(dev.device_type, dev.device_id), + [], "wd_{}_{}".format(dev.device_type, dev.cuda_gpu_id), shape=[1], value=self.weight_decay ) @@ -1160,7 +1160,7 @@ class RmsPropOptimizer(Optimizer): ONE = param_init_net.ConstantFill( [], - "ONE_{}_{}".format(dev.device_type, dev.device_id), + "ONE_{}_{}".format(dev.device_type, dev.cuda_gpu_id), shape=[1], value=1.0 ) diff --git a/caffe2/python/predictor/predictor_exporter_test.py b/caffe2/python/predictor/predictor_exporter_test.py index ef11246bdfc..b4c71535deb 100644 --- a/caffe2/python/predictor/predictor_exporter_test.py +++ b/caffe2/python/predictor/predictor_exporter_test.py @@ -193,7 +193,7 @@ class PredictorExporterTest(unittest.TestCase): # check device options for op in list(init_net.Proto().op) + list(predict_init_net.Proto().op): - self.assertEqual(1, op.device_option.device_id) + self.assertEqual(1, op.device_option.cuda_gpu_id) self.assertEqual(caffe2_pb2.CPU, op.device_option.device_type) def test_db_fails_without_params(self): diff --git a/caffe2/python/pybind_state_dlpack.h b/caffe2/python/pybind_state_dlpack.h index 6db4ae42b84..679152c7881 100644 --- a/caffe2/python/pybind_state_dlpack.h +++ b/caffe2/python/pybind_state_dlpack.h @@ -34,7 +34,7 @@ class DLPackWrapper { "Unsupported device type: ", device_option.device_type()); tensor_context.device_type = *device_type_ptr; - tensor_context.device_id = device_option.device_id(); + tensor_context.device_id = device_option.cuda_gpu_id(); if (tensor->size() <= 0) { tensor->Resize(0); @@ -87,7 +87,7 @@ class DLPackWrapper { int dlpack_device_id = dlTensor->ctx.device_id; CAFFE_ENFORCE_EQ( dlpack_device_id, - device_option.device_id(), + device_option.cuda_gpu_id(), "Expected same device id for DLPack and C2 tensors"); std::vector dims; diff --git a/caffe2/utils/proto_utils.cc b/caffe2/utils/proto_utils.cc index dd80282238a..dc8e088eba9 100644 --- a/caffe2/utils/proto_utils.cc +++ b/caffe2/utils/proto_utils.cc @@ -30,7 +30,7 @@ C10_EXPORT int DeviceId(const DeviceOption& option) { case PROTO_CPU: return option.numa_node_id(); case PROTO_CUDA: - return option.device_id(); + return option.cuda_gpu_id(); case PROTO_MKLDNN: return option.numa_node_id(); case PROTO_HIP: @@ -43,7 +43,7 @@ C10_EXPORT int DeviceId(const DeviceOption& option) { C10_EXPORT bool IsSameDevice(const DeviceOption& lhs, const DeviceOption& rhs) { return ( lhs.device_type() == rhs.device_type() && - lhs.device_id() == rhs.device_id() && + lhs.cuda_gpu_id() == rhs.cuda_gpu_id() && lhs.hip_gpu_id() == rhs.hip_gpu_id() && lhs.node_name() == rhs.node_name() && lhs.numa_node_id() == rhs.numa_node_id()); diff --git a/caffe2/utils/proto_utils_test.cc b/caffe2/utils/proto_utils_test.cc index 5d8fb86b34e..c9f37f4c98c 100644 --- a/caffe2/utils/proto_utils_test.cc +++ b/caffe2/utils/proto_utils_test.cc @@ -11,9 +11,9 @@ TEST(ProtoUtilsTest, IsSameDevice) { EXPECT_FALSE(IsSameDevice(a, b)); b.set_node_name("my_node"); EXPECT_TRUE(IsSameDevice(a, b)); - b.set_device_id(2); + b.set_cuda_gpu_id(2); EXPECT_FALSE(IsSameDevice(a, b)); - a.set_device_id(2); + a.set_cuda_gpu_id(2); EXPECT_TRUE(IsSameDevice(a, b)); a.set_device_type(DeviceTypeProto::PROTO_CUDA); b.set_device_type(DeviceTypeProto::PROTO_CPU); diff --git a/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py b/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py index 3a98a4cb7d9..113403fd87b 100644 --- a/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py +++ b/tools/amd_build/pyHIPIFY/cuda_to_hip_mappings.py @@ -2216,7 +2216,7 @@ CAFFE2_SPECIFIC_MAPPINGS = { "CURAND_ENFORCE" :("HIPRAND_ENFORCE", API_CAFFE2), "curandGenerateUniform" : ("hiprandGenerateUniform", API_CAFFE2), "curand_generator" : ("hiprand_generator", API_CAFFE2), - "device_id" : ("hip_gpu_id", API_CAFFE2), + "cuda_gpu_id" : ("hip_gpu_id", API_CAFFE2), "CaffeCudaGetDevice" : ("CaffeHipGetDevice", API_CAFFE2), }