From da9e49e586016d8905c242f9df1f4aeff2a96853 Mon Sep 17 00:00:00 2001 From: Dmytro Dzhulgakov Date: Wed, 28 Nov 2018 15:43:22 -0800 Subject: [PATCH] Remove Context dependency from Tensor class (#14269) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/14269 Removes reference to Context proper and instead adds a bool argument for async copy (the same as `copy_`) For CopyFrom - I haven't tweaked all callsites yet. Instead I rely on a terrible hack that pointer to context is implicitly converted to bool when passed, haha :) It's not a good code and I propose to fix it in a follow up diff (maybe using clangr tooling). Reviewed By: ezyang Differential Revision: D13117981 fbshipit-source-id: 7cb1dc2ba6a4c50ac26614f45ab8318ea96e3138 --- aten/src/ATen/core/TensorImpl.h | 12 ++++----- caffe2/core/tensor.cc | 4 +-- caffe2/core/tensor.h | 15 ++++++------ caffe2/experiments/operators/tt_pad_op.h | 2 +- caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm | 4 +-- caffe2/operators/box_with_nms_limit_op.cc | 8 +++--- caffe2/operators/dataset_ops.cc | 4 +-- caffe2/operators/expand_squeeze_dims_op.h | 4 +-- caffe2/operators/generate_proposals_op.cc | 4 +-- caffe2/operators/last_n_window_collector.cc | 4 +-- caffe2/operators/mean_op.h | 4 +-- caffe2/operators/onnx_while_op.h | 2 +- caffe2/operators/reservoir_sampling.cc | 4 +-- caffe2/operators/rmac_regions_op.cc | 4 +-- caffe2/operators/sequence_ops.h | 8 +++--- caffe2/operators/slice_op.cu | 4 +-- caffe2/operators/slice_op.h | 4 +-- caffe2/operators/stop_gradient.h | 2 +- caffe2/operators/utility_ops.cu | 2 +- caffe2/operators/utility_ops.h | 4 +-- caffe2/queue/queue_ops.h | 2 +- caffe2/video/video_input_op.h | 27 ++++++++++++--------- 22 files changed, 65 insertions(+), 63 deletions(-) diff --git a/aten/src/ATen/core/TensorImpl.h b/aten/src/ATen/core/TensorImpl.h index 59edcaf95f4..a4ebc36c95f 100644 --- a/aten/src/ATen/core/TensorImpl.h +++ b/aten/src/ATen/core/TensorImpl.h @@ -917,9 +917,9 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { * a tensor on CPU and then CopyFrom a CUDA tensor, that will to a * CUDA-to-CPU transfer). * - * If the function is invoked without `context` the copy would be synchronous + * 'async' parameter triggers async copy for CUDA tensors */ - void CopyFrom(const TensorImpl& src, at::BaseContext* context = nullptr) { + void CopyFrom(const TensorImpl& src, bool async = false) { AT_ASSERT(!is_variable()); AT_ASSERTM( src.is_contiguous(), @@ -978,7 +978,7 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { src.device(), new_data, device(), - context != nullptr); + async); } } } @@ -991,8 +991,10 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { * elements, in which case this tensors' capacity is grown at a factor of * growthPct. This ensures that Extend runs on an amortized O(1) time * complexity. + * + * This op is auto-asynchronous if the underlying device (CUDA) supports it. */ - void Extend(int64_t num, float growthPct, at::BaseContext* context) { + void Extend(int64_t num, float growthPct) { AT_ASSERT(sizes_.size() >= 1u); AT_ASSERTM(num >= 0, "`num` must be non-negative for Extend"); AT_ASSERTM( @@ -1022,8 +1024,6 @@ struct CAFFE2_API TensorImpl : public c10::intrusive_ptr_target { auto oldDims = sizes_; Resize(newCapacity); auto* newData = raw_mutable_data(data_type_); - AT_ASSERTM( - context != nullptr, "Context must be provided to Extend the tensor"); if (data_type_.copy()) { AT_ASSERTM( device_type() == ::at::DeviceType::CPU, diff --git a/caffe2/core/tensor.cc b/caffe2/core/tensor.cc index 8030ffeb7d8..79e751c8384 100644 --- a/caffe2/core/tensor.cc +++ b/caffe2/core/tensor.cc @@ -159,7 +159,7 @@ void ReinitializeAndCopyFrom( Tensor* t, at::TensorOptions options, const Tensor& src, - BaseContext* context) { + bool async) { auto device_type = options.device().type(); CAFFE_ENFORCE(t != nullptr, "Target tensor ptr is null."); if (!*t || device_type != t->GetDeviceType()) { @@ -172,7 +172,7 @@ void ReinitializeAndCopyFrom( t->dtype(), " to: ", src.dtype()); - t->CopyFrom(src, context); + t->CopyFrom(src, async); } namespace { diff --git a/caffe2/core/tensor.h b/caffe2/core/tensor.h index 50ee3483081..401542217f4 100644 --- a/caffe2/core/tensor.h +++ b/caffe2/core/tensor.h @@ -97,23 +97,22 @@ class CAFFE2_API Tensor final { return impl_.get()->GetDevice(); } - void CopyFrom(const Tensor& src, BaseContext* context = nullptr) const { - impl_.get()->CopyFrom(*src.impl_.get(), context); + void CopyFrom(const Tensor& src, bool async = false) const { + impl_.get()->CopyFrom(*src.impl_.get(), async); } /** * @brief Extend the outer-most dimension of this tensor * to dimension of `num`. */ - void ExtendTo(int64_t num, float growthPct, BaseContext* context) const { + void ExtendTo(int64_t num, float growthPct) const { CAFFE_ENFORCE_GE_WITH_CALLER(impl_->dim(), 1); CAFFE_ENFORCE_GE_WITH_CALLER(growthPct, 0); - CAFFE_ENFORCE(context != nullptr, "Context must be provided."); - Extend(num - impl_->size(0), growthPct, context); + Extend(num - impl_->size(0), growthPct); } - void Extend(int64_t num, float growthPct, BaseContext* context) const { - impl_.get()->Extend(num, growthPct, context); + void Extend(int64_t num, float growthPct) const { + impl_.get()->Extend(num, growthPct); } /** @@ -451,7 +450,7 @@ CAFFE2_API void ReinitializeAndCopyFrom( Tensor* t, at::TensorOptions options, const Tensor& src, - BaseContext* context = nullptr); + bool async = false); CAFFE_DECLARE_PREALLOCATED_KNOWN_TYPE(12, Tensor) diff --git a/caffe2/experiments/operators/tt_pad_op.h b/caffe2/experiments/operators/tt_pad_op.h index 57e0d4eb6f4..e25159da75b 100644 --- a/caffe2/experiments/operators/tt_pad_op.h +++ b/caffe2/experiments/operators/tt_pad_op.h @@ -52,7 +52,7 @@ class TTPadOp final : public Operator { int64_t padded_dim0 = (X_dim0 / scale_ + 1) * scale_; auto dim0_diff = padded_dim0 - X_dim0; // set growthPct to the upper bound percentage: (100 * scale_ / X_dim0) - X_pad->Extend(dim0_diff, 100 * scale_ / X_dim0, &context_); + X_pad->Extend(dim0_diff, 100 * scale_ / X_dim0); auto* X_pad_data = X_pad->template mutable_data(); int64_t X_size = X_dim0 * X_dim1; diff --git a/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm b/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm index 4e8cb62f304..aa6a5473580 100644 --- a/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm +++ b/caffe2/mobile/contrib/ios/mpscnn/mpscnn.mm @@ -2302,8 +2302,8 @@ class MPSCNNGenerateProposalsCPPOp final : public Operator { int csz = im_i_boxes.rows(); int cur_start_idx = out_rois->dim(0); - out_rois->Extend(csz, 50, &context_); - out_rois_probs->Extend(csz, 50, &context_); + out_rois->Extend(csz, 50); + out_rois_probs->Extend(csz, 50); // write rois Eigen::Map cur_rois( diff --git a/caffe2/operators/box_with_nms_limit_op.cc b/caffe2/operators/box_with_nms_limit_op.cc index 2b83e193f52..18646b4835e 100644 --- a/caffe2/operators/box_with_nms_limit_op.cc +++ b/caffe2/operators/box_with_nms_limit_op.cc @@ -167,9 +167,9 @@ bool BoxWithNMSLimitOp::RunOnDevice() { // Write results int cur_start_idx = out_scores->size(0); - out_scores->Extend(total_keep_count, 50, &context_); - out_boxes->Extend(total_keep_count, 50, &context_); - out_classes->Extend(total_keep_count, 50, &context_); + out_scores->Extend(total_keep_count, 50); + out_boxes->Extend(total_keep_count, 50); + out_classes->Extend(total_keep_count, 50); int cur_out_idx = 0; for (int j = 1; j < num_classes; j++) { @@ -202,7 +202,7 @@ bool BoxWithNMSLimitOp::RunOnDevice() { } if (out_keeps) { - out_keeps->Extend(total_keep_count, 50, &context_); + out_keeps->Extend(total_keep_count, 50); Eigen::Map out_keeps_arr( out_keeps->template mutable_data() + cur_start_idx, diff --git a/caffe2/operators/dataset_ops.cc b/caffe2/operators/dataset_ops.cc index 3a074dea525..4a8efc46214 100644 --- a/caffe2/operators/dataset_ops.cc +++ b/caffe2/operators/dataset_ops.cc @@ -776,7 +776,7 @@ class AppendOp final : public Operator { CAFFE_ENFORCE(a.sizes()[i] == b.sizes()[i]); } auto oldSize = c->numel(); - c->Extend(b.sizes()[0], kDatasetGrowthPct, &context_); + c->Extend(b.sizes()[0], kDatasetGrowthPct); auto* dst = (char*)c->raw_mutable_data() + oldSize * b.dtype().itemsize(); context_.CopyItemsSameDevice(b.dtype(), b.numel(), b.raw_data(), dst); return true; @@ -826,7 +826,7 @@ class AtomicAppendOp final : public Operator { continue; } auto oldSize = c->numel(); - c->Extend(b.sizes()[0], kDatasetGrowthPct, &context_); + c->Extend(b.sizes()[0], kDatasetGrowthPct); auto* dst = (char*)c->raw_mutable_data() + oldSize * b.dtype().itemsize(); context_.CopyItemsSameDevice(b.dtype(), b.numel(), b.raw_data(), dst); } diff --git a/caffe2/operators/expand_squeeze_dims_op.h b/caffe2/operators/expand_squeeze_dims_op.h index f9c87987c75..89ff9c04b9e 100644 --- a/caffe2/operators/expand_squeeze_dims_op.h +++ b/caffe2/operators/expand_squeeze_dims_op.h @@ -26,7 +26,7 @@ class ExpandDimsOp : public Operator { bool RunOnDevice() override { auto& input = Input(0); auto* output = Output(0); - output->CopyFrom(input, &context_); + output->CopyFrom(input, true /*async*/); if (dims_.empty()) { return true; } @@ -70,7 +70,7 @@ class SqueezeOp : public Operator { bool RunOnDevice() override { auto& input = Input(0); auto* output = Output(0); - output->CopyFrom(input, &context_); + output->CopyFrom(input, true /*async*/); CAFFE_ENFORCE_GT( input.dim(), diff --git a/caffe2/operators/generate_proposals_op.cc b/caffe2/operators/generate_proposals_op.cc index 0646f27b08d..ade6bfe6c25 100644 --- a/caffe2/operators/generate_proposals_op.cc +++ b/caffe2/operators/generate_proposals_op.cc @@ -284,8 +284,8 @@ bool GenerateProposalsOp::RunOnDevice() { for (int i = 0; i < num_images; i++) { roi_counts += im_boxes[i].rows(); } - out_rois->Extend(roi_counts, 50, &context_); - out_rois_probs->Extend(roi_counts, 50, &context_); + out_rois->Extend(roi_counts, 50); + out_rois_probs->Extend(roi_counts, 50); float* out_rois_ptr = out_rois->template mutable_data(); float* out_rois_probs_ptr = out_rois_probs->template mutable_data(); for (int i = 0; i < num_images; i++) { diff --git a/caffe2/operators/last_n_window_collector.cc b/caffe2/operators/last_n_window_collector.cc index b98d028d83a..2b0695fdd33 100644 --- a/caffe2/operators/last_n_window_collector.cc +++ b/caffe2/operators/last_n_window_collector.cc @@ -71,7 +71,7 @@ class LastNWindowCollectorOp : public Operator { if (num_entries == 0) { if (!output_initialized) { // Get both shape and meta - output->CopyFrom(input, &context_); + output->CopyFrom(input, true /*async*/); } return true; } @@ -83,7 +83,7 @@ class LastNWindowCollectorOp : public Operator { // output_num is >= output_batch_size if (output_num > output_batch_size) { - output->ExtendTo(output_num, 50, &context_); + output->ExtendTo(output_num, 50); } auto* output_data = diff --git a/caffe2/operators/mean_op.h b/caffe2/operators/mean_op.h index 413a0f3b966..0a5d0726e43 100644 --- a/caffe2/operators/mean_op.h +++ b/caffe2/operators/mean_op.h @@ -23,7 +23,7 @@ class MeanOp final : public Operator { auto* output = Output(0); output->ResizeLike(input0); - output->CopyFrom(input0, &context_); + output->CopyFrom(input0, true /*async*/); if (InputSize() == 1) { return true; @@ -102,7 +102,7 @@ class MeanGradientOp : public Operator { for (int i = 1; i < num_inputs; i++) { auto* cur_dX = Output(i); cur_dX->ResizeLike(dY); - cur_dX->CopyFrom(*dX0, &context_); + cur_dX->CopyFrom(*dX0, true /*async*/); } return true; diff --git a/caffe2/operators/onnx_while_op.h b/caffe2/operators/onnx_while_op.h index 4614b578d2c..eeb45bb7432 100644 --- a/caffe2/operators/onnx_while_op.h +++ b/caffe2/operators/onnx_while_op.h @@ -171,7 +171,7 @@ class ONNXWhileOp final : public Operator { scan_outputs_sizes[i], "Size of scan output changed across iterations"); dims.insert(dims.begin(), itr); - scan_output_target->Extend(1, 100, &context_); + scan_output_target->Extend(1, 100); int64_t timestep_size = 1; for (const int64_t t : scan_outputs_sizes[i]) { diff --git a/caffe2/operators/reservoir_sampling.cc b/caffe2/operators/reservoir_sampling.cc index 287a77d9846..285dbbaca93 100644 --- a/caffe2/operators/reservoir_sampling.cc +++ b/caffe2/operators/reservoir_sampling.cc @@ -103,9 +103,9 @@ class ReservoirSamplingOp final : public Operator { auto output_num = std::min(numToCollect_, output_batch_size + num_to_copy); // output_num is >= output_batch_size - output->ExtendTo(output_num, 50, &context_); + output->ExtendTo(output_num, 50); if (pos_to_object) { - pos_to_object->ExtendTo(output_num, 50, &context_); + pos_to_object->ExtendTo(output_num, 50); } auto* output_data = diff --git a/caffe2/operators/rmac_regions_op.cc b/caffe2/operators/rmac_regions_op.cc index 458afdef66a..ab04906d735 100644 --- a/caffe2/operators/rmac_regions_op.cc +++ b/caffe2/operators/rmac_regions_op.cc @@ -58,7 +58,7 @@ bool RMACRegionsOp::RunOnDevice() { (l + Hd - 1 > 0) ? ((H - region_size) / (1.0 * (l + Hd - 1))) : 0; int cur_rows = output->dim32(0); - output->Extend((l + Wd) * (l + Hd), 50, &context_); + output->Extend((l + Wd) * (l + Hd), 50); auto* outputData = output->template mutable_data() + cur_rows * 5; for (int i = 0; i < l + Wd; ++i) { @@ -87,7 +87,7 @@ bool RMACRegionsOp::RunOnDevice() { // Replicate regions for all items in batch int num_rois = output->dim32(0); - output->Extend((batch_size - 1) * num_rois, 50, &context_); + output->Extend((batch_size - 1) * num_rois, 50); auto* outputData = output->template mutable_data(); for (int b = 1; b < batch_size; ++b) { // Copy all rois diff --git a/caffe2/operators/sequence_ops.h b/caffe2/operators/sequence_ops.h index b521aa99964..0b41da82ff7 100644 --- a/caffe2/operators/sequence_ops.h +++ b/caffe2/operators/sequence_ops.h @@ -120,9 +120,9 @@ class RemovePaddingOp final : public Operator { bool RunOnDevice() override { if (startPaddingWidth_ == 0 && endPaddingWidth_ == 0) { - Output(0)->CopyFrom(Input(0), &context_); + Output(0)->CopyFrom(Input(0), true /*async*/); if (OutputSize() == 2) { - Output(1)->CopyFrom(Input(1), &context_); + Output(1)->CopyFrom(Input(1), true /*async*/); } return true; } @@ -160,9 +160,9 @@ class AddPaddingOp final : public Operator { bool RunOnDevice() override { if (startPaddingWidth_ == 0 && endPaddingWidth_ == 0) { - Output(0)->CopyFrom(Input(0), &context_); + Output(0)->CopyFrom(Input(0), true /*async*/); if (OutputSize() == 2) { - Output(1)->CopyFrom(Input(1), &context_); + Output(1)->CopyFrom(Input(1), true /*async*/); } return true; } diff --git a/caffe2/operators/slice_op.cu b/caffe2/operators/slice_op.cu index 8ddb204ebd5..7d888f2408e 100644 --- a/caffe2/operators/slice_op.cu +++ b/caffe2/operators/slice_op.cu @@ -123,9 +123,9 @@ bool SliceImplGpu( } if (dim == -1) { if (!backward) { - output->CopyFrom(data, context); + output->CopyFrom(data, true /*async*/); } else { - gdata->CopyFrom(*go, context); + gdata->CopyFrom(*go, true /*async*/); } return true; } diff --git a/caffe2/operators/slice_op.h b/caffe2/operators/slice_op.h index 2e07bebc7d7..eb9193f672a 100644 --- a/caffe2/operators/slice_op.h +++ b/caffe2/operators/slice_op.h @@ -85,9 +85,9 @@ bool SliceImpl( } if (dim == -1) { if (!backward) { - output->CopyFrom(data, context); + output->CopyFrom(data, true /*async*/); } else { - gdata->CopyFrom(*go, context); + gdata->CopyFrom(*go, true /*async*/); } return true; } diff --git a/caffe2/operators/stop_gradient.h b/caffe2/operators/stop_gradient.h index e05cd1100e9..68bbad6dabd 100644 --- a/caffe2/operators/stop_gradient.h +++ b/caffe2/operators/stop_gradient.h @@ -14,7 +14,7 @@ class StopGradientOp : public Operator { const auto& in = Input(0); auto* out = Output(0); if (out != &in) { - out->CopyFrom(in, &context_); + out->CopyFrom(in, true /*async*/); } return true; } diff --git a/caffe2/operators/utility_ops.cu b/caffe2/operators/utility_ops.cu index 868e8498303..0d9bb323f07 100644 --- a/caffe2/operators/utility_ops.cu +++ b/caffe2/operators/utility_ops.cu @@ -130,7 +130,7 @@ bool NanCheckOp::RunOnDevice() { // This op should act as an identity matrix if we don't find any NaNs/infs. // Copy over the data if we are not doing this in-place. if (&X != Y) { - Y->CopyFrom(X, &context_); + Y->CopyFrom(X, true /*async*/); } return true; } diff --git a/caffe2/operators/utility_ops.h b/caffe2/operators/utility_ops.h index 09f39d8d4ff..0e03bfbfa05 100644 --- a/caffe2/operators/utility_ops.h +++ b/caffe2/operators/utility_ops.h @@ -196,7 +196,7 @@ class EnsureDenseOp final : public Operator { // allow the output to be copied from the input if (&input != output) { output->ResizeLike(input); - output->CopyFrom(input, &context_); + output->CopyFrom(input, true /*async*/); } return true; } @@ -257,7 +257,7 @@ class SumOp : public Operator { auto& input0 = Input(0); auto* output = Output(0); if (InputSize() == 1) { - output->CopyFrom(input0, &context_); + output->CopyFrom(input0, true /*async*/); return true; } output->ResizeLike(input0); diff --git a/caffe2/queue/queue_ops.h b/caffe2/queue/queue_ops.h index d5681e542f9..10500494685 100644 --- a/caffe2/queue/queue_ops.h +++ b/caffe2/queue/queue_ops.h @@ -160,7 +160,7 @@ class SafeDequeueBlobsOp final : public Operator { size, " total columns"); - out->Extend(in.sizes()[0], kTensorGrowthPct, &context_); + out->Extend(in.sizes()[0], kTensorGrowthPct); auto* dst = (char*)out->raw_mutable_data() + oldSize * in.dtype().itemsize(); context_.template CopyItems( diff --git a/caffe2/video/video_input_op.h b/caffe2/video/video_input_op.h index b2dffc2e412..a7855f63337 100644 --- a/caffe2/video/video_input_op.h +++ b/caffe2/video/video_input_op.h @@ -808,14 +808,17 @@ bool VideoInputOp::Prefetch() { // prefetch function as well. if (!std::is_same::value) { if (get_rgb_) { - prefetched_clip_rgb_on_device_.CopyFrom(prefetched_clip_rgb_, &context_); + prefetched_clip_rgb_on_device_.CopyFrom( + prefetched_clip_rgb_, true /*async*/); } if (get_optical_flow_) { - prefetched_clip_of_on_device_.CopyFrom(prefetched_clip_of_, &context_); + prefetched_clip_of_on_device_.CopyFrom( + prefetched_clip_of_, true /*async*/); } - prefetched_label_on_device_.CopyFrom(prefetched_label_, &context_); + prefetched_label_on_device_.CopyFrom(prefetched_label_, true /*async*/); if (get_video_id_) { - prefetched_video_id_on_device_.CopyFrom(prefetched_video_id_, &context_); + prefetched_video_id_on_device_.CopyFrom( + prefetched_video_id_, true /*async*/); } } return true; @@ -828,34 +831,34 @@ bool VideoInputOp::CopyPrefetched() { auto* clip_rgb_output = OperatorBase::Output(index++, Context::GetDeviceType()); if (std::is_same::value) { - clip_rgb_output->CopyFrom(prefetched_clip_rgb_, &context_); + clip_rgb_output->CopyFrom(prefetched_clip_rgb_, true /*async*/); } else { - clip_rgb_output->CopyFrom(prefetched_clip_rgb_on_device_, &context_); + clip_rgb_output->CopyFrom(prefetched_clip_rgb_on_device_, true /*async*/); } } if (get_optical_flow_) { auto* clip_of_output = OperatorBase::Output(index++, Context::GetDeviceType()); if (std::is_same::value) { - clip_of_output->CopyFrom(prefetched_clip_of_, &context_); + clip_of_output->CopyFrom(prefetched_clip_of_, true /*async*/); } else { - clip_of_output->CopyFrom(prefetched_clip_of_on_device_, &context_); + clip_of_output->CopyFrom(prefetched_clip_of_on_device_, true /*async*/); } } auto* label_output = OperatorBase::Output(index++, Context::GetDeviceType()); if (std::is_same::value) { - label_output->CopyFrom(prefetched_label_, &context_); + label_output->CopyFrom(prefetched_label_, true /*async*/); } else { - label_output->CopyFrom(prefetched_label_on_device_, &context_); + label_output->CopyFrom(prefetched_label_on_device_, true /*async*/); } if (get_video_id_) { auto* video_id_output = OperatorBase::Output(index, Context::GetDeviceType()); if (std::is_same::value) { - video_id_output->CopyFrom(prefetched_video_id_, &context_); + video_id_output->CopyFrom(prefetched_video_id_, true /*async*/); } else { - video_id_output->CopyFrom(prefetched_video_id_on_device_, &context_); + video_id_output->CopyFrom(prefetched_video_id_on_device_, true /*async*/); } } return true;