From d6f3c88418db2d5958a42d1bbac9de2bb29ab54b Mon Sep 17 00:00:00 2001 From: Chao Li Date: Thu, 16 Aug 2018 11:15:07 -0700 Subject: [PATCH] Revert D9076734: Split storage from tensor Differential Revision: D9076734 Original commit changeset: ea9e1094ecf8 fbshipit-source-id: 3fa9b65b7265fce6207d9e1d9ef4707dbb29704b --- caffe2/core/storage.h | 120 ------------------ caffe2/core/tensor.h | 289 +++++++++++++++++++----------------------- 2 files changed, 131 insertions(+), 278 deletions(-) delete mode 100644 caffe2/core/storage.h diff --git a/caffe2/core/storage.h b/caffe2/core/storage.h deleted file mode 100644 index aeebad4dbdb..00000000000 --- a/caffe2/core/storage.h +++ /dev/null @@ -1,120 +0,0 @@ -#ifndef CAFFE2_CORE_STORAGE_H_ -#define CAFFE2_CORE_STORAGE_H_ - -#include -#include -#include -#include -#include -#include -#include - -#include "caffe2/core/allocator.h" -#include "caffe2/core/common.h" -#include "caffe2/core/context.h" -#include "caffe2/core/flags.h" -#include "caffe2/core/logging.h" -#include "caffe2/core/typeid.h" - -namespace caffe2 { - -using DataType = TypeMeta; - -class StorageImpl; -using Storage = std::shared_ptr; - -class StorageImpl { - public: - StorageImpl() = delete; - StorageImpl(const StorageImpl&) = delete; - StorageImpl& operator=(const StorageImpl&) = delete; - - explicit StorageImpl(DeviceType device_type) : device_type_(device_type) {} - StorageImpl(DeviceType device_type, TypeMeta data_type) - : data_type_(data_type), device_type_(device_type) {} - - void reset() { - data_ptr_.reset(); - capacity_ = 0; - } - - template - inline bool IsType() const { - return data_type_.Match(); - } - - const void* data_ptr() const { - return data_ptr_.get(); - } - - void* data_ptr() { - return data_ptr_.get(); - } - - const DataType& dtype() const { - return data_type_; - } - - size_t capacity() const { - return capacity_; - } - - int64_t numel() const { - return capacity_ / itemsize(); - } - - inline void set_device_type(DeviceType device_type) { - device_type_ = device_type; - } - - inline DeviceType device_type() const { - return device_type_; - } - - inline size_t itemsize() const { - return data_type_.itemsize(); - } - - // Rule of Five - StorageImpl(StorageImpl&&) = default; - ~StorageImpl() = default; - StorageImpl& operator=(StorageImpl&&) = default; - - protected: - template - void ShareExternalPointer( - void* src, - const DataType& data_type, - size_t capacity = 0, - Deleter d = nullptr) { - // Check if the deleter is a MemoryDeleter and is a simple nullptr. - if (std::is_same::value && - reinterpret_cast(&d)[0] == nullptr) { - // Use aliasing constructor trick to avoid calling the destructor. - data_ptr_ = std::shared_ptr(std::shared_ptr(), src); - } else { - data_ptr_.reset(src, d); - } - // Sets capacity. If not specified, we will implicitly assume that - // the capacity is the current size. - if (capacity) { - capacity_ = capacity; - } - } - - // TODO: changed to DataPtr in Aten when shared folder - // is ready - using DataPtr = std::shared_ptr; - int64_t capacity_ = 0; - DataType data_type_; - DataPtr data_ptr_; - // allocator_ takes precedence over StaticContext from device_type_ - // Allocator* allocator_; - DeviceType device_type_ = CPU; - - friend class Tensor; -}; - -} // namespace caffe2 - -#endif // CAFFE2_CORE_STORAGE_H_ diff --git a/caffe2/core/tensor.h b/caffe2/core/tensor.h index ad39ab625cf..2e011efb55c 100644 --- a/caffe2/core/tensor.h +++ b/caffe2/core/tensor.h @@ -1,7 +1,19 @@ #ifndef CAFFE2_CORE_TENSOR_H_ #define CAFFE2_CORE_TENSOR_H_ -#include "caffe2/core/storage.h" +#include +#include +#include +#include +#include +#include +#include + +#include "caffe2/core/common.h" +#include "caffe2/core/flags.h" +#include "caffe2/core/context.h" +#include "caffe2/core/typeid.h" +#include "caffe2/core/logging.h" // A global boolean variable to control whether we free memory when a Tensor // is shrinked to a smaller size. As a result, a Tensor is always going to @@ -80,8 +92,7 @@ inline int canonical_axis_index_(int axis_index, int ndims) { class Tensor { public: Tensor() = delete; - explicit Tensor(DeviceType device_type) - : storage_(std::make_shared(device_type)) {} + explicit Tensor(DeviceType type) : device_type_(type) {} /** * @brief Creates a tensor of the given dimension. @@ -89,27 +100,20 @@ class Tensor { * Note that the actual data allocation is not going to be carried out until * the first time mutable_data() is called. */ - // TODO: here, we create a Storage - // and immediately discard it in Resize() since - // reset_tensor will be true and FreeMemory will be called, - // we might want to avoid creating Storage twice? - explicit Tensor(const vector& dims, DeviceType device_type) - : storage_(std::make_shared(device_type)) { + explicit Tensor(const vector& dims, DeviceType type) + : device_type_(type) { Resize(dims); } - explicit Tensor(const vector& dims, DeviceType device_type) - : storage_(std::make_shared(device_type)) { + explicit Tensor(const vector& dims, DeviceType type) + : device_type_(type) { Resize(dims); } /* Now we require that context_for_copy has the same device type as src since * template is removed */ - Tensor( - const Tensor& src, - BaseContext* context_for_copy, - DeviceType device_type) - : storage_(std::make_shared(device_type)) { + Tensor(const Tensor& src, BaseContext* context_for_copy, DeviceType type) + : device_type_(type) { CopyFrom(src, context_for_copy); } @@ -117,8 +121,7 @@ class Tensor { * @brief: Create a Tensor of DeviceType `type` and initialize it with * src Tensor */ - Tensor(const Tensor& src, DeviceType device_type) - : storage_(std::make_shared(device_type)) { + Tensor(const Tensor& src, DeviceType type) : device_type_(type) { CopyFrom(src); } @@ -131,13 +134,11 @@ class Tensor { const vector& dims, const vector& values, BaseContext* context) - : storage_(std::make_shared( - context->GetDevicetype(), - TypeMeta::Make())) { + : meta_(TypeMeta::Make()) { Resize(dims); CAFFE_ENFORCE_EQ_WITH_CALLER(values.size(), size_); - context->CopyItemsFromCPU( - storage_->dtype(), size_, values.data(), mutable_data()); + device_type_ = context->GetDevicetype(); + context->CopyItemsFromCPU(meta_, size_, values.data(), mutable_data()); } /** @@ -147,13 +148,10 @@ class Tensor { template < typename T, typename = typename std::enable_if::value>::type> - Tensor(const T& value, BaseContext* context) - : storage_(std::make_shared( - context->GetDevicetype(), - TypeMeta::Make())) { + Tensor(const T& value, BaseContext* context) : meta_(TypeMeta::Make()) { Resize(vector{}); - context->CopyItemsFromCPU( - storage_->dtype(), size_, &value, mutable_data()); + device_type_ = context->GetDevicetype(); + context->CopyItemsFromCPU(meta_, size_, &value, mutable_data()); } /* @@ -161,7 +159,7 @@ class Tensor { * context pointer in tensor, which indicates the type of the tensor. */ BaseStaticContext* GetStaticContext() const { - return GET_STATIC_CONTEXT(GetDeviceType()); + return GET_STATIC_CONTEXT(device_type_); } /* @brief @@ -176,9 +174,8 @@ class Tensor { } DeviceType GetDeviceType() const { - return storage_->device_type(); + return device_type_; } - /** * @brief Copies the data from a source tensor, with a contex provided to * carry out the underlying memcpy operation. @@ -187,23 +184,25 @@ class Tensor { if ((void*)&src == (void*)this) { return; } - storage_->data_type_ = src.storage()->dtype(); + meta_ = src.meta(); if (src.size() == -1) { dims_.clear(); size_ = -1; - storage_->reset(); + data_.reset(); + capacity_ = 0; + reserved_ = false; return; } Resize(src.dims()); if (size() > 0) { - if (storage_->dtype().copy()) { + if (meta_.copy()) { CAFFE_ENFORCE( GetDeviceType() == CPU, "In CopyFrom source and dest tensors must both be CPU for meta copy"); CAFFE_ENFORCE( src.GetDeviceType() == CPU, "In CopyFrom source and dest tensors must both be CPU for meta copy"); - storage_->dtype().copy()(src.raw_data(), raw_mutable_data(), size()); + meta_.copy()(src.raw_data(), raw_mutable_data(), size()); } else { // We'll need to use a non-CPU context to perform the copy if // one of the context is not CPU since only non-CPU context @@ -256,12 +255,9 @@ class Tensor { CAFFE_ENFORCE_GE_WITH_CALLER(dims_.size(), 1); CAFFE_ENFORCE_GE_WITH_CALLER( num, 0, "`num` must be non-negative for Extend"); - CAFFE_ENFORCE( - storage_.use_count() == 1, - "Can't call Extend on shared storage, please call Resize instead"); auto newDims = dims_; newDims[0] += num; - if (!storage_->data_ptr()) { + if (!data_) { Resize(newDims); return; } @@ -270,7 +266,7 @@ class Tensor { newDims.end(), static_cast(1), std::multiplies()); - if (newSize * storage_->itemsize() <= storage_->capacity()) { + if (newSize * meta_.itemsize() <= capacity_) { dims_ = newDims; size_ = newSize; return; @@ -278,15 +274,14 @@ class Tensor { auto newCapacity = dims_; newCapacity[0] = std::max( newDims[0], std::ceil(dims_[0] * (growthPct + 100) / 100)); - auto oldData = std::move(storage_->data_ptr_); + auto oldData = std::move(data_); auto oldSize = size_; auto oldDims = dims_; Resize(newCapacity); - auto* newData = raw_mutable_data(storage_->dtype()); + auto* newData = raw_mutable_data(meta_); CAFFE_ENFORCE( context != nullptr, "Context must be provided to Extend the tensor"); - context->CopyItemsSameDevice( - storage_->dtype(), oldSize, oldData.get(), newData); + context->CopyItemsSameDevice(meta_, oldSize, oldData.get(), newData); reserved_ = true; dims_ = newDims; size_ = newSize; @@ -303,9 +298,6 @@ class Tensor { CAFFE_ENFORCE_WITH_CALLER( outer_dim <= dims_[0], "New outer dimension must be smaller than current."); - CAFFE_ENFORCE( - storage_.use_count() == 1, - "Can't call ShrinkTo on shared storage, please call Resize instead."); dims_[0] = outer_dim; size_ = std::accumulate( dims_.begin(), @@ -324,9 +316,6 @@ class Tensor { void ReserveSpace(const T& outer_dim) { CAFFE_ENFORCE( size_ != -1, "size should be initialized before calling ReserveSpace"); - CAFFE_ENFORCE( - storage_.use_count() == 1, - "Can't call ReserveSpace on shared storage."); auto newCapacity = dims_; newCapacity[0] = outer_dim; auto newSize = std::accumulate( @@ -334,16 +323,16 @@ class Tensor { newCapacity.end(), static_cast(1), std::multiplies()); - if (newSize * storage_->itemsize() <= storage_->capacity()) { + if (newSize * meta_.itemsize() <= capacity_) { return; } // Old data is discarded - storage_->data_ptr_.reset(); + data_.reset(); auto oldSize = size_; auto oldDims = dims_; Resize(newCapacity); - // Allocate new memory but don't copy over the data - raw_mutable_data(storage_->dtype()); + // Allocate new memory and don't copy over the data + raw_mutable_data(meta_); dims_ = oldDims; size_ = oldSize; reserved_ = true; @@ -368,16 +357,15 @@ class Tensor { if (size_changed) { // If needed, we will free the data. the next mutable_data() call // will create the data storage. + int64_t new_size = size_ * meta_.itemsize(); bool reset_tensor = false; if (reserved_) { - // If tensor is reserved then don't claim its memeory unless capacity() + // If tensor is reserved then don't claim its memeory unless capacity_ // is smaller than new size - reset_tensor = storage_->capacity() < size_ * storage_->itemsize(); + reset_tensor = capacity_ < new_size; } else { - reset_tensor = storage_->capacity() < size_ * storage_->itemsize() || - !FLAGS_caffe2_keep_on_shrink || - storage_->capacity() - size_ * storage_->itemsize() > - FLAGS_caffe2_max_keep_on_shrink_memory; + reset_tensor = capacity_ < new_size || !FLAGS_caffe2_keep_on_shrink || + capacity_ - new_size > FLAGS_caffe2_max_keep_on_shrink_memory; } if (reset_tensor) { @@ -429,9 +417,12 @@ class Tensor { * allocation. */ inline void FreeMemory() { - // We'll detach from the old Storage and create a new one - storage_ = std::make_shared( - storage_->device_type(), storage_->dtype()); + data_.reset(); + capacity_ = 0; + // If reserved is true and we changed tensor memory then it is fine + // to switch it to false, if Resize is called from Reserve and it triggers + // FreeMemory() then reserved_ will be set to true at end of ReserveSpace() + reserved_ = false; } /** @@ -441,8 +432,8 @@ class Tensor { */ string DebugString() const { std::stringstream ss; - ss << "A Tensor of item size " << storage_->itemsize() << " and type " - << storage_->dtype().name() << " and dimension ("; + ss << "A Tensor of item size " << itemsize() << " and type " + << meta_.name() << " and dimension ("; for (int d : dims_) { ss << d << ","; } @@ -453,7 +444,11 @@ class Tensor { void swap(Tensor& other) noexcept { std::swap(dims_, other.dims_); std::swap(size_, other.size_); - std::swap(storage_, other.storage_); + std::swap(meta_, other.meta_); + std::swap(data_, other.data_); + std::swap(capacity_, other.capacity_); + std::swap(reserved_, other.reserved_); + std::swap(device_type_, other.device_type_); } /** @@ -469,10 +464,7 @@ class Tensor { * The source tensor should already have its data allocated. */ void ShareData(const Tensor& src) { - CAFFE_ENFORCE( - storage_.use_count() == 1, - "Can't share data if underlying storage used by more than one tensor"); - storage_->data_type_ = src.storage()->data_type_; + meta_ = src.meta(); CAFFE_ENFORCE_EQ_WITH_CALLER( src.size_, size_, @@ -481,11 +473,11 @@ class Tensor { // in which case ShareData() doesn't make much sense since we don't really // know what to share yet. CAFFE_ENFORCE_WITH_CALLER( - src.storage()->data_ptr() || src.size_ == 0, + src.data_.get() || src.size_ == 0, "Source tensor has no content and has size > 0"); // Finally, do sharing. - storage_->data_ptr_ = src.storage()->data_ptr_; - storage_->capacity_ = src.storage()->capacity_; + data_ = src.data_; + capacity_ = src.capacity_; } /** @@ -497,36 +489,40 @@ class Tensor { * using it. If a Deleter object is passed in, when this tensor is reallocated * or freed, the deleter function is going to be called. */ - // TODO: Change to ShareExternalStorage template void ShareExternalPointer(T* src, size_t capacity = 0, Deleter d = nullptr) { ShareExternalPointer(src, TypeMeta::Make(), capacity, d); - // Sets capacity. If not specified, we will implicitly assume that - // the capacity is the current size. - if (!capacity) { - capacity = size_ * storage_->itemsize(); - } - storage_->capacity_ = capacity; } template void ShareExternalPointer( void* src, - const TypeMeta& data_type, + const TypeMeta& meta, size_t capacity = 0, Deleter d = nullptr) { - CAFFE_ENFORCE( - storage_.use_count() == 1, - "Can't share external pointer if underlying storage used by more than one tensor"); - storage_->data_type_ = data_type; + meta_ = meta; CAFFE_ENFORCE_WITH_CALLER( - storage_->data_type_.id() != TypeIdentifier::uninitialized(), + meta_.id() != TypeIdentifier::uninitialized(), "To share with a raw external pointer you need to have meta " "already set."); CAFFE_ENFORCE_WITH_CALLER( size_ >= 0, "To share data with a raw pointer, you need to set shape first."); - storage_->ShareExternalPointer(src, data_type, capacity, d); + // Check if the deleter is a MemoryDeleter and is a simple nullptr. + if (std::is_same::value && + reinterpret_cast(&d)[0] == nullptr) { + // Use aliasing constructor trick to avoid calling the destructor. + data_ = std::shared_ptr(std::shared_ptr(), src); + } else { + data_.reset(src, d); + } + // Sets capacity. If not specified, we will implicitly assume that + // the capacity is the current size. + if (capacity) { + capacity_ = capacity; + } else { + capacity_ = nbytes(); + } } /** @@ -534,8 +530,8 @@ class Tensor { * or raw_mutable_data() must have been called prior to this function call. */ inline const void* raw_data() const { - CAFFE_ENFORCE_WITH_CALLER(storage_->data_ptr() || size_ == 0); - return storage_->data_ptr(); + CAFFE_ENFORCE_WITH_CALLER(data_.get() || size_ == 0); + return data_.get(); } /** @@ -547,7 +543,7 @@ class Tensor { template inline const T* data() const { CAFFE_ENFORCE_WITH_CALLER( - storage_->data_ptr() || size_ == 0, + data_.get() || size_ == 0, "The tensor is of non-zero shape, but its data is not allocated yet. " "Caffe2 uses a lazy allocation, so you will need to call " "mutable_data() or raw_mutable_data() to actually allocate memory."); @@ -556,8 +552,8 @@ class Tensor { "Tensor type mismatch, caller expects elements to be ", TypeMeta::TypeName(), " while tensor contains ", - storage_->dtype().name()); - return static_cast(storage_->data_ptr()); + meta_.name()); + return static_cast(data_.get()); } /** @@ -573,12 +569,11 @@ class Tensor { */ inline void* raw_mutable_data(const TypeMeta& meta) { // For 0-size tensors it's fine to return any pointer (including nullptr) - if (storage_->dtype() == meta && (storage_->data_ptr() || size_ == 0)) { - return storage_->data_ptr(); + if (meta_ == meta && (data_.get() || size_ == 0)) { + return data_.get(); } else { - bool had_special_dtor = storage_->dtype().dtor() != nullptr; - // TODO: we should create a new Storage here. - storage_->data_type_ = meta; + bool had_special_dtor = meta_.dtor() != nullptr; + meta_ = meta; CAFFE_ENFORCE_WITH_CALLER( size_ >= 0, "Tensor is not initialized. You probably need to call Resize() " @@ -589,33 +584,32 @@ class Tensor { // constructor. if (size_ == 0 || (meta.ctor() == nullptr && !had_special_dtor && - storage_->capacity() >= size_ * storage_->itemsize())) { - return storage_->data_ptr(); + capacity_ >= size_ * meta_.itemsize())) { + return data_.get(); } if (meta.ctor()) { // For types that need placement new, we will call it, as well as // making sure that when the data is freed, it calls the right // destruction procedure. auto size = size_; - auto dtor = storage_->dtype().dtor(); + auto dtor = meta_.dtor(); auto ptr_and_deleter = - GetStaticContext()->New(size_ * storage_->itemsize()); + GetStaticContext()->New(size_ * meta_.itemsize()); auto deleter = ptr_and_deleter.second; - storage_->data_ptr_.reset( + data_.reset( ptr_and_deleter.first, [size, dtor, deleter](void* ptr) -> void { dtor(ptr, size); deleter(ptr); }); - storage_->dtype().ctor()(storage_->data_ptr(), size_); + meta_.ctor()(data_.get(), size_); } else { // For fundamental type, new and delete is easier. auto ptr_and_deleter = - GetStaticContext()->New(size_ * storage_->itemsize()); - storage_->data_ptr_.reset( - ptr_and_deleter.first, ptr_and_deleter.second); + GetStaticContext()->New(size_ * meta_.itemsize()); + data_.reset(ptr_and_deleter.first, ptr_and_deleter.second); } - storage_->capacity_ = size_ * storage_->itemsize(); - return storage_->data_ptr(); + capacity_ = size_ * meta_.itemsize(); + return data_.get(); } } @@ -630,10 +624,10 @@ class Tensor { */ inline void* raw_mutable_data() { CAFFE_ENFORCE_WITH_CALLER( - storage_->dtype().id() != TypeIdentifier::uninitialized(), + meta_.id() != TypeIdentifier::uninitialized(), "Calling raw_mutable_data() without meta, but the current meta is " "of unknown type."); - return raw_mutable_data(storage_->dtype()); + return raw_mutable_data(meta_); } /** @@ -642,60 +636,41 @@ class Tensor { * For fundamental types, we reuse possible existing storage if there * is sufficient capacity. */ - template - inline T* mutable_data() { - if ((size_ == 0 || storage_->data_ptr()) && IsType()) { - return static_cast(storage_->data_ptr()); + template + inline T* mutable_data() { + if ((size_ == 0 || data_.get()) && IsType()) { + return static_cast(data_.get()); + } + // Check it here statically - otherwise TypeMeta would throw the runtime + // error in attempt to invoke TypeMeta::ctor() + static_assert( + std::is_default_constructible::value, + "Tensor can't hold non-default-constructible types"); + return static_cast(raw_mutable_data(TypeMeta::Make())); } - // Check it here statically - otherwise TypeMeta would throw the runtime - // error in attempt to invoke TypeMeta::ctor() - static_assert( - std::is_default_constructible::value, - "Tensor can't hold non-default-constructible types"); - return static_cast(raw_mutable_data(TypeMeta::Make())); - } - /** - * Returns the underlying Stoarge for the Tensor - */ - inline Storage storage() { - return storage_; - } - - inline Storage storage() const { - return storage_; - } /** * Returns the number of dimensions of the data. */ - inline int ndim() const { - return dims_.size(); - } + inline int ndim() const { return dims_.size(); } /** * Returns the size (i.e. the number of items) of the tensor. */ - inline TIndex size() const { - return size_; - } + inline TIndex size() const { return size_; } /** * Return the number of bytes each item takes in the tensor. */ - inline size_t itemsize() const { - return storage_->itemsize(); - } + inline size_t itemsize() const { return meta_.itemsize(); } /** * Returns the total number of bytes of the storage. * * This is equivalent to calling size() * itemsize(). */ - inline size_t nbytes() const { - return size_ * itemsize(); - ; - } + inline size_t nbytes() const { return size_ * meta_.itemsize(); } inline size_t capacity_nbytes() const { - return storage_->capacity(); + return capacity_; } /** * Returns the dimensions of the tensor as a vector. @@ -733,15 +708,11 @@ class Tensor { * Checks if the tensor content is of the given data type. */ template - inline bool IsType() const { - return storage_->IsType(); - } + inline bool IsType() const { return meta_.Match(); } /** * Returns the TypeMeta object associated with the current data type. */ - inline const TypeMeta& meta() const { - return storage_->dtype(); - } + inline const TypeMeta& meta() const { return meta_; } /** * Returns the i-th dimension of the tensor in int. @@ -796,16 +767,18 @@ class Tensor { } protected: - using DimVector = std::vector; - DimVector dims_; // sizes_ - TIndex size_ = -1; // numel_ - // we decide to keep reserved_ and it will + vector dims_; + TIndex size_ = -1; + TypeMeta meta_; + std::shared_ptr data_; + size_t capacity_ = 0; + // we decide to keep reserved and it will // live in Tensor after the split // The logic is that if Extend() or ReserveSpace() were ever called, // then subsequent Resize()s will not free up Storage. bool reserved_ = false; - Storage storage_; - // int64_t storage_offset_; + DeviceType device_type_ = CPU; + // In case of chunk load we store how much data was already loaded private: template <