From d6f3c88418db2d5958a42d1bbac9de2bb29ab54b Mon Sep 17 00:00:00 2001
From: Chao Li <chaoli@fb.com>
Date: Thu, 16 Aug 2018 11:15:07 -0700
Subject: [PATCH] Revert D9076734: Split storage from tensor

Differential Revision:
D9076734

Original commit changeset: ea9e1094ecf8

fbshipit-source-id: 3fa9b65b7265fce6207d9e1d9ef4707dbb29704b
---
 caffe2/core/storage.h | 120 ------------------
 caffe2/core/tensor.h  | 289 +++++++++++++++++++-----------------------
 2 files changed, 131 insertions(+), 278 deletions(-)
 delete mode 100644 caffe2/core/storage.h
diff --git a/caffe2/core/storage.h b/caffe2/core/storage.h
deleted file mode 100644
index aeebad4dbdb..00000000000
--- a/caffe2/core/storage.h
+++ /dev/null
@@ -1,120 +0,0 @@
-#ifndef CAFFE2_CORE_STORAGE_H_
-#define CAFFE2_CORE_STORAGE_H_
-
-#include <cstddef>
-#include <cstdint>
-#include <fstream>
-#include <sstream>
-#include <type_traits>
-#include <typeinfo>
-#include <vector>
-
-#include "caffe2/core/allocator.h"
-#include "caffe2/core/common.h"
-#include "caffe2/core/context.h"
-#include "caffe2/core/flags.h"
-#include "caffe2/core/logging.h"
-#include "caffe2/core/typeid.h"
-
-namespace caffe2 {
-
-using DataType = TypeMeta;
-
-class StorageImpl;
-using Storage = std::shared_ptr<StorageImpl>;
-
-class StorageImpl {
- public:
-  StorageImpl() = delete;
-  StorageImpl(const StorageImpl&) = delete;
-  StorageImpl& operator=(const StorageImpl&) = delete;
-
-  explicit StorageImpl(DeviceType device_type) : device_type_(device_type) {}
-  StorageImpl(DeviceType device_type, TypeMeta data_type)
-      : data_type_(data_type), device_type_(device_type) {}
-
-  void reset() {
-    data_ptr_.reset();
-    capacity_ = 0;
-  }
-
-  template <typename T>
-  inline bool IsType() const {
-    return data_type_.Match<T>();
-  }
-
-  const void* data_ptr() const {
-    return data_ptr_.get();
-  }
-
-  void* data_ptr() {
-    return data_ptr_.get();
-  }
-
-  const DataType& dtype() const {
-    return data_type_;
-  }
-
-  size_t capacity() const {
-    return capacity_;
-  }
-
-  int64_t numel() const {
-    return capacity_ / itemsize();
-  }
-
-  inline void set_device_type(DeviceType device_type) {
-    device_type_ = device_type;
-  }
-
-  inline DeviceType device_type() const {
-    return device_type_;
-  }
-
-  inline size_t itemsize() const {
-    return data_type_.itemsize();
-  }
-
-  // Rule of Five
-  StorageImpl(StorageImpl&&) = default;
-  ~StorageImpl() = default;
-  StorageImpl& operator=(StorageImpl&&) = default;
-
- protected:
-  template <typename Deleter = MemoryDeleter>
-  void ShareExternalPointer(
-      void* src,
-      const DataType& data_type,
-      size_t capacity = 0,
-      Deleter d = nullptr) {
-    // Check if the deleter is a MemoryDeleter and is a simple nullptr.
-    if (std::is_same<MemoryDeleter, Deleter>::value &&
-        reinterpret_cast<MemoryDeleter*>(&d)[0] == nullptr) {
-      // Use aliasing constructor trick to avoid calling the destructor.
-      data_ptr_ = std::shared_ptr<void>(std::shared_ptr<void>(), src);
-    } else {
-      data_ptr_.reset(src, d);
-    }
-    // Sets capacity. If not specified, we will implicitly assume that
-    // the capacity is the current size.
-    if (capacity) {
-      capacity_ = capacity;
-    }
-  }
-
-  // TODO: changed to DataPtr in Aten when shared folder
-  // is ready
-  using DataPtr = std::shared_ptr<void>;
-  int64_t capacity_ = 0;
-  DataType data_type_;
-  DataPtr data_ptr_;
-  // allocator_ takes precedence over StaticContext from device_type_
-  // Allocator* allocator_;
-  DeviceType device_type_ = CPU;
-
-  friend class Tensor;
-};
-
-} // namespace caffe2
-
-#endif // CAFFE2_CORE_STORAGE_H_
diff --git a/caffe2/core/tensor.h b/caffe2/core/tensor.h
index ad39ab625cf..2e011efb55c 100644
--- a/caffe2/core/tensor.h
+++ b/caffe2/core/tensor.h
@@ -1,7 +1,19 @@
 #ifndef CAFFE2_CORE_TENSOR_H_
 #define CAFFE2_CORE_TENSOR_H_
 
-#include "caffe2/core/storage.h"
+#include <cstddef>
+#include <cstdint>
+#include <fstream>
+#include <sstream>
+#include <type_traits>
+#include <typeinfo>
+#include <vector>
+
+#include "caffe2/core/common.h"
+#include "caffe2/core/flags.h"
+#include "caffe2/core/context.h"
+#include "caffe2/core/typeid.h"
+#include "caffe2/core/logging.h"
 
 // A global boolean variable to control whether we free memory when a Tensor
 // is shrinked to a smaller size. As a result, a Tensor is always going to
@@ -80,8 +92,7 @@ inline int canonical_axis_index_(int axis_index, int ndims) {
 class Tensor {
  public:
   Tensor() = delete;
-  explicit Tensor(DeviceType device_type)
-      : storage_(std::make_shared<StorageImpl>(device_type)) {}
+  explicit Tensor(DeviceType type) : device_type_(type) {}
 
   /**
    * @brief Creates a tensor of the given dimension.
@@ -89,27 +100,20 @@ class Tensor {
    * Note that the actual data allocation is not going to be carried out until
    * the first time mutable_data() is called.
    */
-  // TODO: here, we create a Storage
-  // and immediately discard it in Resize() since
-  // reset_tensor will be true and FreeMemory will be called,
-  // we might want to avoid creating Storage twice?
-  explicit Tensor(const vector<TIndex>& dims, DeviceType device_type)
-      : storage_(std::make_shared<StorageImpl>(device_type)) {
+  explicit Tensor(const vector<TIndex>& dims, DeviceType type)
+      : device_type_(type) {
     Resize(dims);
   }
-  explicit Tensor(const vector<int>& dims, DeviceType device_type)
-      : storage_(std::make_shared<StorageImpl>(device_type)) {
+  explicit Tensor(const vector<int>& dims, DeviceType type)
+      : device_type_(type) {
     Resize(dims);
   }
 
   /* Now we require that context_for_copy has the same device type as src since
    * template is removed
    */
-  Tensor(
-      const Tensor& src,
-      BaseContext* context_for_copy,
-      DeviceType device_type)
-      : storage_(std::make_shared<StorageImpl>(device_type)) {
+  Tensor(const Tensor& src, BaseContext* context_for_copy, DeviceType type)
+      : device_type_(type) {
     CopyFrom(src, context_for_copy);
   }
 
@@ -117,8 +121,7 @@ class Tensor {
    * @brief: Create a Tensor of DeviceType `type` and initialize it with
    * src Tensor
    */
-  Tensor(const Tensor& src, DeviceType device_type)
-      : storage_(std::make_shared<StorageImpl>(device_type)) {
+  Tensor(const Tensor& src, DeviceType type) : device_type_(type) {
     CopyFrom(src);
   }
 
@@ -131,13 +134,11 @@ class Tensor {
       const vector<TIndex>& dims,
       const vector<T>& values,
       BaseContext* context)
-      : storage_(std::make_shared<StorageImpl>(
-            context->GetDevicetype(),
-            TypeMeta::Make<T>())) {
+      : meta_(TypeMeta::Make<T>()) {
     Resize(dims);
     CAFFE_ENFORCE_EQ_WITH_CALLER(values.size(), size_);
-    context->CopyItemsFromCPU(
-        storage_->dtype(), size_, values.data(), mutable_data<T>());
+    device_type_ = context->GetDevicetype();
+    context->CopyItemsFromCPU(meta_, size_, values.data(), mutable_data<T>());
   }
 
   /**
@@ -147,13 +148,10 @@ class Tensor {
   template <
       typename T,
       typename = typename std::enable_if<std::is_scalar<T>::value>::type>
-  Tensor(const T& value, BaseContext* context)
-      : storage_(std::make_shared<StorageImpl>(
-            context->GetDevicetype(),
-            TypeMeta::Make<T>())) {
+  Tensor(const T& value, BaseContext* context) : meta_(TypeMeta::Make<T>()) {
     Resize(vector<TIndex>{});
-    context->CopyItemsFromCPU(
-        storage_->dtype(), size_, &value, mutable_data<T>());
+    device_type_ = context->GetDevicetype();
+    context->CopyItemsFromCPU(meta_, size_, &value, mutable_data<T>());
   }
 
   /*
@@ -161,7 +159,7 @@ class Tensor {
    * context pointer in tensor, which indicates the type of the tensor.
    */
   BaseStaticContext* GetStaticContext() const {
-    return GET_STATIC_CONTEXT(GetDeviceType());
+    return GET_STATIC_CONTEXT(device_type_);
   }
 
   /* @brief
@@ -176,9 +174,8 @@ class Tensor {
   }
 
   DeviceType GetDeviceType() const {
-    return storage_->device_type();
+    return device_type_;
   }
-
   /**
    * @brief Copies the data from a source tensor, with a contex provided to
    * carry out the underlying memcpy operation.
@@ -187,23 +184,25 @@ class Tensor {
     if ((void*)&src == (void*)this) {
       return;
     }
-    storage_->data_type_ = src.storage()->dtype();
+    meta_ = src.meta();
     if (src.size() == -1) {
       dims_.clear();
       size_ = -1;
-      storage_->reset();
+      data_.reset();
+      capacity_ = 0;
+      reserved_ = false;
       return;
     }
     Resize(src.dims());
     if (size() > 0) {
-      if (storage_->dtype().copy()) {
+      if (meta_.copy()) {
         CAFFE_ENFORCE(
             GetDeviceType() == CPU,
             "In CopyFrom source and dest tensors must both be CPU for meta copy");
         CAFFE_ENFORCE(
             src.GetDeviceType() == CPU,
             "In CopyFrom source and dest tensors must both be CPU for meta copy");
-        storage_->dtype().copy()(src.raw_data(), raw_mutable_data(), size());
+        meta_.copy()(src.raw_data(), raw_mutable_data(), size());
       } else {
         // We'll need to use a non-CPU context to perform the copy if
         // one of the context is not CPU since only non-CPU context
@@ -256,12 +255,9 @@ class Tensor {
     CAFFE_ENFORCE_GE_WITH_CALLER(dims_.size(), 1);
     CAFFE_ENFORCE_GE_WITH_CALLER(
         num, 0, "`num` must be non-negative for Extend");
-    CAFFE_ENFORCE(
-        storage_.use_count() == 1,
-        "Can't call Extend on shared storage, please call Resize instead");
     auto newDims = dims_;
     newDims[0] += num;
-    if (!storage_->data_ptr()) {
+    if (!data_) {
       Resize(newDims);
       return;
     }
@@ -270,7 +266,7 @@ class Tensor {
         newDims.end(),
         static_cast<TIndex>(1),
         std::multiplies<TIndex>());
-    if (newSize * storage_->itemsize() <= storage_->capacity()) {
+    if (newSize * meta_.itemsize() <= capacity_) {
       dims_ = newDims;
       size_ = newSize;
       return;
@@ -278,15 +274,14 @@ class Tensor {
     auto newCapacity = dims_;
     newCapacity[0] = std::max<size_t>(
         newDims[0], std::ceil(dims_[0] * (growthPct + 100) / 100));
-    auto oldData = std::move(storage_->data_ptr_);
+    auto oldData = std::move(data_);
     auto oldSize = size_;
     auto oldDims = dims_;
     Resize(newCapacity);
-    auto* newData = raw_mutable_data(storage_->dtype());
+    auto* newData = raw_mutable_data(meta_);
     CAFFE_ENFORCE(
         context != nullptr, "Context must be provided to Extend the tensor");
-    context->CopyItemsSameDevice(
-        storage_->dtype(), oldSize, oldData.get(), newData);
+    context->CopyItemsSameDevice(meta_, oldSize, oldData.get(), newData);
     reserved_ = true;
     dims_ = newDims;
     size_ = newSize;
@@ -303,9 +298,6 @@ class Tensor {
     CAFFE_ENFORCE_WITH_CALLER(
         outer_dim <= dims_[0],
         "New outer dimension must be smaller than current.");
-    CAFFE_ENFORCE(
-        storage_.use_count() == 1,
-        "Can't call ShrinkTo on shared storage, please call Resize instead.");
     dims_[0] = outer_dim;
     size_ = std::accumulate(
         dims_.begin(),
@@ -324,9 +316,6 @@ class Tensor {
   void ReserveSpace(const T& outer_dim) {
     CAFFE_ENFORCE(
         size_ != -1, "size should be initialized before calling ReserveSpace");
-    CAFFE_ENFORCE(
-        storage_.use_count() == 1,
-        "Can't call ReserveSpace on shared storage.");
     auto newCapacity = dims_;
     newCapacity[0] = outer_dim;
     auto newSize = std::accumulate(
@@ -334,16 +323,16 @@ class Tensor {
         newCapacity.end(),
         static_cast<TIndex>(1),
         std::multiplies<TIndex>());
-    if (newSize * storage_->itemsize() <= storage_->capacity()) {
+    if (newSize * meta_.itemsize() <= capacity_) {
       return;
     }
     // Old data is discarded
-    storage_->data_ptr_.reset();
+    data_.reset();
     auto oldSize = size_;
     auto oldDims = dims_;
     Resize(newCapacity);
-    // Allocate new memory but don't copy over the data
-    raw_mutable_data(storage_->dtype());
+    // Allocate new memory and don't copy over the data
+    raw_mutable_data(meta_);
     dims_ = oldDims;
     size_ = oldSize;
     reserved_ = true;
@@ -368,16 +357,15 @@ class Tensor {
     if (size_changed) {
       // If needed, we will free the data. the next mutable_data() call
       // will create the data storage.
+      int64_t new_size = size_ * meta_.itemsize();
       bool reset_tensor = false;
       if (reserved_) {
-        // If tensor is reserved then don't claim its memeory unless capacity()
+        // If tensor is reserved then don't claim its memeory unless capacity_
         // is smaller than new size
-        reset_tensor = storage_->capacity() < size_ * storage_->itemsize();
+        reset_tensor = capacity_ < new_size;
       } else {
-        reset_tensor = storage_->capacity() < size_ * storage_->itemsize() ||
-            !FLAGS_caffe2_keep_on_shrink ||
-            storage_->capacity() - size_ * storage_->itemsize() >
-                FLAGS_caffe2_max_keep_on_shrink_memory;
+        reset_tensor = capacity_ < new_size || !FLAGS_caffe2_keep_on_shrink ||
+            capacity_ - new_size > FLAGS_caffe2_max_keep_on_shrink_memory;
       }
 
       if (reset_tensor) {
@@ -429,9 +417,12 @@ class Tensor {
    * allocation.
    */
   inline void FreeMemory() {
-    // We'll detach from the old Storage and create a new one
-    storage_ = std::make_shared<StorageImpl>(
-        storage_->device_type(), storage_->dtype());
+    data_.reset();
+    capacity_ = 0;
+    // If reserved is true and we changed tensor memory then it is fine
+    // to switch it to false, if Resize is called from Reserve and it triggers
+    // FreeMemory() then reserved_ will be set to true at end of ReserveSpace()
+    reserved_ = false;
   }
 
   /**
@@ -441,8 +432,8 @@ class Tensor {
    */
   string DebugString() const {
     std::stringstream ss;
-    ss << "A Tensor of item size " << storage_->itemsize() << " and type "
-       << storage_->dtype().name() << " and dimension (";
+    ss << "A Tensor of item size " << itemsize() << " and type "
+       << meta_.name() << " and dimension (";
     for (int d : dims_) {
       ss << d << ",";
     }
@@ -453,7 +444,11 @@ class Tensor {
   void swap(Tensor& other) noexcept {
     std::swap(dims_, other.dims_);
     std::swap(size_, other.size_);
-    std::swap(storage_, other.storage_);
+    std::swap(meta_, other.meta_);
+    std::swap(data_, other.data_);
+    std::swap(capacity_, other.capacity_);
+    std::swap(reserved_, other.reserved_);
+    std::swap(device_type_, other.device_type_);
   }
 
   /**
@@ -469,10 +464,7 @@ class Tensor {
    * The source tensor should already have its data allocated.
    */
   void ShareData(const Tensor& src) {
-    CAFFE_ENFORCE(
-        storage_.use_count() == 1,
-        "Can't share data if underlying storage used by more than one tensor");
-    storage_->data_type_ = src.storage()->data_type_;
+    meta_ = src.meta();
     CAFFE_ENFORCE_EQ_WITH_CALLER(
         src.size_,
         size_,
@@ -481,11 +473,11 @@ class Tensor {
     // in which case ShareData() doesn't make much sense since we don't really
     // know what to share yet.
     CAFFE_ENFORCE_WITH_CALLER(
-        src.storage()->data_ptr() || src.size_ == 0,
+        src.data_.get() || src.size_ == 0,
         "Source tensor has no content and has size > 0");
     // Finally, do sharing.
-    storage_->data_ptr_ = src.storage()->data_ptr_;
-    storage_->capacity_ = src.storage()->capacity_;
+    data_ = src.data_;
+    capacity_ = src.capacity_;
   }
 
   /**
@@ -497,36 +489,40 @@ class Tensor {
    * using it. If a Deleter object is passed in, when this tensor is reallocated
    * or freed, the deleter function is going to be called.
    */
-  // TODO: Change to ShareExternalStorage
   template <typename T, typename Deleter = MemoryDeleter>
   void ShareExternalPointer(T* src, size_t capacity = 0, Deleter d = nullptr) {
     ShareExternalPointer(src, TypeMeta::Make<T>(), capacity, d);
-    // Sets capacity. If not specified, we will implicitly assume that
-    // the capacity is the current size.
-    if (!capacity) {
-      capacity = size_ * storage_->itemsize();
-    }
-    storage_->capacity_ = capacity;
   }
 
   template <typename Deleter = MemoryDeleter>
   void ShareExternalPointer(
       void* src,
-      const TypeMeta& data_type,
+      const TypeMeta& meta,
       size_t capacity = 0,
       Deleter d = nullptr) {
-    CAFFE_ENFORCE(
-        storage_.use_count() == 1,
-        "Can't share external pointer if underlying storage used by more than one tensor");
-    storage_->data_type_ = data_type;
+    meta_ = meta;
     CAFFE_ENFORCE_WITH_CALLER(
-        storage_->data_type_.id() != TypeIdentifier::uninitialized(),
+        meta_.id() != TypeIdentifier::uninitialized(),
         "To share with a raw external pointer you need to have meta "
         "already set.");
     CAFFE_ENFORCE_WITH_CALLER(
         size_ >= 0,
         "To share data with a raw pointer, you need to set shape first.");
-    storage_->ShareExternalPointer(src, data_type, capacity, d);
+    // Check if the deleter is a MemoryDeleter and is a simple nullptr.
+    if (std::is_same<MemoryDeleter, Deleter>::value &&
+        reinterpret_cast<MemoryDeleter*>(&d)[0] == nullptr) {
+      // Use aliasing constructor trick to avoid calling the destructor.
+      data_ = std::shared_ptr<void>(std::shared_ptr<void>(), src);
+    } else {
+      data_.reset(src, d);
+    }
+    // Sets capacity. If not specified, we will implicitly assume that
+    // the capacity is the current size.
+    if (capacity) {
+      capacity_ = capacity;
+    } else {
+      capacity_ = nbytes();
+    }
   }
 
   /**
@@ -534,8 +530,8 @@ class Tensor {
    * or raw_mutable_data() must have been called prior to this function call.
    */
   inline const void* raw_data() const {
-    CAFFE_ENFORCE_WITH_CALLER(storage_->data_ptr() || size_ == 0);
-    return storage_->data_ptr();
+    CAFFE_ENFORCE_WITH_CALLER(data_.get() || size_ == 0);
+    return data_.get();
   }
 
   /**
@@ -547,7 +543,7 @@ class Tensor {
   template <typename T>
   inline const T* data() const {
     CAFFE_ENFORCE_WITH_CALLER(
-        storage_->data_ptr() || size_ == 0,
+        data_.get() || size_ == 0,
         "The tensor is of non-zero shape, but its data is not allocated yet. "
         "Caffe2 uses a lazy allocation, so you will need to call "
         "mutable_data() or raw_mutable_data() to actually allocate memory.");
@@ -556,8 +552,8 @@ class Tensor {
         "Tensor type mismatch, caller expects elements to be ",
         TypeMeta::TypeName<T>(),
         " while tensor contains ",
-        storage_->dtype().name());
-    return static_cast<T*>(storage_->data_ptr());
+        meta_.name());
+    return static_cast<T*>(data_.get());
   }
 
   /**
@@ -573,12 +569,11 @@ class Tensor {
    */
   inline void* raw_mutable_data(const TypeMeta& meta) {
     // For 0-size tensors it's fine to return any pointer (including nullptr)
-    if (storage_->dtype() == meta && (storage_->data_ptr() || size_ == 0)) {
-      return storage_->data_ptr();
+    if (meta_ == meta && (data_.get() || size_ == 0)) {
+      return data_.get();
     } else {
-      bool had_special_dtor = storage_->dtype().dtor() != nullptr;
-      // TODO: we should create a new Storage here.
-      storage_->data_type_ = meta;
+      bool had_special_dtor = meta_.dtor() != nullptr;
+      meta_ = meta;
       CAFFE_ENFORCE_WITH_CALLER(
           size_ >= 0,
           "Tensor is not initialized. You probably need to call Resize() "
@@ -589,33 +584,32 @@ class Tensor {
       // constructor.
       if (size_ == 0 ||
           (meta.ctor() == nullptr && !had_special_dtor &&
-           storage_->capacity() >= size_ * storage_->itemsize())) {
-        return storage_->data_ptr();
+           capacity_ >= size_ * meta_.itemsize())) {
+        return data_.get();
       }
       if (meta.ctor()) {
         // For types that need placement new, we will call it, as well as
         // making sure that when the data is freed, it calls the right
         // destruction procedure.
         auto size = size_;
-        auto dtor = storage_->dtype().dtor();
+        auto dtor = meta_.dtor();
         auto ptr_and_deleter =
-            GetStaticContext()->New(size_ * storage_->itemsize());
+            GetStaticContext()->New(size_ * meta_.itemsize());
         auto deleter = ptr_and_deleter.second;
-        storage_->data_ptr_.reset(
+        data_.reset(
             ptr_and_deleter.first, [size, dtor, deleter](void* ptr) -> void {
               dtor(ptr, size);
               deleter(ptr);
             });
-        storage_->dtype().ctor()(storage_->data_ptr(), size_);
+        meta_.ctor()(data_.get(), size_);
       } else {
         // For fundamental type, new and delete is easier.
         auto ptr_and_deleter =
-            GetStaticContext()->New(size_ * storage_->itemsize());
-        storage_->data_ptr_.reset(
-            ptr_and_deleter.first, ptr_and_deleter.second);
+            GetStaticContext()->New(size_ * meta_.itemsize());
+        data_.reset(ptr_and_deleter.first, ptr_and_deleter.second);
       }
-      storage_->capacity_ = size_ * storage_->itemsize();
-      return storage_->data_ptr();
+      capacity_ = size_ * meta_.itemsize();
+      return data_.get();
     }
   }
 
@@ -630,10 +624,10 @@ class Tensor {
    */
   inline void* raw_mutable_data() {
     CAFFE_ENFORCE_WITH_CALLER(
-        storage_->dtype().id() != TypeIdentifier::uninitialized(),
+        meta_.id() != TypeIdentifier::uninitialized(),
         "Calling raw_mutable_data() without meta, but the current meta is "
         "of unknown type.");
-    return raw_mutable_data(storage_->dtype());
+    return raw_mutable_data(meta_);
   }
 
   /**
@@ -642,60 +636,41 @@ class Tensor {
    * For fundamental types, we reuse possible existing storage if there
    * is sufficient capacity.
    */
-  template <typename T>
-  inline T* mutable_data() {
-    if ((size_ == 0 || storage_->data_ptr()) && IsType<T>()) {
-      return static_cast<T*>(storage_->data_ptr());
+   template <typename T>
+    inline T* mutable_data() {
+      if ((size_ == 0 || data_.get()) && IsType<T>()) {
+        return static_cast<T*>(data_.get());
+      }
+      // Check it here statically - otherwise TypeMeta would throw the runtime
+      // error in attempt to invoke TypeMeta::ctor()
+      static_assert(
+          std::is_default_constructible<T>::value,
+          "Tensor can't hold non-default-constructible types");
+      return static_cast<T*>(raw_mutable_data(TypeMeta::Make<T>()));
     }
-    // Check it here statically - otherwise TypeMeta would throw the runtime
-    // error in attempt to invoke TypeMeta::ctor()
-    static_assert(
-        std::is_default_constructible<T>::value,
-        "Tensor can't hold non-default-constructible types");
-    return static_cast<T*>(raw_mutable_data(TypeMeta::Make<T>()));
-  }
 
-  /**
-   * Returns the underlying Stoarge for the Tensor
-   */
-  inline Storage storage() {
-    return storage_;
-  }
-
-  inline Storage storage() const {
-    return storage_;
-  }
 
   /**
    * Returns the number of dimensions of the data.
    */
-  inline int ndim() const {
-    return dims_.size();
-  }
+  inline int ndim() const { return dims_.size(); }
   /**
    * Returns the size (i.e. the number of items) of the tensor.
    */
-  inline TIndex size() const {
-    return size_;
-  }
+  inline TIndex size() const { return size_; }
   /**
    * Return the number of bytes each item takes in the tensor.
    */
-  inline size_t itemsize() const {
-    return storage_->itemsize();
-  }
+  inline size_t itemsize() const { return meta_.itemsize(); }
   /**
    * Returns the total number of bytes of the storage.
    *
    * This is equivalent to calling size() * itemsize().
    */
-  inline size_t nbytes() const {
-    return size_ * itemsize();
-    ;
-  }
+  inline size_t nbytes() const { return size_ * meta_.itemsize(); }
 
   inline size_t capacity_nbytes() const {
-    return storage_->capacity();
+    return capacity_;
   }
   /**
    * Returns the dimensions of the tensor as a vector.
@@ -733,15 +708,11 @@ class Tensor {
    * Checks if the tensor content is of the given data type.
    */
   template <typename T>
-  inline bool IsType() const {
-    return storage_->IsType<T>();
-  }
+  inline bool IsType() const { return meta_.Match<T>(); }
   /**
    * Returns the TypeMeta object associated with the current data type.
    */
-  inline const TypeMeta& meta() const {
-    return storage_->dtype();
-  }
+  inline const TypeMeta& meta() const { return meta_; }
 
   /**
    * Returns the i-th dimension of the tensor in int.
@@ -796,16 +767,18 @@ class Tensor {
   }
 
  protected:
-  using DimVector = std::vector<TIndex>;
-  DimVector dims_; // sizes_
-  TIndex size_ = -1; // numel_
-  // we decide to keep reserved_ and it will
+  vector<TIndex> dims_;
+  TIndex size_ = -1;
+  TypeMeta meta_;
+  std::shared_ptr<void> data_;
+  size_t capacity_ = 0;
+  // we decide to keep reserved and it will
   // live in Tensor after the split
   // The logic is that if Extend() or ReserveSpace() were ever called,
   // then subsequent Resize()s will not free up Storage.
   bool reserved_ = false;
-  Storage storage_;
-  // int64_t storage_offset_;
+  DeviceType device_type_ = CPU;
+  // In case of chunk load we store how much data was already loaded
 
  private:
   template <