diff --git a/cmake/winml.cmake b/cmake/winml.cmake
index 9d71857473..82f7541f6d 100644
--- a/cmake/winml.cmake
+++ b/cmake/winml.cmake
@@ -400,15 +400,18 @@ endif(onnxruntime_USE_DML)
 # Add static library that will be archived/linked for both static/dynamic library
 add_library(winml_lib_api STATIC
   ${winml_lib_api_dir}/impl/FeatureCompatibility.h
+  ${winml_lib_api_dir}/impl/IData.h
   ${winml_lib_api_dir}/impl/IMapFeatureValue.h
   ${winml_lib_api_dir}/impl/ISequenceFeatureValue.h
   ${winml_lib_api_dir}/impl/MapBase.h
+  ${winml_lib_api_dir}/impl/NumericData.h
   ${winml_lib_api_dir}/impl/SequenceBase.h
+  ${winml_lib_api_dir}/impl/StringData.h
   ${winml_lib_api_dir}/impl/Tensor.h
   ${winml_lib_api_dir}/impl/TensorBase.h
-  ${winml_lib_api_dir}/impl/TensorBuffer.h
   ${winml_lib_api_dir}/impl/TensorKindFrom.h
   ${winml_lib_api_dir}/impl/TensorMemoryBufferReference.h
+  ${winml_lib_api_dir}/NumericData.cpp
   ${winml_lib_api_dir}/ImageFeatureDescriptor.cpp
   ${winml_lib_api_dir}/ImageFeatureDescriptor.h
   ${winml_lib_api_dir}/ImageFeatureValue.cpp
@@ -429,8 +432,11 @@ add_library(winml_lib_api STATIC
   ${winml_lib_api_dir}/MapFeatureDescriptor.h
   ${winml_lib_api_dir}/SequenceFeatureDescriptor.cpp
   ${winml_lib_api_dir}/SequenceFeatureDescriptor.h
+  ${winml_lib_api_dir}/StringData.cpp
   ${winml_lib_api_dir}/TensorFeatureDescriptor.cpp
   ${winml_lib_api_dir}/TensorFeatureDescriptor.h
+  ${winml_lib_api_dir}/VectorBackedBuffer.h
+  ${winml_lib_api_dir}/VectorBackedBuffer.cpp
   ${winml_lib_api_dir}/pch/pch.h
 )
 
diff --git a/winml/lib/Api.Image/DisjointBufferHelpers.cpp b/winml/lib/Api.Image/DisjointBufferHelpers.cpp
index 9ce31b0d1c..1a94bd2c16 100644
--- a/winml/lib/Api.Image/DisjointBufferHelpers.cpp
+++ b/winml/lib/Api.Image/DisjointBufferHelpers.cpp
@@ -3,7 +3,7 @@
 
 namespace _winml {
 
-void LoadOrStoreDisjointBuffers(
+static void LoadOrStoreDisjointBuffers(
     bool should_load_buffer,
     size_t num_buffers,
     std::function<gsl::span<byte>(size_t)> get_buffer,
@@ -31,4 +31,18 @@ void LoadOrStoreDisjointBuffers(
   }
 }
 
+// Forwards to LoadOrStoreDisjointBuffers in "load" mode (should_load_buffer == true).
+void LoadSpanFromDisjointBuffers(
+    size_t num_buffers, std::function<gsl::span<byte>(size_t)> get_buffer, gsl::span<byte>& buffer_span) {
+  constexpr bool should_load_buffer = true;  // load into the span
+  LoadOrStoreDisjointBuffers(should_load_buffer, num_buffers, get_buffer, buffer_span);
+}
+
+// Forwards to LoadOrStoreDisjointBuffers in "store" mode (should_load_buffer == false).
+void StoreSpanIntoDisjointBuffers(
+    size_t num_buffers, std::function<gsl::span<byte>(size_t)> get_buffer, gsl::span<byte>& buffer_span) {
+  constexpr bool should_load_buffer = false;  // store into buffers
+  LoadOrStoreDisjointBuffers(should_load_buffer, num_buffers, get_buffer, buffer_span);
+}
+
 } // namespace _winml
diff --git a/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp b/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp
index b2d3da143b..1f64fa73c9 100644
--- a/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp
+++ b/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp
@@ -630,8 +630,7 @@ void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
   byte* readback_buffer = nullptr;
   WINML_THROW_IF_FAILED(readback_heap_->Map(0, &CD3DX12_RANGE(0, buffer_size_in_bytes), reinterpret_cast<void**>(&readback_buffer)));
   auto readback_buffer_span = gsl::span<byte>(readback_buffer, buffer_size_in_bytes);
-  _winml::LoadOrStoreDisjointBuffers(
-      false /*load disjoint buffers into*/,
+  _winml::StoreSpanIntoDisjointBuffers(
       buffers.size(),
       [&](size_t i) {
         byte* buffer_start = nullptr;
diff --git a/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp b/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp
index 8a70a28b04..20a0ad93f1 100644
--- a/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp
+++ b/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp
@@ -559,8 +559,7 @@ void VideoFrameToTensorConverter::ConvertBuffersToBatchedGPUTensor(
   WINML_THROW_IF_FAILED(upload_heap_->Map(0, &CD3DX12_RANGE(0, 0), reinterpret_cast<void**>(&gpu_buffer)));
   auto gpu_buffer_span = gsl::span<byte>(gpu_buffer, buffer_size_in_bytes);
 
-  _winml::LoadOrStoreDisjointBuffers(
-      true /*load disjoint buffers into*/,
+  _winml::LoadSpanFromDisjointBuffers(
       buffers.size(),
       [&](size_t i) {
         byte* buffer_start = nullptr;
diff --git a/winml/lib/Api.Image/inc/DisjointBufferHelpers.h b/winml/lib/Api.Image/inc/DisjointBufferHelpers.h
index eb4ba6a893..9e6c354e43 100644
--- a/winml/lib/Api.Image/inc/DisjointBufferHelpers.h
+++ b/winml/lib/Api.Image/inc/DisjointBufferHelpers.h
@@ -7,8 +7,12 @@
 
 namespace _winml {
 
-void LoadOrStoreDisjointBuffers(
-    bool should_load_buffer,
+void LoadSpanFromDisjointBuffers(
+    size_t num_buffers,
+    std::function<gsl::span<byte>(size_t)> get_buffer,
+    gsl::span<byte>& buffer_span);
+
+void StoreSpanIntoDisjointBuffers(
     size_t num_buffers,
     std::function<gsl::span<byte>(size_t)> get_buffer,
     gsl::span<byte>& buffer_span);
diff --git a/winml/lib/Api/NumericData.cpp b/winml/lib/Api/NumericData.cpp
new file mode 100644
index 0000000000..8c09eebb2e
--- /dev/null
+++ b/winml/lib/Api/NumericData.cpp
@@ -0,0 +1,129 @@
+﻿// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+#include "pch.h"
+
+#include "impl/NumericData.h"
+#include "VectorBackedBuffer.h"
+#include "robuffer.h"
+#include "winrt/Windows.Storage.Streams.h"
+#include "DisjointBufferHelpers.h"
+
+namespace _winml {
+
+std::shared_ptr<_winml::idata> numeric_data::create(
+  size_t num_elements,
+  size_t element_size_in_bytes,
+  wfc::IIterable<wss::IBuffer> const& buffers) {
+  return std::make_shared<numeric_data>(num_elements, element_size_in_bytes, buffers);  // Returned as the idata base.
+}
+
+// Adopts the caller-supplied buffers, or allocates one zeroed buffer when none are supplied.
+numeric_data::numeric_data(
+  size_t num_elements, size_t element_size_in_bytes, wfc::IIterable<wss::IBuffer> const& buffers) :
+  combined_buffer_(nullptr),  // Initializers are listed in member declaration order.
+  buffers_(),
+  num_elements_(num_elements),
+  element_size_in_bytes_(element_size_in_bytes) {
+  if (buffers != nullptr) {
+    buffers_ = { begin(buffers), end(buffers) };
+  }
+
+  if (buffers_.empty()) {
+    combined_buffer_ = winrt::make<vector_backed_buffer>(num_elements * element_size_in_bytes);
+    buffers_ = { combined_buffer_ };
+    auto buffer = buffer_at(0);
+
+    // The initial release of WinML (RS5) shipped with behavior that would
+    // zero-initialize uninitialized tensors. After measuring, the performance impact
+    // of memsetting the memory buffer is quite small (<1ms for 3channel 720x720 TensorFloats).
+    // To maintain parity with RS5 behavior, we always zero out the memory buffer.
+    memset(buffer.data(), 0, buffer.size_bytes());
+  } else if (buffers_.size() == 1) {
+    // A single caller-supplied buffer also serves as the combined buffer.
+    combined_buffer_ = buffers_[0];
+  } else {
+    // If there are many buffers, then the combined buffer will be a separately allocated value that combines all of the buffers.
+    // This needs to be lazily done however, as the extra memory should not be allocated when not needed (GPU).
+  }
+}
+
+size_t numeric_data::num_elements() {
+  return num_elements_;  // Element count passed to the constructor.
+}
+
+size_t numeric_data::size_in_bytes() {
+  return num_elements_ * element_size_in_bytes_;  // Total payload size; the product is not overflow-checked.
+}
+
+size_t numeric_data::num_buffers() {
+  return buffers_.size();  // The constructor leaves at least one entry in buffers_.
+}
+
+std::vector<wss::IBuffer>& numeric_data::buffers() {
+  return buffers_;  // Mutable reference to the internal list, not a copy.
+}
+
+// Returns a single contiguous view of the data, optionally refreshed from the backing buffers.
+gsl::span<byte> numeric_data::buffer(bool should_sync_buffer) {
+  // Single buffer optimization: no temporary is needed to concatenate disjoint buffers.
+  if (buffers_.size() == 1) {
+    return buffer_at(0);
+  }
+  auto combined = combined_buffer();
+  if (!should_sync_buffer) {
+    return combined;
+  }
+  // Load the contents of the individual buffers into the combined view before returning it.
+  auto get_buffer = [this](size_t i) { return buffer_at(i); };
+  _winml::LoadSpanFromDisjointBuffers(buffers_.size(), get_buffer, combined);
+  return combined;
+}
+
+// Stores the combined buffer back into the backing buffers. Returns false, without copying,
+// when there is exactly one backing buffer; returns true otherwise.
+bool numeric_data::flush() {
+  if (buffers_.size() == 1) {
+    return false;
+  }
+  auto combined = combined_buffer();
+  auto get_buffer = [this](size_t i) { return buffer_at(i); };
+  _winml::StoreSpanIntoDisjointBuffers(buffers_.size(), get_buffer, combined);
+  return true;
+}
+
+// Stores |data_size| bytes from |data| into the backing buffers; oversized input is rejected (E_INVALIDARG).
+void numeric_data::set(size_t data_size, const byte* data) {
+  const auto capacity_in_bytes = num_elements_ * element_size_in_bytes_;
+  WINML_THROW_HR_IF_FALSE_MSG(
+      E_INVALIDARG,
+      data_size <= capacity_in_bytes,
+      "Argument size (%llu) exceeds the tensor size (%llu).",
+      static_cast<uint64_t>(data_size),
+      static_cast<uint64_t>(capacity_in_bytes));
+  // The helper takes a mutable span, so constness is cast away here.
+  gsl::span<byte> source(const_cast<byte*>(data), data_size);
+  auto get_buffer = [this](size_t i) { return buffer_at(i); };
+  _winml::StoreSpanIntoDisjointBuffers(buffers_.size(), get_buffer, source);
+}
+
+// Returns the full capacity of |buffer| as a byte span; fails if the bytes cannot be accessed.
+static gsl::span<byte> get_span_from_ibuffer(const wss::IBuffer& buffer) {
+  byte* current_data = nullptr;
+  auto bufferByteAccess = buffer.as<Windows::Storage::Streams::IBufferByteAccess>();
+  // Buffer() reports failure through its HRESULT; never wrap an unset pointer in a span.
+  WINML_THROW_IF_FAILED(bufferByteAccess->Buffer(&current_data));
+  return gsl::span<byte>(current_data, static_cast<size_t>(buffer.Capacity()));
+}
+
+gsl::span<byte> numeric_data::buffer_at(size_t index) {
+  return get_span_from_ibuffer(buffers_[index]);  // No bounds check: index must be < buffers_.size().
+}
+
+gsl::span<byte> numeric_data::combined_buffer() {
+  if (combined_buffer_ == nullptr) {  // Allocated on first use only.
+    combined_buffer_ = winrt::make<vector_backed_buffer>(num_elements_ * element_size_in_bytes_);
+  }
+  return get_span_from_ibuffer(combined_buffer_);  // Span covers the buffer's full capacity.
+}
+
+}  // namespace _winml
\ No newline at end of file
diff --git a/winml/lib/Api/StringData.cpp b/winml/lib/Api/StringData.cpp
new file mode 100644
index 0000000000..09bc423fdc
--- /dev/null
+++ b/winml/lib/Api/StringData.cpp
@@ -0,0 +1,62 @@
+﻿// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "pch.h"
+
+#include "impl/StringData.h"
+
+namespace _winml {
+
+string_data::string_data(size_t size) :
+    buffer_(size) {}  // Starts with |size| empty strings.
+
+std::shared_ptr<_winml::idata> string_data::create(size_t size) {
+  return std::make_shared<string_data>(size);  // Returned as the idata base.
+}
+
+size_t string_data::num_elements() {
+  return buffer_.size();  // One std::string per element.
+}
+
+size_t string_data::size_in_bytes() {
+  WINML_THROW_HR(E_UNEXPECTED);  // Not supported: no byte size is reported for string data.
+}
+
+size_t string_data::num_buffers() {
+  return 1;  // Constant: all strings live in the single buffer_ vector.
+}
+
+bool string_data::flush() {
+  // Vacuously true: always reports success without copying anything.
+  return true;
+}
+
+std::vector<wss::IBuffer>& string_data::buffers() {
+  WINML_THROW_HR(E_UNEXPECTED);  // Not supported: this type holds no wss::IBuffer objects.
+}
+
+gsl::span<byte> string_data::buffer(bool /*should_sync_buffer*/) {
+  return gsl::span<byte>(reinterpret_cast<byte*>(buffer_.data()), buffer_.size());  // NOTE: length is the element count, not a byte count.
+}
+
+// Copies |num_elements| string views from |data| into the leading elements of the backing vector.
+void string_data::set(size_t num_elements, const std::string_view* data) {
+  WINML_THROW_HR_IF_FALSE_MSG(
+      E_INVALIDARG,
+      num_elements <= buffer_.size(),
+      "Argument size (%llu) exceeds the tensor size (%llu).",  // 64-bit, so large counts are not truncated.
+      static_cast<uint64_t>(num_elements),
+      static_cast<uint64_t>(buffer_.size()));
+
+  std::copy(data, data + num_elements, buffer_.begin());
+}
+
+void string_data::set(size_t /*data_size*/, const byte* /*data*/) {
+  WINML_THROW_HR(E_UNEXPECTED);  // Raw-byte assignment is not supported; see the string_view overload.
+}
+
+std::vector<std::string>& string_data::get_backing_vector() {
+  return buffer_;  // Mutable reference to the internal storage, not a copy.
+}
+
+}  // namespace _winml
\ No newline at end of file
diff --git a/winml/lib/Api/VectorBackedBuffer.cpp b/winml/lib/Api/VectorBackedBuffer.cpp
new file mode 100644
index 0000000000..d12f664956
--- /dev/null
+++ b/winml/lib/Api/VectorBackedBuffer.cpp
@@ -0,0 +1,29 @@
+﻿// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+#include "pch.h"
+
+#include "VectorBackedBuffer.h"
+
+namespace _winml {
+
+vector_backed_buffer::vector_backed_buffer(size_t size) : buffer_(size) {}  // Allocates |size| value-initialized (zero) bytes.
+
+uint32_t vector_backed_buffer::Capacity() const {
+  return static_cast<uint32_t>(buffer_.size());  // NOTE: narrows size_t; sizes above UINT32_MAX would be truncated.
+}
+
+uint32_t vector_backed_buffer::Length() const {
+  throw winrt::hresult_error(E_NOTIMPL);  // Length is not tracked by this buffer; only Capacity() is implemented.
+}
+
+void vector_backed_buffer::Length(uint32_t /*value*/) {
+  throw winrt::hresult_error(E_NOTIMPL);  // Setting a length is not supported by this buffer.
+}
+
+STDMETHODIMP vector_backed_buffer::Buffer(uint8_t** value) {
+  RETURN_HR_IF_NULL(E_POINTER, value);  // Reject a null out-parameter.
+  *value = buffer_.data();  // Borrowed pointer into the vector; no copy is made.
+  return S_OK;
+}
+
+}  // namespace _winml
\ No newline at end of file
diff --git a/winml/lib/Api/VectorBackedBuffer.h b/winml/lib/Api/VectorBackedBuffer.h
new file mode 100644
index 0000000000..a12b26ffd7
--- /dev/null
+++ b/winml/lib/Api/VectorBackedBuffer.h
@@ -0,0 +1,28 @@
+﻿// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "robuffer.h"
+#include "winrt/Windows.Storage.Streams.h"
+
+namespace _winml {
+
+// Minimal IBuffer / IBufferByteAccess implementation whose storage is a std::vector<BYTE>.
+class vector_backed_buffer : public winrt::implements<
+                         vector_backed_buffer,
+                         wss::IBuffer,
+                         Windows::Storage::Streams::IBufferByteAccess> {
+ public:
+  // explicit: a byte count should never convert to a buffer implicitly.
+  explicit vector_backed_buffer(size_t size);
+  uint32_t Capacity() const;
+  uint32_t Length() const;
+  void Length(uint32_t /*value*/);
+  STDMETHOD(Buffer)(uint8_t** value);
+
+ private:
+  std::vector<BYTE> buffer_;
+};
+
+}  // namespace _winml
\ No newline at end of file
diff --git a/winml/lib/Api/impl/IData.h b/winml/lib/Api/impl/IData.h
new file mode 100644
index 0000000000..d649f4d5e1
--- /dev/null
+++ b/winml/lib/Api/impl/IData.h
@@ -0,0 +1,25 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "IEngine.h"
+
+// idata is the storage interface behind _winml::Tensor: implementations hold the element data and expose
+// it as WinRT buffers and/or a contiguous byte span.
+
+namespace _winml {
+
+struct idata {
+  virtual ~idata() = default;
+
+  virtual size_t num_elements() = 0;   // Number of elements held.
+  virtual size_t size_in_bytes() = 0;  // Total byte size of the data.
+  virtual size_t num_buffers() = 0;    // Count of backing buffers.
+  virtual std::vector<wss::IBuffer>& buffers() = 0;             // Backing WinRT buffers.
+  virtual gsl::span<byte> buffer(bool should_sync_buffer) = 0;  // Contiguous view of the data.
+  virtual bool flush() = 0;                                     // Pushes the contiguous view back to the buffers.
+  virtual void set(size_t data_size, const byte* data) = 0;     // Copies raw bytes into the data.
+};
+
+}  // namespace _winml
\ No newline at end of file
diff --git a/winml/lib/Api/impl/NumericData.h b/winml/lib/Api/impl/NumericData.h
new file mode 100644
index 0000000000..1921c2f422
--- /dev/null
+++ b/winml/lib/Api/impl/NumericData.h
@@ -0,0 +1,46 @@
+﻿// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "IData.h"
+#include "robuffer.h"
+#include "winrt/Windows.Storage.Streams.h"
+
+namespace _winml {
+
+class numeric_data : public _winml::idata {
+ public:
+  static std::shared_ptr<_winml::idata> create(
+    size_t num_elements,
+    size_t element_size_in_bytes,
+    wfc::IIterable<wss::IBuffer> const& buffers);
+
+  // Public so that std::make_shared can construct it; prefer create(), which returns a shared_ptr.
+  numeric_data(size_t num_elements, size_t element_size_in_bytes, wfc::IIterable<wss::IBuffer> const& buffers);
+  gsl::span<byte> buffer_at(size_t index);  // Byte span over the index-th backing buffer.
+  gsl::span<byte> combined_buffer();        // Byte span over the combined buffer, allocated on first use.
+
+ public:
+  size_t num_elements() override;
+  size_t size_in_bytes() override;
+  size_t num_buffers() override;
+
+  // Buffer accessors
+  std::vector<wss::IBuffer>& buffers() override;
+  gsl::span<byte> buffer(bool should_sync_buffer) override;
+
+  // Flush to buffers API
+  bool flush() override;
+
+  // Set APIs
+  void set(size_t data_size, const byte* data) override;
+
+ private:
+  wss::IBuffer combined_buffer_;       // Single contiguous buffer; may be null until first needed.
+  std::vector<wss::IBuffer> buffers_;  // Backing buffers, caller-supplied or allocated by the constructor.
+  size_t num_elements_;
+  size_t element_size_in_bytes_;
+};
+
+}  // namespace _winml
\ No newline at end of file
diff --git a/winml/lib/Api/impl/StringData.h b/winml/lib/Api/impl/StringData.h
new file mode 100644
index 0000000000..17c83d8be1
--- /dev/null
+++ b/winml/lib/Api/impl/StringData.h
@@ -0,0 +1,40 @@
+﻿// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "IData.h"
+#include "robuffer.h"
+#include "winrt/Windows.Storage.Streams.h"
+
+namespace _winml {
+
+// idata implementation for string tensors, backed by a std::vector<std::string>.
+class string_data : public _winml::idata {
+ public:
+  static std::shared_ptr<_winml::idata> create(size_t size);
+  explicit string_data(size_t size);  // explicit: an element count must not convert implicitly.
+
+  size_t num_elements() override;
+  size_t size_in_bytes() override;  // Unsupported: raises E_UNEXPECTED.
+  size_t num_buffers() override;
+
+  // Buffer accessors
+  std::vector<wss::IBuffer>& buffers() override;  // Unsupported: raises E_UNEXPECTED.
+  gsl::span<byte> buffer(bool should_sync_buffer) override;
+
+  // Flush to buffers API
+  bool flush() override;
+
+  // Set APIs
+  void set(size_t data_size, const byte* data) override;  // Unsupported: raises E_UNEXPECTED.
+
+ public:  // String-specific API, not part of idata.
+  void set(size_t num_elements, const std::string_view* data);
+  std::vector<std::string>& get_backing_vector();
+
+ private:
+  std::vector<std::string> buffer_;
+};
+
+}  // namespace _winml
\ No newline at end of file
diff --git a/winml/lib/Api/impl/Tensor.h b/winml/lib/Api/impl/Tensor.h
index a82a94a30d..a1bb950b35 100644
--- a/winml/lib/Api/impl/Tensor.h
+++ b/winml/lib/Api/impl/Tensor.h
@@ -3,7 +3,8 @@
 
 #pragma once
 
-#include "TensorBuffer.h"
+#include "NumericData.h"
+#include "StringData.h"
 
 //
 // the Tensor class is the actual object for CPU memory buffers.
@@ -12,81 +13,83 @@
 //
 namespace _winml {
 
+// Returns the number of elements described by |shape|: the product of all dimensions,
+// which is 1 for an empty shape.
+inline size_t compute_size_of_shape(const std::vector<int64_t>& shape) {
+  int64_t element_count = 1;
+  for (const auto dimension : shape) {
+    element_count *= dimension;
+  }
+  // The product is computed as int64_t and only then converted to size_t.
+  return static_cast<size_t>(element_count);
+}
+
+template <typename T>
+inline auto create_data(
+  const std::vector<int64_t>& shape,
+  const wfc::IIterable<wss::IBuffer>& buffers) {
+  return _winml::numeric_data::create(compute_size_of_shape(shape), sizeof(T), buffers);  // sizeof(T) = bytes per element.
+}
+
+template <>
+inline auto create_data<std::string>(
+  const std::vector<int64_t>& shape,
+  const wfc::IIterable<wss::IBuffer>& /*buffers*/) {
+  return _winml::string_data::create(compute_size_of_shape(shape));  // |buffers| is ignored for strings.
+}
+
 template <typename T>
 class Tensor {
  private:
-  std::shared_ptr<TensorBuffer<T>> buffer_;
+  std::shared_ptr<_winml::idata> data_;
   std::vector<int64_t> shape_;
 
- public:
+ private:
   Tensor() = delete;
 
-  Tensor(
-      std::vector<int64_t> const& shape,
-      wfc::IIterable<wss::IBuffer> const& buffers) :
-                            shape_(shape),
-                            buffer_(TensorBuffer<T>::Create(
-                                        static_cast<size_t>(std::accumulate(
-                                             std::begin(shape), std::end(shape),
-                                             static_cast<int64_t>(1), std::multiplies<int64_t>())),
-                                        buffers)) {}
+ public:
+  Tensor(const std::vector<int64_t>& shape) :
+    shape_(shape),
+    data_(create_data<T>(shape, nullptr)) {}
 
   Tensor(
-      std::vector<int64_t> const& shape) : shape_(shape),
-                                           buffer_(TensorBuffer<T>::Create(
-                                                        static_cast<size_t>(std::accumulate(
-                                                            std::begin(shape), std::end(shape),
-                                                            static_cast<int64_t>(1),
-                                                            std::multiplies<int64_t>())))) {}
-
-  Tensor(
-      std::vector<int64_t> const&& shape) : shape_(std::move(shape)),
-                                            buffer_(TensorBuffer<T>::Create(
-                                                        static_cast<size_t>(std::accumulate(
-                                                            std::begin(shape), std::end(shape),
-                                                            static_cast<int64_t>(1),
-                                                            std::multiplies<int64_t>())))) {
-  }
-
-  auto number_of_elements() const {
-    return buffer_->NumElements();
-  }
+    const std::vector<int64_t>& shape,
+    const wfc::IIterable<wss::IBuffer>& buffers) :
+      shape_(shape),
+      data_(create_data<T>(shape, buffers)) {}
 
   auto size_in_bytes() const {
-    return buffer_->SizeInBytes();
+    return data_->size_in_bytes();
   }
 
   auto num_buffers() {
-    return buffer_->NumBuffers();
+    return data_->num_buffers();
   }
 
   auto& buffers() {
-    return buffer_->Buffers();
+    return data_->buffers();
   }
 
-  auto buffer(bool should_sync_buffer = true) {
-    auto span = buffer_->Buffer(should_sync_buffer);
-    return gsl::span<T>(reinterpret_cast<T*>(span.data()), buffer_->NumElements());
+  gsl::span<T> buffer(bool should_sync_buffer = true) {
+    auto span = data_->buffer(should_sync_buffer);
+    return gsl::span<T>(reinterpret_cast<T*>(span.data()), data_->num_elements());
   }
 
   auto flush() {
-    return buffer_->Flush();
+    return data_->flush();
   }
 
-  void set(size_t size, const T* pData) {
-    buffer_->Set(size * sizeof(T), pData);
-  }
-
-  void set(std::vector<T>&& other) {
-    buffer_->Set(other);
+  void set(size_t size, const T* data) {
+    auto size_in_bytes = size * sizeof(T);
+    data_->set(size_in_bytes, reinterpret_cast<const byte*>(data));
   }
 
   const std::vector<int64_t>& shape() const {
     return shape_;
   }
 
-  auto get_tensor_buffer() {
-    return buffer_;
+  auto get_data() {
+    return data_;
   }
 };
 }  // namespace _winml
\ No newline at end of file
diff --git a/winml/lib/Api/impl/TensorBase.h b/winml/lib/Api/impl/TensorBase.h
index 59aab905a9..181f3f68d4 100644
--- a/winml/lib/Api/impl/TensorBase.h
+++ b/winml/lib/Api/impl/TensorBase.h
@@ -74,28 +74,28 @@ struct TensorBase : TBase {
   ///    b) TensorBase(winrt::Windows::Foundation::Collections::IIterable<int64_t> const& shape)
   ///  3) use provided backing gpu memory
   ///    a) TensorBase(std::vector<int64_t> const& shape, ID3D12Resource* pResource)
-  TensorBase() : m_resources(std::make_shared<TensorResources<T>>()) {
+  TensorBase() : resources_(std::make_shared<TensorResources<T>>()) {
   }
 
   TensorBase(wfc::IIterable<int64_t> const& shape) : shape_(begin(shape), end(shape)),
-                                                     m_resources(std::make_shared<TensorResources<T>>()) {
-    GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape_);
+                                                     resources_(std::make_shared<TensorResources<T>>()) {
+    CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape_);
   }
 
   TensorBase(std::vector<int64_t> const& shape) : shape_(shape),
-                                                  m_resources(std::make_shared<TensorResources<T>>()) {
-    GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape_);
+                                                  resources_(std::make_shared<TensorResources<T>>()) {
+    CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape_);
   }
 
   TensorBase(std::vector<int64_t> const& shape, ID3D12Resource* resource) : shape_(shape),
-                                                                            m_resources(std::make_shared<TensorResources<T>>()) {
+                                                                            resources_(std::make_shared<TensorResources<T>>()) {
     // This Api is not supported for TensorString
     WINML_THROW_HR_IF_TRUE_MSG(
         E_ILLEGAL_METHOD_CALL,
         (std::is_same<T, std::string>::value),
         "TensorString objects cannot be created from a ID3D12Resource!");
 
-    GetGpuResource().copy_from(resource);
+    GpuTensor().copy_from(resource);
   }
 
   HRESULT CreateGPUMLValue(ID3D12Resource* resource, BindingContext& context, IValue** out) {
@@ -117,21 +117,21 @@ struct TensorBase : TBase {
     auto engine = session->GetEngine();
     auto should_sync_buffer = context.type == _winml::BindingType::kInput;
 
-    if (GetCpuResource() != nullptr) {
+    if (CpuTensor() != nullptr) {
       return CreateTensorValueFromExternalBuffer(engine, should_sync_buffer, out);
     }
 
     // If there is no matching cpu resource, then fallback to a gpu resource
-    if (GetGpuResource() != nullptr) {
-      return CreateGPUMLValue(GetGpuResource().get(), context, out);
+    if (GpuTensor() != nullptr) {
+      return CreateGPUMLValue(GpuTensor().get(), context, out);
     }
 
     WINML_THROW_HR(WINML_ERR_INVALID_BINDING);
   }
 
   HRESULT GPUTensorize(_winml::BindingContext& context, IValue** out) {
-    if (GetGpuResource() != nullptr) {
-      return CreateGPUMLValue(GetGpuResource().get(), context, out);
+    if (GpuTensor() != nullptr) {
+      return CreateGPUMLValue(GpuTensor().get(), context, out);
     }
 
     // Get engine
@@ -142,8 +142,8 @@ struct TensorBase : TBase {
     auto should_sync_buffer = context.type == _winml::BindingType::kInput;
 
     // If there is no matching gpu resource, then fallback to a cpu resource
-    if (GetCpuResource() != nullptr) {
-      auto num_backing_buffers = GetCpuResource()->num_buffers(); 
+    if (CpuTensor() != nullptr) {
+      auto num_backing_buffers = CpuTensor()->num_buffers(); 
       if (num_backing_buffers == 1) {
         // If we have a single backing cpu buffer, there is no need to create GPU resources.
         // The engine will use the buffer provided, and perform the needed copies into the GPU context as needed.
@@ -154,24 +154,24 @@ struct TensorBase : TBase {
           // If we are binding inputs, then a GPU resource needs to be allocated, and individual buffer contents need
           // to be copied directly into a gpu resource.
 
-          if (GetGpuResource() == nullptr) {
-            GetGpuResource() = CreateD3D12Resource(session);
+          if (GpuTensor() == nullptr) {
+            GpuTensor() = CreateD3D12Resource(session);
           }
 
           _winml::ConverterResourceDescription descriptor = {};
           descriptor.pixel_format = static_cast<DWORD>(wgdx::DirectXPixelFormat::Unknown);
-          descriptor.width = static_cast<int>(GetCpuResource()->size_in_bytes());
+          descriptor.width = static_cast<int>(CpuTensor()->size_in_bytes());
           descriptor.height = static_cast<int>(1);
           descriptor.luid = device->GetD3DDevice()->GetAdapterLuid();  // Converted image on GPU
 
           context.converter = _winml::PoolObjectWrapper::Create(device->TensorizerStore()->Fetch(descriptor));
           context.converter->Get()->Tensorizer->ConvertBuffersToBatchedGPUTensor(
-            GetCpuResource()->buffers(),
-            GetCpuResource()->size_in_bytes(),
+            CpuTensor()->buffers(),
+            CpuTensor()->size_in_bytes(),
             *device->GetD3DDeviceCache(),
-            GetGpuResource().get());
+            GpuTensor().get());
 
-          return CreateGPUMLValue(GetGpuResource().get(), context, out);
+          return CreateGPUMLValue(GpuTensor().get(), context, out);
 
         } else if (context.type == _winml::BindingType::kOutput) {
           // If we are binding outputs, then the buffers do not need to bound. If the engine produces a output on the gpu
@@ -179,8 +179,8 @@ struct TensorBase : TBase {
           // into the output buffers without temporary intermediary buffers! No binding here is necessary.
           // If the output produces a cpu buffer (even in the GPU case), we will already have a cpu buffer, and just need
           // to copy back to the output buffers, no binding is necessary.
-          GetGpuResource() = CreateD3D12Resource(session);
-          return CreateGPUMLValue(GetGpuResource().get(), context, out);
+          GpuTensor() = CreateD3D12Resource(session);
+          return CreateGPUMLValue(GpuTensor().get(), context, out);
         }
       }
     }
@@ -188,11 +188,11 @@ struct TensorBase : TBase {
     if (TensorKind() == winml::TensorKind::String) {
       // Lazily allocate the cpu TensorString resource
       // TensorStrings are CPU only, and so a gpu resource cannot be allocated for them.
-      GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape_);
+      CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape_);
       return CreateTensorValueFromExternalBuffer(engine, should_sync_buffer, out);
     } else {
-      GetGpuResource() = CreateD3D12Resource(session);
-      return CreateGPUMLValue(GetGpuResource().get(), context, out);
+      GpuTensor() = CreateD3D12Resource(session);
+      return CreateGPUMLValue(GpuTensor().get(), context, out);
     }
   }
 
@@ -242,8 +242,8 @@ struct TensorBase : TBase {
   void EnsureBufferNotInUse() {
     auto isBufferInUse =
         std::any_of(
-            m_outstandingReferences.begin(),
-            m_outstandingReferences.end(),
+            outstanding_references_.begin(),
+            outstanding_references_.end(),
             [](auto weakRef) { return weakRef.get() != nullptr; });
 
     WINML_THROW_HR_IF_TRUE_MSG(WINML_ERR_INVALID_BINDING, isBufferInUse, "The tensor has outstanding memory buffer references that must be closed prior to evaluation!");
@@ -254,7 +254,7 @@ struct TensorBase : TBase {
   (_winml::BindingContext& context, IValue** out) {
     RETURN_HR_IF_NULL_MSG(
         WINML_ERR_INVALID_BINDING,
-        m_resources,
+        resources_,
         "The tensor has been closed and its resources have been detached!");
 
     EnsureBufferNotInUse();
@@ -289,7 +289,7 @@ struct TensorBase : TBase {
     // the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met.
     ASSERT_TEMPLATE_PARAMETERS<ElementType, ElementViewType>();
 
-    GetCpuResource()->set(size, reinterpret_cast<ElementType*>(data));
+    CpuTensor()->set(size, reinterpret_cast<ElementType*>(data));
   }
 
   template <>
@@ -297,7 +297,8 @@ struct TensorBase : TBase {
     // Ensure that this call is being called with the correct template parameters
     ASSERT_TEMPLATE_PARAMETERS<std::string, winrt::hstring>();
 
-    GetCpuResource()->get_tensor_buffer()->Set(size, reinterpret_cast<std::string_view*>(data));
+    auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data());
+    string_data->set(size, reinterpret_cast<std::string_view*>(data));
   }
 
   template <typename ElementType = T, typename ElementViewType = ViewT>
@@ -307,8 +308,8 @@ struct TensorBase : TBase {
     ASSERT_TEMPLATE_PARAMETERS<ElementType, ElementViewType>();
 
     RETURN_IF_FAILED_MSG(engine->CreateTensorValueFromExternalBuffer(
-                             GetCpuResource()->buffer(sync_buffer).data(), GetCpuResource()->size_in_bytes(), GetCpuResource()->shape().data(),
-                             GetCpuResource()->shape().size(), TensorKind(), value),
+                             CpuTensor()->buffer(sync_buffer).data(), CpuTensor()->size_in_bytes(), CpuTensor()->shape().data(),
+                             CpuTensor()->shape().size(), TensorKind(), value),
                          "Failed to prepare buffer for copy back from device resource.");
     return S_OK;
   }
@@ -318,17 +319,19 @@ struct TensorBase : TBase {
     // Ensure that this call is being called with the correct template parameters
     ASSERT_TEMPLATE_PARAMETERS<std::string, winrt::hstring>();
 
+    auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data());
+    auto& string_vector = string_data->get_backing_vector();
+
     std::vector<const char*> raw_values;
-    auto string_array = static_cast<std::string*>(GetCpuResource()->buffer().data());
     std::transform(
-        string_array,
-        string_array + GetCpuResource()->number_of_elements(),
+        std::begin(string_vector),
+        std::end(string_vector),
         std::back_inserter(raw_values),
         [&](auto& str) { return str.c_str(); });
 
     RETURN_IF_FAILED_MSG(engine->CreateStringTensorValueFromDataWithCopy(
-                             raw_values.data(), raw_values.size(), GetCpuResource()->shape().data(),
-                             GetCpuResource()->shape().size(), value),
+                             raw_values.data(), raw_values.size(), CpuTensor()->shape().data(),
+                             CpuTensor()->shape().size(), value),
                          "Failed to prepare buffer for copy back from device resource.");
     return S_OK;
   }
@@ -338,7 +341,7 @@ struct TensorBase : TBase {
   (BindingContext& context, IValue* value) {
     RETURN_HR_IF_NULL_MSG(
         E_ILLEGAL_METHOD_CALL,
-        m_resources,
+        resources_,
         "The tensor has been closed and its resources have been detached during evaluation!");
 
     _winml::Resource updated_resource;
@@ -348,14 +351,14 @@ struct TensorBase : TBase {
     RETURN_IF_FAILED_MSG(value->GetTensorShape(shape_), "Failed to get the tensor shape from resource!");
 
     // make sure we always have a CPU resource
-    if (GetCpuResource() == nullptr) {
-      GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape_);
+    if (CpuTensor() == nullptr) {
+      CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape_);
     }
 
     bool is_cpu;
     if (SUCCEEDED(value->IsCpu(&is_cpu)) && is_cpu) {
       // Get the data pointer and size
-      auto buffer = GetCpuResource()->buffer(false);
+      auto buffer = CpuTensor()->buffer(false);
 
       if (updated_resource.get() != reinterpret_cast<void*>(buffer.data())) {
         // Only copy the data if the source and destination are not the same!
@@ -366,7 +369,7 @@ struct TensorBase : TBase {
       } else {
         // If the engine wrote to the data directly, it is possible that the underlying data was held by many buffers
         // In that case the underlying buffers will not match the engine output, and they need to be flushed.
-        GetCpuResource()->flush();
+        CpuTensor()->flush();
       }
     } else {
       // If we got a gpu resource, we should move the data to the cpu so accessors can retrieve the data.
@@ -377,7 +380,7 @@ struct TensorBase : TBase {
       auto device = session->Device().as<winmlp::LearningModelDevice>();
       auto engine = session->GetEngine();
 
-      if (GetCpuResource()->num_buffers() == 1) {
+      if (CpuTensor()->num_buffers() == 1) {
         winrt::com_ptr<IValue> dest;
         RETURN_IF_FAILED_MSG(CreateTensorValueFromExternalBuffer(engine, false, dest.put()),
                              "Failed to prepare buffer for copy back from device resource.");
@@ -395,7 +398,7 @@ struct TensorBase : TBase {
             d3dResource,
             buffer_size_in_bytes,
             *device->GetD3DDeviceCache(),
-            GetCpuResource()->buffers());
+            CpuTensor()->buffers());
 
         // Reset the Allocator before return to the Cache. Must Sync this background thread to that completion before we do.
         device->GetD3DDeviceCache()->SyncD3D12ToCPU();
@@ -615,7 +618,7 @@ struct TensorBase : TBase {
     // Ensure that CreateReference is only called when there is 1 buffer.
     WINML_THROW_HR_IF_TRUE_MSG(
         E_ILLEGAL_METHOD_CALL,
-        GetCpuResource() != nullptr && GetCpuResource()->num_buffers() != 1, "A single buffer reference cannot be retrieved when the tensor is backed by multiple buffers!");
+        CpuTensor() != nullptr && CpuTensor()->num_buffers() != 1, "A single buffer reference cannot be retrieved when the tensor is backed by multiple buffers!");
 
     // Create a TensorMemoryBufferReference<T>
 
@@ -624,11 +627,11 @@ struct TensorBase : TBase {
     // "has been closed. In that case, the returned IMemoryBufferReference is already closed."
     // Creating a TensorMemoryBufferReference<T> with a null pointer is equivalent to creating it as closed.
 
-    auto memoryBufferReference = winrt::make<TensorMemoryBufferReference<T>>(shape_, m_resources);
+    auto memoryBufferReference = winrt::make<TensorMemoryBufferReference<T>>(shape_, resources_);
 
     // Create and cache a weak reference to the TensorMemoryBufferReference<T>
     winrt::weak_ref<TensorMemoryBufferReference<T>> weak(memoryBufferReference.as<TensorMemoryBufferReference<T>>());
-    m_outstandingReferences.push_back(weak);
+    outstanding_references_.push_back(weak);
 
     // Return the strong ref to the caller
     return memoryBufferReference;
@@ -638,7 +641,7 @@ struct TensorBase : TBase {
   // IMemoryBuffer::Close
   void Close() try {
     // Let go of the lifetime of the resources, this is will indicate that the memorybuffer is closed
-    m_resources = nullptr;
+    resources_ = nullptr;
   }
   WINML_CATCH_ALL
 
@@ -653,10 +656,10 @@ struct TensorBase : TBase {
 
     RETURN_HR_IF_NULL_MSG(
         E_ILLEGAL_METHOD_CALL,
-        m_resources,
+        resources_,
         "The tensor has been closed and its resources have been detached!");
 
-    return m_resources->GetBuffer(shape_, value, capacity);
+    return resources_->GetBuffer(shape_, value, capacity);
   }
 
   // ITensorNative::GetD3D12Resource
@@ -667,10 +670,10 @@ struct TensorBase : TBase {
       RETURN_HR_IF(ERROR_INVALID_FUNCTION, (std::is_same<T, std::string>::value));
       RETURN_HR_IF_NULL_MSG(
           E_ILLEGAL_METHOD_CALL,
-          m_resources,
+          resources_,
           "The tensor has been closed and its resources have been detached!");
 
-      GetGpuResource().copy_to(ppResource);
+      GpuTensor().copy_to(ppResource);
       return S_OK;
     }
     WINML_CATCH_ALL_COM
@@ -689,12 +692,11 @@ struct TensorBase : TBase {
     // owned IVectorView object.
 
     // Get the raw buffer pointer from the native tensor implementation.
-    auto number_of_elements = GetCpuResource()->number_of_elements();
-    auto buffer = GetCpuResource()->buffer();
+    auto buffer = CpuTensor()->buffer();
     auto element_data = static_cast<ElementType*>(buffer.data());
-    
+
     // Copy data that will be passed back to caller.
-    auto copy = std::vector<ElementType>(element_data, element_data + number_of_elements);
+    auto copy = std::vector<ElementType>(element_data, element_data + buffer.size());
 
     // Create IVectorView from copied data.
     return winrt::single_threaded_vector<ElementViewType>(std::move(copy)).GetView();
@@ -707,18 +709,17 @@ struct TensorBase : TBase {
     // Ensure that this call is being called with the correct template parameters
     ASSERT_TEMPLATE_PARAMETERS<_winml::Half, float>();
 
-    auto number_of_elements = GetCpuResource()->number_of_elements();
-    auto buffer = GetCpuResource()->buffer();
+    auto buffer = CpuTensor()->buffer();
     auto element_data = static_cast<_winml::Half*>(buffer.data());
 
     // Copy the HALFs to floats
-    std::vector<float> float_value(number_of_elements);
+    std::vector<float> float_value(buffer.size());
     DirectX::PackedVector::XMConvertHalfToFloatStream(
         float_value.data(),
         sizeof(float) /* output stride */,
         reinterpret_cast<DirectX::PackedVector::HALF*>(element_data),
         sizeof(_winml::Half) /* input stride */,
-        number_of_elements);
+        buffer.size());
 
     // Create IVectorView from copied data.
     return winrt::single_threaded_vector<float>(std::move(float_value)).GetView();
@@ -731,16 +732,15 @@ struct TensorBase : TBase {
     // Ensure that this call is being called with the correct template parameters
     ASSERT_TEMPLATE_PARAMETERS<std::string, winrt::hstring>();
 
-    auto number_of_elements = GetCpuResource()->number_of_elements();
-    auto buffer = GetCpuResource()->buffer();
-    auto element_data = static_cast<std::string*>(buffer.data());
+    auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data());
+    auto& string_vector = string_data->get_backing_vector();
 
-    auto copy = std::vector<winrt::hstring>(number_of_elements, L"");
+    auto copy = std::vector<winrt::hstring>(string_vector.size(), L"");
     std::generate(
         copy.begin(),
         copy.end(),
-        [n = 0, &element_data]() mutable {
-          return _winml::Strings::HStringFromUTF8(element_data[n++]);
+        [n = 0, &string_vector]() mutable {
+          return _winml::Strings::HStringFromUTF8(string_vector[n++]);
         });
 
     return winrt::single_threaded_vector<winrt::hstring>(std::move(copy)).GetView();
@@ -752,14 +752,13 @@ struct TensorBase : TBase {
   wfc::IVectorView<uint8_t> GetAsVectorView<int8_t, uint8_t>() try {
     ASSERT_TEMPLATE_PARAMETERS<int8_t, uint8_t>();
 
-    auto number_of_elements = GetCpuResource()->number_of_elements();
-    auto buffer = GetCpuResource()->buffer();
+    auto buffer = CpuTensor()->buffer();
     auto element_data = static_cast<int8_t*>(buffer.data());
 
     // Copy data that will be passed back to caller.
 
-    gsl::span<uint8_t> span(reinterpret_cast<uint8_t*>(element_data), number_of_elements);
-    std::vector<uint8_t> copy(span.begin(), span.begin() + number_of_elements);
+    gsl::span<uint8_t> span(reinterpret_cast<uint8_t*>(element_data), buffer.size());
+    std::vector<uint8_t> copy(span.begin(), span.begin() + buffer.size());
 
     // Create IVectorView from copied data.
     return winrt::single_threaded_vector<uint8_t>(std::move(copy)).GetView();
@@ -809,10 +808,10 @@ struct TensorBase : TBase {
     RETURN_HR_IF_NULL(E_POINTER, pIsPlaceHolder);
     RETURN_HR_IF_NULL_MSG(
         E_ILLEGAL_METHOD_CALL,
-        m_resources,
+        resources_,
         "The tensor has been closed and its resources have been detached!");
 
-    *pIsPlaceHolder = GetCpuResource() == nullptr && GetGpuResource() == nullptr;
+    *pIsPlaceHolder = CpuTensor() == nullptr && GpuTensor() == nullptr;
     return S_OK;
   }
 
@@ -827,7 +826,7 @@ struct TensorBase : TBase {
     ASSERT_TEMPLATE_PARAMETERS_EXACT<ElementType, ElementViewType>();
 
     shape_ = shape;
-    GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape, buffers);
+    CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape, buffers);
   }
 
   template <>
@@ -837,7 +836,7 @@ struct TensorBase : TBase {
     ASSERT_TEMPLATE_PARAMETERS<_winml::Half, float>();
 
     shape_ = shape;
-    GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape, buffers);
+    CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape, buffers);
   }
 
   template <>
@@ -847,7 +846,7 @@ struct TensorBase : TBase {
     ASSERT_TEMPLATE_PARAMETERS<int8_t, uint8_t>();
 
     shape_ = shape;
-    GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape, buffers);
+    CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape, buffers);
   }
 
   // Specialized version to convert hstring to string
@@ -875,12 +874,12 @@ struct TensorBase : TBase {
     // Ensure that the Set APIs are only called when there is 1 buffer.
     // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
     // They should always be backed by a single underlying buffer.
-    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
+    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
 
     // This method accepts data as an array, T[], from the caller.
     // This is a non-destructive API, so the caller data is
     // left untouched, and the data is copied into internal buffers.
-    GetCpuResource()->set(data.size(), data.data());
+    CpuTensor()->set(data.size(), data.data());
   }
 
   // Specialized version to convert floats to float16
@@ -892,13 +891,12 @@ struct TensorBase : TBase {
     // Ensure that the Set APIs are only called when there is 1 buffer.
     // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
     // They should always be backed by a single underlying buffer.
-    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
+    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
 
-    auto number_of_elements = GetCpuResource()->number_of_elements();
-    auto buffer = GetCpuResource()->buffer();
+    auto buffer = CpuTensor()->buffer();
     auto element_data = static_cast<_winml::Half*>(buffer.data());
 
-    THROW_HR_IF(E_UNEXPECTED, data.size() != number_of_elements);
+    THROW_HR_IF(E_UNEXPECTED, data.size() != buffer.size());
     DirectX::PackedVector::XMConvertFloatToHalfStream(
         reinterpret_cast<DirectX::PackedVector::HALF*>(element_data),
         sizeof(_winml::Half) /* output stride */,
@@ -916,12 +914,12 @@ struct TensorBase : TBase {
     // Ensure that the Set APIs are only called when there is 1 buffer.
     // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
     // They should always be backed by a single underlying buffer.
-    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
+    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
 
     auto size = data.size();
     auto pData = data.data();
 
-    GetCpuResource()->set(size, reinterpret_cast<int8_t*>(const_cast<uint8_t*>(pData)));
+    CpuTensor()->set(size, reinterpret_cast<int8_t*>(const_cast<uint8_t*>(pData)));
   }
 
   // Specialized version to convert hstring to string
@@ -933,17 +931,16 @@ struct TensorBase : TBase {
     // Ensure that the Set APIs are only called when there is 1 buffer.
     // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
     // They should always be backed by a single underlying buffer.
-    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
+    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
 
-    auto number_of_elements = GetCpuResource()->number_of_elements();
-    auto buffer = GetCpuResource()->buffer();
-    THROW_HR_IF(E_UNEXPECTED, data.size() > number_of_elements);
+    auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data());
+    auto& string_vector = string_data->get_backing_vector();
 
-    auto element_data = static_cast<std::string*>(buffer.data());
+    THROW_HR_IF(E_UNEXPECTED, data.size() > string_vector.size());
 
     // Convert and copy into the underlying buffer
     std::transform(
-        data.begin(), data.end(), element_data,
+        data.begin(), data.end(), std::begin(string_vector),
         [](auto& element) mutable {
           return _winml::Strings::UTF8FromHString(element);
         });
@@ -962,9 +959,9 @@ struct TensorBase : TBase {
     // Ensure that the Set APIs are only called when there is 1 buffer.
     // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
     // They should always be backed by a single underlying buffer.
-    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
+    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
 
-    auto buffer = GetCpuResource()->buffer();
+    auto buffer = CpuTensor()->buffer();
     auto element_data = static_cast<ElementType*>(buffer.data());
 
     // This method accepts data as an IVectorView<T>.
@@ -983,9 +980,9 @@ struct TensorBase : TBase {
     // Ensure that the Set APIs are only called when there is 1 buffer.
     // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
     // They should always be backed by a single underlying buffer.
-    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
+    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
 
-    auto buffer = GetCpuResource()->buffer();
+    auto buffer = CpuTensor()->buffer();
     auto element_data = static_cast<_winml::Half*>(buffer.data());
 
     // Now that we take in IIterables and not vector views
@@ -1009,9 +1006,9 @@ struct TensorBase : TBase {
     // Ensure that the Set APIs are only called when there is 1 buffer.
     // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
     // They should always be backed by a single underlying buffer.
-    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
+    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
 
-    auto buffer = GetCpuResource()->buffer();
+    auto buffer = CpuTensor()->buffer();
     auto element_data = static_cast<int8_t*>(buffer.data());
     std::transform(begin(data), end(data), element_data, [](auto element) { return static_cast<int8_t>(element); });
   }
@@ -1026,39 +1023,39 @@ struct TensorBase : TBase {
     // Ensure that the Set APIs are only called when there is 1 buffer.
     // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
     // They should always be backed by a single underlying buffer.
-    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
+    FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
 
-    auto buffer = GetCpuResource()->buffer();
-    auto element_data = static_cast<std::string*>(buffer.data());
+    auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data());
+    auto& string_vector = string_data->get_backing_vector();
 
     // Convert and copy into the underlying buffer
-    std::transform(begin(data), end(data), element_data, [](const auto& element) {
+    std::transform(begin(data), end(data), std::begin(string_vector), [](const auto& element) {
       return _winml::Strings::UTF8FromHString(element);
     });
   }
 
-  std::shared_ptr<_winml::Tensor<T>>& GetCpuResource() {
+  std::shared_ptr<_winml::Tensor<T>>& CpuTensor() {
     WINML_THROW_HR_IF_NULL_MSG(
         E_ILLEGAL_METHOD_CALL,
-        m_resources,
+        resources_,
         "The tensor has been closed and its resources are detached!");
 
-    return m_resources->CpuResource;
+    return resources_->cpu_resource_;
   }
 
-  winrt::com_ptr<ID3D12Resource>& GetGpuResource() {
+  winrt::com_ptr<ID3D12Resource>& GpuTensor() {
     WINML_THROW_HR_IF_NULL_MSG(
         E_ILLEGAL_METHOD_CALL,
-        m_resources,
+        resources_,
         "The tensor has been closed and its resources are detached!");
 
-    return m_resources->GpuResource;
+    return resources_->gpu_resource_;
   }
 
  private:
   std::vector<int64_t> shape_;
-  std::shared_ptr<TensorResources<T>> m_resources;
-  std::vector<winrt::weak_ref<TensorMemoryBufferReference<T>>> m_outstandingReferences;
+  std::shared_ptr<TensorResources<T>> resources_;
+  std::vector<winrt::weak_ref<TensorMemoryBufferReference<T>>> outstanding_references_;
   bool m_isClosed = false;
 };
 
diff --git a/winml/lib/Api/impl/TensorBuffer.h b/winml/lib/Api/impl/TensorBuffer.h
deleted file mode 100644
index 5d9611cdc5..0000000000
--- a/winml/lib/Api/impl/TensorBuffer.h
+++ /dev/null
@@ -1,234 +0,0 @@
-﻿// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-#pragma once
-
-#include "robuffer.h"
-#include "winrt/Windows.Storage.Streams.h"
-#include "DisjointBufferHelpers.h"
-
-namespace _winml {
-
-class VectorBuffer : public winrt::implements<
-                         VectorBuffer,
-                         wss::IBuffer,
-                         Windows::Storage::Streams::IBufferByteAccess> {
- public:
-  VectorBuffer(size_t size) : buffer_(size) {}
-
-  uint32_t Capacity() const {
-    return static_cast<uint32_t>(buffer_.size());
-  }
-
-  uint32_t Length() const {
-    throw winrt::hresult_error(E_NOTIMPL);
-  }
-
-  void Length(uint32_t /*value*/) {
-    throw winrt::hresult_error(E_NOTIMPL);
-  }
-
-  STDMETHOD(Buffer)
-  (uint8_t** value) {
-    RETURN_HR_IF_NULL(E_POINTER, value);
-    *value = buffer_.data();
-    return S_OK;
-  }
-
- private:
-  std::vector<BYTE> buffer_;
-};
-
-template <typename T>
-class TensorBuffer {
-  wss::IBuffer combined_buffer_;
-  std::vector<wss::IBuffer> buffers_;
-  size_t size_;
-
-  TensorBuffer(size_t size) :
-      size_(size),
-      combined_buffer_(winrt::make<VectorBuffer>(size * sizeof(T))),
-      buffers_ { combined_buffer_ } {
-    auto buffer = BufferAt(0);
-
-    // The initial release of WinML (RS5) shipped with behavior that would
-    // zero-initialize uninitialized tensors. After measuring, the performance impact
-    // of memsetting the memory buffer is quite small (<1ms for 3channel 720x720 TensorFloats).
-    // To maintain parity with RS5 behavior, we always zero out the memory buffer.
-    memset(buffer.data(), 0, buffer.size_bytes());
-  }
-
-  TensorBuffer(
-      size_t size,
-      wfc::IIterable<wss::IBuffer> const& buffers) : size_(size),
-                                                     combined_buffer_(nullptr),
-                                                     buffers_(begin(buffers), end(buffers)) {
-    if (buffers_.size() == 1) {
-      combined_buffer_ = buffers_[0];
-    } else {
-      // If there are many buffers, then the combined buffer will be a separately allocated value that combines all of the buffers.
-      // This needs to be lazily done however, as the extra memory should not be allocated when not needed (GPU).
-    }
-  }
-
-  auto CombinedBuffer() {
-    if (combined_buffer_ == nullptr) {
-      combined_buffer_ = winrt::make<VectorBuffer>(size_ * sizeof(T));
-    }
-    return BufferFrom(combined_buffer_);
-  }
-
- public:
-  static auto Create(size_t size) {
-    return std::shared_ptr<TensorBuffer>(new TensorBuffer(size));
-  }
-
-  static auto Create(
-      size_t size,
-      wss::IBuffer buffer) {
-    return std::shared_ptr<TensorBuffer>(new TensorBuffer(size, buffer));
-  }
-
-  static auto Create(
-      size_t size,
-      wfc::IIterable<wss::IBuffer> const& buffers) {
-    return std::shared_ptr<TensorBuffer>(new TensorBuffer(size, buffers));
-  }
-
-  auto NumElements() {
-    return size_;
-  }
-
-  auto SizeInBytes() {
-    return size_ * sizeof(T);
-  }
-
-  auto NumBuffers() {
-    return buffers_.size();
-  }
-
-  auto& Buffers() {
-    return buffers_;
-  }
-
-  auto Buffer(bool should_sync_buffer) {
-    if (buffers_.size() == 1) {
-      // Single buffer optimization to not create a temporary buffer that concatenates disjoint buffers into one.
-      return BufferAt(0);
-    }
-    auto span = CombinedBuffer();
-    if (should_sync_buffer) {
-      _winml::LoadOrStoreDisjointBuffers(
-        true /*load buffer*/,
-        buffers_.size(),
-        [this](size_t i) { return BufferAt(i); },
-        span);
-    }
-
-    return span;
-  }
-
-  auto Flush() {
-    auto should_flush = buffers_.size() != 1;
-    if (should_flush) {
-      auto span = CombinedBuffer();
-      _winml::LoadOrStoreDisjointBuffers(
-          false /*store buffer*/,
-          buffers_.size(),
-          [this](size_t i) { return BufferAt(i); },
-          span);
-    }
-    return should_flush;
-  }
-
-  auto Set(size_t size_in_bytes, const T* data) {
-    WINML_THROW_HR_IF_FALSE_MSG(
-        E_INVALIDARG,
-        size_in_bytes <= (size_ * sizeof(T)),
-        "Argument size (%llu) exceeds the tensor size (%llu).",
-        static_cast<uint64_t>(size_in_bytes),
-        static_cast<uint64_t>(size_ * sizeof(T)));
-    
-    gsl::span<byte> span(reinterpret_cast<byte*>(const_cast<T*>(data)), size_in_bytes);
-    _winml::LoadOrStoreDisjointBuffers(
-      false /*store buffer*/,
-      buffers_.size(),
-      [this](size_t i) { return BufferAt(i); },
-      span);
-  }
-
-  auto Set(std::vector<T>&& moveableData) {
-    Set(moveableData.size() * sizeof(T), moveableData.data());
-  }
-
- private:
-  auto BufferFrom(wss::IBuffer buffer) {
-    byte* current_data = nullptr;
-    auto bufferByteAccess = buffer.as<Windows::Storage::Streams::IBufferByteAccess>();
-    bufferByteAccess->Buffer(&current_data);
-    return gsl::span<byte>(
-        current_data,
-        static_cast<size_t>(buffer.Capacity()));
-  }
-
-  auto BufferAt(size_t index) {
-    return BufferFrom(buffers_[index]);
-  }
-};
-
-template <>
-class TensorBuffer<std::string> {
-  std::vector<std::string> buffer_;
-
-  TensorBuffer(size_t size) : buffer_(size) {}
-
- public:
-  static auto Create(size_t size) {
-    return std::shared_ptr<TensorBuffer>(new TensorBuffer(size));
-  }
-
-  auto NumElements() {
-    return buffer_.size();
-  }
-
-  auto SizeInBytes() {
-    return buffer_.size();
-  }
-
-  auto NumBuffers() {
-    return 1;
-  }
-
-  auto Flush() {
-    return false;
-  }
-
-  auto Buffers() -> std::vector<wss::IBuffer>& {
-    WINML_THROW_HR(E_UNEXPECTED);
-  }
-
-  auto BufferAt(size_t index) {
-    WINML_THROW_HR_IF_FALSE_MSG(
-        E_INVALIDARG,
-        index == 0,
-        "TensorString can only be backed by a single buffer!");
-    return gsl::span<byte>(reinterpret_cast<byte*>(buffer_.data()), buffer_.size());
-  }
-
-  auto Buffer(bool /*should_sync_buffer*/) {
-    return BufferAt(0);
-  }
-
-  auto Set(size_t size, std::string_view* data) {
-    WINML_THROW_HR_IF_FALSE_MSG(
-        E_INVALIDARG,
-        size <= buffer_.size(),
-        "Argument size (%d) exceeds the tensor size (%d).",
-        static_cast<int>(size),
-        static_cast<int>(buffer_.size()));
-
-    // Copy
-    std::copy(data, data + size, buffer_.begin());
-  }
-};
-}  // namespace _winml
\ No newline at end of file
diff --git a/winml/lib/Api/impl/TensorMemoryBufferReference.h b/winml/lib/Api/impl/TensorMemoryBufferReference.h
index 61e5f5613c..dcf2f12a64 100644
--- a/winml/lib/Api/impl/TensorMemoryBufferReference.h
+++ b/winml/lib/Api/impl/TensorMemoryBufferReference.h
@@ -29,12 +29,12 @@ struct TensorResources {
       *capacity = 0;
 
       // Lazily allocate the cpu resource on call to GetBuffer
-      if (CpuResource == nullptr) {
-        CpuResource = std::make_shared<_winml::Tensor<T>>(shape);
+      if (cpu_resource_ == nullptr) {
+        cpu_resource_ = std::make_shared<_winml::Tensor<T>>(shape);
       }
 
       // Get the data pointer and size
-      auto buffer = CpuResource->buffer();
+      auto buffer = cpu_resource_->buffer();
 
       // Set out parameters
       *capacity = static_cast<uint32_t>(buffer.size_bytes());
@@ -45,8 +45,8 @@ struct TensorResources {
   }
 
   // Theses are access directly by TensorMemoryBufferReference<T> and TensorBase
-  std::shared_ptr<_winml::Tensor<T>> CpuResource;
-  winrt::com_ptr<ID3D12Resource> GpuResource;
+  std::shared_ptr<_winml::Tensor<T>> cpu_resource_;
+  winrt::com_ptr<ID3D12Resource> gpu_resource_;
 };
 
 // This class holds onto the lifetime of TensorResources<T> so that they can be kept alive by TensorBase AND its active MBRs.