mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-14 20:48:00 +00:00
Refactor implementation of Tensor<T> and underlying buffer stores to improve binary size and maintainability (#5836)
* refactor tensor buffers to make cleaner
* refactor to make tensor backing buffer implementation smaller and cleaner
* missed virtual on destructor
* remove unnecessary static_pointer_cast
* add string vector accessor

Co-authored-by: Sheil Kumar <sheilk@microsoft.com>
This commit is contained in:
parent
85f945a875
commit
84c1340f9b
16 changed files with 548 additions and 401 deletions
|
|
@ -400,15 +400,18 @@ endif(onnxruntime_USE_DML)
|
|||
# Add static library that will be archived/linked for both static/dynamic library
|
||||
add_library(winml_lib_api STATIC
|
||||
${winml_lib_api_dir}/impl/FeatureCompatibility.h
|
||||
${winml_lib_api_dir}/impl/IData.h
|
||||
${winml_lib_api_dir}/impl/IMapFeatureValue.h
|
||||
${winml_lib_api_dir}/impl/ISequenceFeatureValue.h
|
||||
${winml_lib_api_dir}/impl/MapBase.h
|
||||
${winml_lib_api_dir}/impl/NumericData.h
|
||||
${winml_lib_api_dir}/impl/SequenceBase.h
|
||||
${winml_lib_api_dir}/impl/StringData.h
|
||||
${winml_lib_api_dir}/impl/Tensor.h
|
||||
${winml_lib_api_dir}/impl/TensorBase.h
|
||||
${winml_lib_api_dir}/impl/TensorBuffer.h
|
||||
${winml_lib_api_dir}/impl/TensorKindFrom.h
|
||||
${winml_lib_api_dir}/impl/TensorMemoryBufferReference.h
|
||||
${winml_lib_api_dir}/NumericData.cpp
|
||||
${winml_lib_api_dir}/ImageFeatureDescriptor.cpp
|
||||
${winml_lib_api_dir}/ImageFeatureDescriptor.h
|
||||
${winml_lib_api_dir}/ImageFeatureValue.cpp
|
||||
|
|
@ -429,8 +432,11 @@ add_library(winml_lib_api STATIC
|
|||
${winml_lib_api_dir}/MapFeatureDescriptor.h
|
||||
${winml_lib_api_dir}/SequenceFeatureDescriptor.cpp
|
||||
${winml_lib_api_dir}/SequenceFeatureDescriptor.h
|
||||
${winml_lib_api_dir}/StringData.cpp
|
||||
${winml_lib_api_dir}/TensorFeatureDescriptor.cpp
|
||||
${winml_lib_api_dir}/TensorFeatureDescriptor.h
|
||||
${winml_lib_api_dir}/VectorBackedBuffer.h
|
||||
${winml_lib_api_dir}/VectorBackedBuffer.cpp
|
||||
${winml_lib_api_dir}/pch/pch.h
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
namespace _winml {
|
||||
|
||||
void LoadOrStoreDisjointBuffers(
|
||||
static void LoadOrStoreDisjointBuffers(
|
||||
bool should_load_buffer,
|
||||
size_t num_buffers,
|
||||
std::function<gsl::span<byte>(size_t)> get_buffer,
|
||||
|
|
@ -31,4 +31,18 @@ void LoadOrStoreDisjointBuffers(
|
|||
}
|
||||
}
|
||||
|
||||
void LoadSpanFromDisjointBuffers(
|
||||
size_t num_buffers,
|
||||
std::function<gsl::span<byte>(size_t)> get_buffer,
|
||||
gsl::span<byte>& buffer_span) {
|
||||
LoadOrStoreDisjointBuffers(true /*load into the span*/, num_buffers, get_buffer, buffer_span);
|
||||
}
|
||||
|
||||
void StoreSpanIntoDisjointBuffers(
|
||||
size_t num_buffers,
|
||||
std::function<gsl::span<byte>(size_t)> get_buffer,
|
||||
gsl::span<byte>& buffer_span) {
|
||||
LoadOrStoreDisjointBuffers(false /*store into buffers*/, num_buffers, get_buffer, buffer_span);
|
||||
}
|
||||
|
||||
} // namespace _winml
|
||||
|
|
|
|||
|
|
@ -630,8 +630,7 @@ void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers(
|
|||
byte* readback_buffer = nullptr;
|
||||
WINML_THROW_IF_FAILED(readback_heap_->Map(0, &CD3DX12_RANGE(0, buffer_size_in_bytes), reinterpret_cast<void**>(&readback_buffer)));
|
||||
auto readback_buffer_span = gsl::span<byte>(readback_buffer, buffer_size_in_bytes);
|
||||
_winml::LoadOrStoreDisjointBuffers(
|
||||
false /*load disjoint buffers into*/,
|
||||
_winml::StoreSpanIntoDisjointBuffers(
|
||||
buffers.size(),
|
||||
[&](size_t i) {
|
||||
byte* buffer_start = nullptr;
|
||||
|
|
|
|||
|
|
@ -559,8 +559,7 @@ void VideoFrameToTensorConverter::ConvertBuffersToBatchedGPUTensor(
|
|||
WINML_THROW_IF_FAILED(upload_heap_->Map(0, &CD3DX12_RANGE(0, 0), reinterpret_cast<void**>(&gpu_buffer)));
|
||||
auto gpu_buffer_span = gsl::span<byte>(gpu_buffer, buffer_size_in_bytes);
|
||||
|
||||
_winml::LoadOrStoreDisjointBuffers(
|
||||
true /*load disjoint buffers into*/,
|
||||
_winml::LoadSpanFromDisjointBuffers(
|
||||
buffers.size(),
|
||||
[&](size_t i) {
|
||||
byte* buffer_start = nullptr;
|
||||
|
|
|
|||
|
|
@ -7,8 +7,12 @@
|
|||
|
||||
namespace _winml {
|
||||
|
||||
void LoadOrStoreDisjointBuffers(
|
||||
bool should_load_buffer,
|
||||
void LoadSpanFromDisjointBuffers(
|
||||
size_t num_buffers,
|
||||
std::function<gsl::span<byte>(size_t)> get_buffer,
|
||||
gsl::span<byte>& buffer_span);
|
||||
|
||||
void StoreSpanIntoDisjointBuffers(
|
||||
size_t num_buffers,
|
||||
std::function<gsl::span<byte>(size_t)> get_buffer,
|
||||
gsl::span<byte>& buffer_span);
|
||||
|
|
|
|||
129
winml/lib/Api/NumericData.cpp
Normal file
129
winml/lib/Api/NumericData.cpp
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
#include "pch.h"
|
||||
|
||||
#include "impl/NumericData.h"
|
||||
#include "VectorBackedBuffer.h"
|
||||
#include "robuffer.h"
|
||||
#include "winrt/Windows.Storage.Streams.h"
|
||||
#include "DisjointBufferHelpers.h"
|
||||
|
||||
namespace _winml {
|
||||
|
||||
std::shared_ptr<_winml::idata> numeric_data::create(
|
||||
size_t num_elements,
|
||||
size_t element_size_in_bytes,
|
||||
wfc::IIterable<wss::IBuffer> const& buffers) {
|
||||
return std::make_shared<numeric_data>(num_elements, element_size_in_bytes, buffers);
|
||||
}
|
||||
|
||||
// Constructs the backing store for a numeric tensor.
//
// - No buffers supplied: a zero-initialized vector_backed_buffer of
//   num_elements * element_size_in_bytes bytes is allocated and used as both the
//   single backing buffer and the combined buffer.
// - Exactly one buffer supplied: it is used directly as the combined buffer.
// - Several buffers supplied: the combined buffer is left null and is created
//   lazily by combined_buffer().
//
// The initializer list follows the member declaration order of the class
// (combined_buffer_, buffers_, num_elements_, element_size_in_bytes_), which is
// the order in which the members are actually initialized.
numeric_data::numeric_data(
    size_t num_elements, size_t element_size_in_bytes, wfc::IIterable<wss::IBuffer> const& buffers) :
    combined_buffer_(nullptr),
    buffers_(),
    num_elements_(num_elements),
    element_size_in_bytes_(element_size_in_bytes) {
  if (buffers != nullptr) {
    buffers_ = {begin(buffers), end(buffers)};
  }

  if (buffers_.empty()) {
    combined_buffer_ = winrt::make<vector_backed_buffer>(num_elements * element_size_in_bytes);
    buffers_ = {combined_buffer_};
    auto buffer = buffer_at(0);

    // The initial release of WinML (RS5) shipped with behavior that would
    // zero-initialize uninitialized tensors. After measuring, the performance impact
    // of memsetting the memory buffer is quite small (<1ms for 3channel 720x720 TensorFloats).
    // To maintain parity with RS5 behavior, we always zero out the memory buffer.
    // A zero-sized buffer may expose a null data pointer, which must not be passed to memset.
    if (!buffer.empty()) {
      memset(buffer.data(), 0, buffer.size_bytes());
    }
  } else if (buffers_.size() == 1) {
    combined_buffer_ = buffers_[0];
  }
  // Otherwise there are many buffers: the combined buffer will be a separately allocated
  // value that combines all of the buffers. This needs to be lazily done however, as the
  // extra memory should not be allocated when not needed (GPU).
}
|
||||
|
||||
size_t numeric_data::num_elements() {
|
||||
return num_elements_;
|
||||
}
|
||||
|
||||
size_t numeric_data::size_in_bytes() {
|
||||
return num_elements_ * element_size_in_bytes_;
|
||||
}
|
||||
|
||||
size_t numeric_data::num_buffers() {
|
||||
return buffers_.size();
|
||||
}
|
||||
|
||||
std::vector<wss::IBuffer>& numeric_data::buffers() {
|
||||
return buffers_;
|
||||
}
|
||||
|
||||
// Returns one contiguous span covering the tensor contents.
//
// With a single backing buffer that buffer is returned directly. With several
// buffers the (lazily created) combined buffer is returned; when
// `should_sync_buffer` is true the disjoint buffers are first loaded into it.
gsl::span<byte> numeric_data::buffer(bool should_sync_buffer) {
  if (buffers_.size() == 1) {
    // Single buffer optimization to not create a temporary buffer that concatenates disjoint buffers into one.
    return buffer_at(0);
  }

  auto span = combined_buffer();
  if (should_sync_buffer) {
    _winml::LoadSpanFromDisjointBuffers(
        buffers_.size(),
        [this](size_t i) { return buffer_at(i); },
        span);
  }

  return span;
}
|
||||
|
||||
bool numeric_data::flush() {
|
||||
auto should_flush = buffers_.size() != 1;
|
||||
if (should_flush) {
|
||||
auto span = combined_buffer();
|
||||
_winml::StoreSpanIntoDisjointBuffers(
|
||||
buffers_.size(),
|
||||
[this](size_t i) { return buffer_at(i); },
|
||||
span);
|
||||
}
|
||||
return should_flush;
|
||||
}
|
||||
|
||||
void numeric_data::set(size_t data_size, const byte* data) {
|
||||
WINML_THROW_HR_IF_FALSE_MSG(
|
||||
E_INVALIDARG,
|
||||
data_size <= (num_elements_ * element_size_in_bytes_),
|
||||
"Argument size (%llu) exceeds the tensor size (%llu).",
|
||||
static_cast<uint64_t>(data_size),
|
||||
static_cast<uint64_t>(num_elements_ * element_size_in_bytes_));
|
||||
|
||||
gsl::span<byte> span(const_cast<byte*>(data), data_size);
|
||||
_winml::StoreSpanIntoDisjointBuffers(
|
||||
buffers_.size(),
|
||||
[this](size_t i) { return buffer_at(i); },
|
||||
span);
|
||||
}
|
||||
|
||||
// Returns a span over the raw storage of `buffer`.
//
// The data pointer is obtained through IBufferByteAccess and the span extent is
// the buffer's Capacity(). The buffer is taken by const reference to avoid a
// reference-count round trip, and a failing Buffer() call is surfaced instead of
// silently yielding a null-based span.
static gsl::span<byte> get_span_from_ibuffer(const wss::IBuffer& buffer) {
  byte* current_data = nullptr;
  auto buffer_byte_access = buffer.as<Windows::Storage::Streams::IBufferByteAccess>();
  WINML_THROW_IF_FAILED(buffer_byte_access->Buffer(&current_data));
  return gsl::span<byte>(
      current_data,
      static_cast<size_t>(buffer.Capacity()));
}
|
||||
|
||||
// Returns a span over the storage of the backing buffer at `index`.
// `index` is not range-checked here.
gsl::span<byte> numeric_data::buffer_at(size_t index) {
  return get_span_from_ibuffer(buffers_[index]);
}
|
||||
|
||||
// Returns a span over the combined buffer, allocating it on first use.
// The lazily created buffer is a vector_backed_buffer sized to the whole tensor.
gsl::span<byte> numeric_data::combined_buffer() {
  if (combined_buffer_ == nullptr) {
    combined_buffer_ = winrt::make<vector_backed_buffer>(num_elements_ * element_size_in_bytes_);
  }
  return get_span_from_ibuffer(combined_buffer_);
}
|
||||
|
||||
} // namespace _winml
|
||||
62
winml/lib/Api/StringData.cpp
Normal file
62
winml/lib/Api/StringData.cpp
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "pch.h"
|
||||
|
||||
#include "impl/StringData.h"
|
||||
|
||||
namespace _winml {
|
||||
|
||||
// Creates storage for `size` strings; every element starts out empty.
string_data::string_data(size_t size) : buffer_(size) {}
|
||||
|
||||
std::shared_ptr<_winml::idata> string_data::create(size_t size) {
|
||||
return std::make_shared<string_data>(size);
|
||||
}
|
||||
|
||||
size_t string_data::num_elements() {
|
||||
return buffer_.size();
|
||||
}
|
||||
|
||||
// Not supported for string data: always raises E_UNEXPECTED.
size_t string_data::size_in_bytes() {
  WINML_THROW_HR(E_UNEXPECTED);
}
|
||||
|
||||
// String data is always reported as a single buffer.
size_t string_data::num_buffers() {
  return 1;
}
|
||||
|
||||
// There are no separate buffers to write back to, so flushing does no work.
bool string_data::flush() {
  // Vacuously true
  return true;
}
|
||||
|
||||
// Not supported for string data: always raises E_UNEXPECTED.
std::vector<wss::IBuffer>& string_data::buffers() {
  WINML_THROW_HR(E_UNEXPECTED);
}
|
||||
|
||||
// Returns a byte-typed span that points at the std::string array.
// Note that the span's extent is the number of strings, not a byte count; the
// sync flag is ignored.
gsl::span<byte> string_data::buffer(bool /*should_sync_buffer*/) {
  auto* first_string = reinterpret_cast<byte*>(buffer_.data());
  return gsl::span<byte>(first_string, buffer_.size());
}
|
||||
|
||||
// Copies `num_elements` string views from `data` into the leading elements of
// the backing vector.
//
// Raises E_INVALIDARG (through WINML_THROW_HR_IF_FALSE_MSG) when `num_elements`
// exceeds the number of strings held. Sizes are reported as 64-bit unsigned
// values, consistent with numeric_data::set, so large sizes are not truncated.
void string_data::set(size_t num_elements, const std::string_view* data) {
  WINML_THROW_HR_IF_FALSE_MSG(
      E_INVALIDARG,
      num_elements <= buffer_.size(),
      "Argument size (%llu) exceeds the tensor size (%llu).",
      static_cast<uint64_t>(num_elements),
      static_cast<uint64_t>(buffer_.size()));

  // Copy
  std::copy(data, data + num_elements, buffer_.begin());
}
|
||||
|
||||
// Raw-byte assignment is not supported for string data: always raises E_UNEXPECTED.
void string_data::set(size_t /*data_size*/, const byte* /*data*/) {
  WINML_THROW_HR(E_UNEXPECTED);
}
|
||||
|
||||
std::vector<std::string>& string_data::get_backing_vector() {
|
||||
return buffer_;
|
||||
}
|
||||
|
||||
} // namespace _winml
|
||||
29
winml/lib/Api/VectorBackedBuffer.cpp
Normal file
29
winml/lib/Api/VectorBackedBuffer.cpp
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
#include "pch.h"
|
||||
|
||||
#include "VectorBackedBuffer.h"
|
||||
|
||||
namespace _winml {
|
||||
|
||||
// Allocates `size` bytes of vector storage; the vector value-initializes them.
vector_backed_buffer::vector_backed_buffer(size_t size) : buffer_(size) {}
|
||||
|
||||
uint32_t vector_backed_buffer::Capacity() const {
|
||||
return static_cast<uint32_t>(buffer_.size());
|
||||
}
|
||||
|
||||
uint32_t vector_backed_buffer::Length() const {
|
||||
throw winrt::hresult_error(E_NOTIMPL);
|
||||
}
|
||||
|
||||
void vector_backed_buffer::Length(uint32_t /*value*/) {
|
||||
throw winrt::hresult_error(E_NOTIMPL);
|
||||
}
|
||||
|
||||
// IBufferByteAccess::Buffer: hands out a pointer to the vector's storage.
// Returns E_POINTER when `value` is null, S_OK otherwise. The pointer stays
// owned by this object.
STDMETHODIMP vector_backed_buffer::Buffer(uint8_t** value) {
  RETURN_HR_IF_NULL(E_POINTER, value);
  *value = buffer_.data();
  return S_OK;
}
|
||||
|
||||
} // namespace _winml
|
||||
28
winml/lib/Api/VectorBackedBuffer.h
Normal file
28
winml/lib/Api/VectorBackedBuffer.h
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "robuffer.h"
|
||||
#include "winrt/Windows.Storage.Streams.h"
|
||||
|
||||
namespace _winml {
|
||||
|
||||
// An IBuffer implementation whose storage is a std::vector<BYTE> owned by the
// object. It also implements IBufferByteAccess so the raw bytes can be reached.
class vector_backed_buffer : public winrt::implements<
                                 vector_backed_buffer,
                                 wss::IBuffer,
                                 Windows::Storage::Streams::IBufferByteAccess> {
 public:
  // Allocates `size` bytes of storage. Explicit so that an integer is never
  // converted into a buffer implicitly.
  explicit vector_backed_buffer(size_t size);

  // IBuffer: size of the backing vector.
  uint32_t Capacity() const;
  // IBuffer: the length accessors are not implemented and throw E_NOTIMPL.
  uint32_t Length() const;
  void Length(uint32_t /*value*/);

  // IBufferByteAccess: returns a pointer to the backing storage.
  STDMETHOD(Buffer)(uint8_t** value);

 private:
  std::vector<BYTE> buffer_;
};
|
||||
|
||||
} // namespace _winml
|
||||
25
winml/lib/Api/impl/IData.h
Normal file
25
winml/lib/Api/impl/IData.h
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "IEngine.h"
|
||||
|
||||
// idata is the storage interface implemented by the tensor data containers (numeric_data, string_data).
|
||||
// NOTE(review): the earlier text here described ILotusValueProviderPrivate and looks copied from another header - confirm.
|
||||
|
||||
namespace _winml {
|
||||
|
||||
struct idata {
|
||||
virtual ~idata(){}
|
||||
|
||||
virtual size_t num_elements() = 0;
|
||||
virtual size_t size_in_bytes() = 0;
|
||||
virtual size_t num_buffers() = 0;
|
||||
virtual std::vector<wss::IBuffer>& buffers() = 0;
|
||||
virtual gsl::span<byte> buffer(bool should_sync_buffer) = 0;
|
||||
virtual bool flush() = 0;
|
||||
virtual void set(size_t data_size, const byte* data) = 0;
|
||||
};
|
||||
|
||||
} // namespace _winml
|
||||
46
winml/lib/Api/impl/NumericData.h
Normal file
46
winml/lib/Api/impl/NumericData.h
Normal file
|
|
@ -0,0 +1,46 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "IData.h"
|
||||
#include "robuffer.h"
|
||||
#include "winrt/Windows.Storage.Streams.h"
|
||||
|
||||
namespace _winml {
|
||||
|
||||
class numeric_data : public _winml::idata {
|
||||
public:
|
||||
static std::shared_ptr<_winml::idata> create(
|
||||
size_t num_elements,
|
||||
size_t element_size_in_bytes,
|
||||
wfc::IIterable<wss::IBuffer> const& buffers);
|
||||
|
||||
// Privte constructor as this type should be created as a shared_ptr
|
||||
numeric_data(size_t num_elements, size_t element_size_in_bytes, wfc::IIterable<wss::IBuffer> const& buffers);
|
||||
gsl::span<byte> buffer_at(size_t index);
|
||||
gsl::span<byte> combined_buffer();
|
||||
|
||||
public:
|
||||
size_t num_elements() override;
|
||||
size_t size_in_bytes() override;
|
||||
size_t num_buffers() override;
|
||||
|
||||
// Buffer accessors
|
||||
std::vector<wss::IBuffer>& buffers() override;
|
||||
gsl::span<byte> buffer(bool should_sync_buffer) override;
|
||||
|
||||
// Flush to buffers API
|
||||
bool flush() override;
|
||||
|
||||
// Set APIs
|
||||
void set(size_t data_size, const byte* data) override;
|
||||
|
||||
private:
|
||||
wss::IBuffer combined_buffer_;
|
||||
std::vector<wss::IBuffer> buffers_;
|
||||
size_t num_elements_;
|
||||
size_t element_size_in_bytes_;
|
||||
};
|
||||
|
||||
} // namespace _winml
|
||||
40
winml/lib/Api/impl/StringData.h
Normal file
40
winml/lib/Api/impl/StringData.h
Normal file
|
|
@ -0,0 +1,40 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "IData.h"
|
||||
#include "robuffer.h"
|
||||
#include "winrt/Windows.Storage.Streams.h"
|
||||
|
||||
namespace _winml {
|
||||
|
||||
class string_data : public _winml::idata {
|
||||
public:
|
||||
static std::shared_ptr<_winml::idata> create(size_t size);
|
||||
|
||||
string_data(size_t size);
|
||||
|
||||
size_t num_elements() override;
|
||||
size_t size_in_bytes() override;
|
||||
size_t num_buffers() override;
|
||||
|
||||
// Buffer accessors
|
||||
std::vector<wss::IBuffer>& buffers() override;
|
||||
gsl::span<byte> buffer(bool should_sync_buffer) override;
|
||||
|
||||
// Flush to buffers API
|
||||
bool flush() override;
|
||||
|
||||
// Set APIs
|
||||
void set(size_t data_size, const byte* data) override;
|
||||
|
||||
public:
|
||||
void set(size_t num_elements, const std::string_view* data);
|
||||
std::vector<std::string>& get_backing_vector();
|
||||
|
||||
private:
|
||||
std::vector<std::string> buffer_;
|
||||
};
|
||||
|
||||
} // namespace _winml
|
||||
|
|
@ -3,7 +3,8 @@
|
|||
|
||||
#pragma once
|
||||
|
||||
#include "TensorBuffer.h"
|
||||
#include "NumericData.h"
|
||||
#include "StringData.h"
|
||||
|
||||
//
|
||||
// the Tensor class is the actual object for CPU memory buffers.
|
||||
|
|
@ -12,81 +13,83 @@
|
|||
//
|
||||
namespace _winml {
|
||||
|
||||
// Returns the number of elements described by `shape`: the product of all
// dimensions. An empty shape yields 1. Dimensions are multiplied as int64_t and
// the product is converted to size_t; dimensions are not validated here.
inline size_t compute_size_of_shape(const std::vector<int64_t>& shape) {
  const int64_t element_count = std::accumulate(
      std::begin(shape),
      std::end(shape),
      static_cast<int64_t>(1),
      std::multiplies<int64_t>());
  return static_cast<size_t>(element_count);
}
|
||||
|
||||
template <typename T>
|
||||
inline auto create_data(
|
||||
const std::vector<int64_t>& shape,
|
||||
const wfc::IIterable<wss::IBuffer>& buffers) {
|
||||
return _winml::numeric_data::create(compute_size_of_shape(shape), sizeof(T), buffers);
|
||||
}
|
||||
|
||||
template <>
|
||||
inline auto create_data<std::string>(
|
||||
const std::vector<int64_t>& shape,
|
||||
const wfc::IIterable<wss::IBuffer>& /*buffers*/) {
|
||||
return _winml::string_data::create(compute_size_of_shape(shape));
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
class Tensor {
|
||||
private:
|
||||
std::shared_ptr<TensorBuffer<T>> buffer_;
|
||||
std::shared_ptr<_winml::idata> data_;
|
||||
std::vector<int64_t> shape_;
|
||||
|
||||
public:
|
||||
private:
|
||||
Tensor() = delete;
|
||||
|
||||
Tensor(
|
||||
std::vector<int64_t> const& shape,
|
||||
wfc::IIterable<wss::IBuffer> const& buffers) :
|
||||
shape_(shape),
|
||||
buffer_(TensorBuffer<T>::Create(
|
||||
static_cast<size_t>(std::accumulate(
|
||||
std::begin(shape), std::end(shape),
|
||||
static_cast<int64_t>(1), std::multiplies<int64_t>())),
|
||||
buffers)) {}
|
||||
public:
|
||||
Tensor(const std::vector<int64_t>& shape) :
|
||||
shape_(shape),
|
||||
data_(create_data<T>(shape, nullptr)) {}
|
||||
|
||||
Tensor(
|
||||
std::vector<int64_t> const& shape) : shape_(shape),
|
||||
buffer_(TensorBuffer<T>::Create(
|
||||
static_cast<size_t>(std::accumulate(
|
||||
std::begin(shape), std::end(shape),
|
||||
static_cast<int64_t>(1),
|
||||
std::multiplies<int64_t>())))) {}
|
||||
|
||||
Tensor(
|
||||
std::vector<int64_t> const&& shape) : shape_(std::move(shape)),
|
||||
buffer_(TensorBuffer<T>::Create(
|
||||
static_cast<size_t>(std::accumulate(
|
||||
std::begin(shape), std::end(shape),
|
||||
static_cast<int64_t>(1),
|
||||
std::multiplies<int64_t>())))) {
|
||||
}
|
||||
|
||||
auto number_of_elements() const {
|
||||
return buffer_->NumElements();
|
||||
}
|
||||
const std::vector<int64_t>& shape,
|
||||
const wfc::IIterable<wss::IBuffer>& buffers) :
|
||||
shape_(shape),
|
||||
data_(create_data<T>(shape, buffers)) {}
|
||||
|
||||
auto size_in_bytes() const {
|
||||
return buffer_->SizeInBytes();
|
||||
return data_->size_in_bytes();
|
||||
}
|
||||
|
||||
auto num_buffers() {
|
||||
return buffer_->NumBuffers();
|
||||
return data_->num_buffers();
|
||||
}
|
||||
|
||||
auto& buffers() {
|
||||
return buffer_->Buffers();
|
||||
return data_->buffers();
|
||||
}
|
||||
|
||||
auto buffer(bool should_sync_buffer = true) {
|
||||
auto span = buffer_->Buffer(should_sync_buffer);
|
||||
return gsl::span<T>(reinterpret_cast<T*>(span.data()), buffer_->NumElements());
|
||||
gsl::span<T> buffer(bool should_sync_buffer = true) {
|
||||
auto span = data_->buffer(should_sync_buffer);
|
||||
return gsl::span<T>(reinterpret_cast<T*>(span.data()), data_->num_elements());
|
||||
}
|
||||
|
||||
auto flush() {
|
||||
return buffer_->Flush();
|
||||
return data_->flush();
|
||||
}
|
||||
|
||||
void set(size_t size, const T* pData) {
|
||||
buffer_->Set(size * sizeof(T), pData);
|
||||
}
|
||||
|
||||
void set(std::vector<T>&& other) {
|
||||
buffer_->Set(other);
|
||||
void set(size_t size, const T* data) {
|
||||
auto size_in_bytes = size * sizeof(T);
|
||||
data_->set(size_in_bytes, reinterpret_cast<const byte*>(data));
|
||||
}
|
||||
|
||||
const std::vector<int64_t>& shape() const {
|
||||
return shape_;
|
||||
}
|
||||
|
||||
auto get_tensor_buffer() {
|
||||
return buffer_;
|
||||
auto get_data() {
|
||||
return data_;
|
||||
}
|
||||
};
|
||||
} // namespace _winml
|
||||
|
|
@ -74,28 +74,28 @@ struct TensorBase : TBase {
|
|||
/// b) TensorBase(winrt::Windows::Foundation::Collections::IIterable<int64_t> const& shape)
|
||||
/// 3) use provided backing gpu memory
|
||||
/// a) TensorBase(std::vector<int64_t> const& shape, ID3D12Resource* pResource)
|
||||
TensorBase() : m_resources(std::make_shared<TensorResources<T>>()) {
|
||||
TensorBase() : resources_(std::make_shared<TensorResources<T>>()) {
|
||||
}
|
||||
|
||||
TensorBase(wfc::IIterable<int64_t> const& shape) : shape_(begin(shape), end(shape)),
|
||||
m_resources(std::make_shared<TensorResources<T>>()) {
|
||||
GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape_);
|
||||
resources_(std::make_shared<TensorResources<T>>()) {
|
||||
CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape_);
|
||||
}
|
||||
|
||||
TensorBase(std::vector<int64_t> const& shape) : shape_(shape),
|
||||
m_resources(std::make_shared<TensorResources<T>>()) {
|
||||
GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape_);
|
||||
resources_(std::make_shared<TensorResources<T>>()) {
|
||||
CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape_);
|
||||
}
|
||||
|
||||
TensorBase(std::vector<int64_t> const& shape, ID3D12Resource* resource) : shape_(shape),
|
||||
m_resources(std::make_shared<TensorResources<T>>()) {
|
||||
resources_(std::make_shared<TensorResources<T>>()) {
|
||||
// This Api is not supported for TensorString
|
||||
WINML_THROW_HR_IF_TRUE_MSG(
|
||||
E_ILLEGAL_METHOD_CALL,
|
||||
(std::is_same<T, std::string>::value),
|
||||
"TensorString objects cannot be created from a ID3D12Resource!");
|
||||
|
||||
GetGpuResource().copy_from(resource);
|
||||
GpuTensor().copy_from(resource);
|
||||
}
|
||||
|
||||
HRESULT CreateGPUMLValue(ID3D12Resource* resource, BindingContext& context, IValue** out) {
|
||||
|
|
@ -117,21 +117,21 @@ struct TensorBase : TBase {
|
|||
auto engine = session->GetEngine();
|
||||
auto should_sync_buffer = context.type == _winml::BindingType::kInput;
|
||||
|
||||
if (GetCpuResource() != nullptr) {
|
||||
if (CpuTensor() != nullptr) {
|
||||
return CreateTensorValueFromExternalBuffer(engine, should_sync_buffer, out);
|
||||
}
|
||||
|
||||
// If there is no matching cpu resource, then fallback to a gpu resource
|
||||
if (GetGpuResource() != nullptr) {
|
||||
return CreateGPUMLValue(GetGpuResource().get(), context, out);
|
||||
if (GpuTensor() != nullptr) {
|
||||
return CreateGPUMLValue(GpuTensor().get(), context, out);
|
||||
}
|
||||
|
||||
WINML_THROW_HR(WINML_ERR_INVALID_BINDING);
|
||||
}
|
||||
|
||||
HRESULT GPUTensorize(_winml::BindingContext& context, IValue** out) {
|
||||
if (GetGpuResource() != nullptr) {
|
||||
return CreateGPUMLValue(GetGpuResource().get(), context, out);
|
||||
if (GpuTensor() != nullptr) {
|
||||
return CreateGPUMLValue(GpuTensor().get(), context, out);
|
||||
}
|
||||
|
||||
// Get engine
|
||||
|
|
@ -142,8 +142,8 @@ struct TensorBase : TBase {
|
|||
auto should_sync_buffer = context.type == _winml::BindingType::kInput;
|
||||
|
||||
// If there is no matching gpu resource, then fallback to a cpu resource
|
||||
if (GetCpuResource() != nullptr) {
|
||||
auto num_backing_buffers = GetCpuResource()->num_buffers();
|
||||
if (CpuTensor() != nullptr) {
|
||||
auto num_backing_buffers = CpuTensor()->num_buffers();
|
||||
if (num_backing_buffers == 1) {
|
||||
// If we have a single backing cpu buffer, there is no need to create GPU resources.
|
||||
// The engine will use the buffer provided, and perform the needed copies into the GPU context as needed.
|
||||
|
|
@ -154,24 +154,24 @@ struct TensorBase : TBase {
|
|||
// If we are binding inputs, then a GPU resource needs to be allocated, and individual buffer contents need
|
||||
// to be copied directly into a gpu resource.
|
||||
|
||||
if (GetGpuResource() == nullptr) {
|
||||
GetGpuResource() = CreateD3D12Resource(session);
|
||||
if (GpuTensor() == nullptr) {
|
||||
GpuTensor() = CreateD3D12Resource(session);
|
||||
}
|
||||
|
||||
_winml::ConverterResourceDescription descriptor = {};
|
||||
descriptor.pixel_format = static_cast<DWORD>(wgdx::DirectXPixelFormat::Unknown);
|
||||
descriptor.width = static_cast<int>(GetCpuResource()->size_in_bytes());
|
||||
descriptor.width = static_cast<int>(CpuTensor()->size_in_bytes());
|
||||
descriptor.height = static_cast<int>(1);
|
||||
descriptor.luid = device->GetD3DDevice()->GetAdapterLuid(); // Converted image on GPU
|
||||
|
||||
context.converter = _winml::PoolObjectWrapper::Create(device->TensorizerStore()->Fetch(descriptor));
|
||||
context.converter->Get()->Tensorizer->ConvertBuffersToBatchedGPUTensor(
|
||||
GetCpuResource()->buffers(),
|
||||
GetCpuResource()->size_in_bytes(),
|
||||
CpuTensor()->buffers(),
|
||||
CpuTensor()->size_in_bytes(),
|
||||
*device->GetD3DDeviceCache(),
|
||||
GetGpuResource().get());
|
||||
GpuTensor().get());
|
||||
|
||||
return CreateGPUMLValue(GetGpuResource().get(), context, out);
|
||||
return CreateGPUMLValue(GpuTensor().get(), context, out);
|
||||
|
||||
} else if (context.type == _winml::BindingType::kOutput) {
|
||||
// If we are binding outputs, then the buffers do not need to bound. If the engine produces a output on the gpu
|
||||
|
|
@ -179,8 +179,8 @@ struct TensorBase : TBase {
|
|||
// into the output buffers without temporary intermediary buffers! No binding here is necessary.
|
||||
// If the output produces a cpu buffer (even in the GPU case), we will already have a cpu buffer, and just need
|
||||
// to copy back to the output buffers, no binding is necessary.
|
||||
GetGpuResource() = CreateD3D12Resource(session);
|
||||
return CreateGPUMLValue(GetGpuResource().get(), context, out);
|
||||
GpuTensor() = CreateD3D12Resource(session);
|
||||
return CreateGPUMLValue(GpuTensor().get(), context, out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -188,11 +188,11 @@ struct TensorBase : TBase {
|
|||
if (TensorKind() == winml::TensorKind::String) {
|
||||
// Lazily allocate the cpu TensorString resource
|
||||
// TensorStrings are CPU only, and so a gpu resource cannot be allocated for them.
|
||||
GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape_);
|
||||
CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape_);
|
||||
return CreateTensorValueFromExternalBuffer(engine, should_sync_buffer, out);
|
||||
} else {
|
||||
GetGpuResource() = CreateD3D12Resource(session);
|
||||
return CreateGPUMLValue(GetGpuResource().get(), context, out);
|
||||
GpuTensor() = CreateD3D12Resource(session);
|
||||
return CreateGPUMLValue(GpuTensor().get(), context, out);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -242,8 +242,8 @@ struct TensorBase : TBase {
|
|||
void EnsureBufferNotInUse() {
|
||||
auto isBufferInUse =
|
||||
std::any_of(
|
||||
m_outstandingReferences.begin(),
|
||||
m_outstandingReferences.end(),
|
||||
outstanding_references_.begin(),
|
||||
outstanding_references_.end(),
|
||||
[](auto weakRef) { return weakRef.get() != nullptr; });
|
||||
|
||||
WINML_THROW_HR_IF_TRUE_MSG(WINML_ERR_INVALID_BINDING, isBufferInUse, "The tensor has outstanding memory buffer references that must be closed prior to evaluation!");
|
||||
|
|
@ -254,7 +254,7 @@ struct TensorBase : TBase {
|
|||
(_winml::BindingContext& context, IValue** out) {
|
||||
RETURN_HR_IF_NULL_MSG(
|
||||
WINML_ERR_INVALID_BINDING,
|
||||
m_resources,
|
||||
resources_,
|
||||
"The tensor has been closed and its resources have been detached!");
|
||||
|
||||
EnsureBufferNotInUse();
|
||||
|
|
@ -289,7 +289,7 @@ struct TensorBase : TBase {
|
|||
// the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met.
|
||||
ASSERT_TEMPLATE_PARAMETERS<ElementType, ElementViewType>();
|
||||
|
||||
GetCpuResource()->set(size, reinterpret_cast<ElementType*>(data));
|
||||
CpuTensor()->set(size, reinterpret_cast<ElementType*>(data));
|
||||
}
|
||||
|
||||
template <>
|
||||
|
|
@ -297,7 +297,8 @@ struct TensorBase : TBase {
|
|||
// Ensure that this call is being called with the correct template parameters
|
||||
ASSERT_TEMPLATE_PARAMETERS<std::string, winrt::hstring>();
|
||||
|
||||
GetCpuResource()->get_tensor_buffer()->Set(size, reinterpret_cast<std::string_view*>(data));
|
||||
auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data());
|
||||
string_data->set(size, reinterpret_cast<std::string_view*>(data));
|
||||
}
|
||||
|
||||
template <typename ElementType = T, typename ElementViewType = ViewT>
|
||||
|
|
@ -307,8 +308,8 @@ struct TensorBase : TBase {
|
|||
ASSERT_TEMPLATE_PARAMETERS<ElementType, ElementViewType>();
|
||||
|
||||
RETURN_IF_FAILED_MSG(engine->CreateTensorValueFromExternalBuffer(
|
||||
GetCpuResource()->buffer(sync_buffer).data(), GetCpuResource()->size_in_bytes(), GetCpuResource()->shape().data(),
|
||||
GetCpuResource()->shape().size(), TensorKind(), value),
|
||||
CpuTensor()->buffer(sync_buffer).data(), CpuTensor()->size_in_bytes(), CpuTensor()->shape().data(),
|
||||
CpuTensor()->shape().size(), TensorKind(), value),
|
||||
"Failed to prepare buffer for copy back from device resource.");
|
||||
return S_OK;
|
||||
}
|
||||
|
|
@ -318,17 +319,19 @@ struct TensorBase : TBase {
|
|||
// Ensure that this call is being called with the correct template parameters
|
||||
ASSERT_TEMPLATE_PARAMETERS<std::string, winrt::hstring>();
|
||||
|
||||
auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data());
|
||||
auto& string_vector = string_data->get_backing_vector();
|
||||
|
||||
std::vector<const char*> raw_values;
|
||||
auto string_array = static_cast<std::string*>(GetCpuResource()->buffer().data());
|
||||
std::transform(
|
||||
string_array,
|
||||
string_array + GetCpuResource()->number_of_elements(),
|
||||
std::begin(string_vector),
|
||||
std::end(string_vector),
|
||||
std::back_inserter(raw_values),
|
||||
[&](auto& str) { return str.c_str(); });
|
||||
|
||||
RETURN_IF_FAILED_MSG(engine->CreateStringTensorValueFromDataWithCopy(
|
||||
raw_values.data(), raw_values.size(), GetCpuResource()->shape().data(),
|
||||
GetCpuResource()->shape().size(), value),
|
||||
raw_values.data(), raw_values.size(), CpuTensor()->shape().data(),
|
||||
CpuTensor()->shape().size(), value),
|
||||
"Failed to prepare buffer for copy back from device resource.");
|
||||
return S_OK;
|
||||
}
|
||||
|
|
@ -338,7 +341,7 @@ struct TensorBase : TBase {
|
|||
(BindingContext& context, IValue* value) {
|
||||
RETURN_HR_IF_NULL_MSG(
|
||||
E_ILLEGAL_METHOD_CALL,
|
||||
m_resources,
|
||||
resources_,
|
||||
"The tensor has been closed and its resources have been detached during evaluation!");
|
||||
|
||||
_winml::Resource updated_resource;
|
||||
|
|
@ -348,14 +351,14 @@ struct TensorBase : TBase {
|
|||
RETURN_IF_FAILED_MSG(value->GetTensorShape(shape_), "Failed to get the tensor shape from resource!");
|
||||
|
||||
// make sure we always have a CPU resource
|
||||
if (GetCpuResource() == nullptr) {
|
||||
GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape_);
|
||||
if (CpuTensor() == nullptr) {
|
||||
CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape_);
|
||||
}
|
||||
|
||||
bool is_cpu;
|
||||
if (SUCCEEDED(value->IsCpu(&is_cpu)) && is_cpu) {
|
||||
// Get the data pointer and size
|
||||
auto buffer = GetCpuResource()->buffer(false);
|
||||
auto buffer = CpuTensor()->buffer(false);
|
||||
|
||||
if (updated_resource.get() != reinterpret_cast<void*>(buffer.data())) {
|
||||
// Only copy the data if the source and destination are not the same!
|
||||
|
|
@ -366,7 +369,7 @@ struct TensorBase : TBase {
|
|||
} else {
|
||||
// If the engine wrote to the data directly, it is possible that the underlying data was held by many buffers
|
||||
// In that case the underlying buffers will not match the engine output, and they need to be flushed.
|
||||
GetCpuResource()->flush();
|
||||
CpuTensor()->flush();
|
||||
}
|
||||
} else {
|
||||
// If we got a gpu resource, we should move the data to the cpu so accessors can retrieve the data.
|
||||
|
|
@ -377,7 +380,7 @@ struct TensorBase : TBase {
|
|||
auto device = session->Device().as<winmlp::LearningModelDevice>();
|
||||
auto engine = session->GetEngine();
|
||||
|
||||
if (GetCpuResource()->num_buffers() == 1) {
|
||||
if (CpuTensor()->num_buffers() == 1) {
|
||||
winrt::com_ptr<IValue> dest;
|
||||
RETURN_IF_FAILED_MSG(CreateTensorValueFromExternalBuffer(engine, false, dest.put()),
|
||||
"Failed to prepare buffer for copy back from device resource.");
|
||||
|
|
@ -395,7 +398,7 @@ struct TensorBase : TBase {
|
|||
d3dResource,
|
||||
buffer_size_in_bytes,
|
||||
*device->GetD3DDeviceCache(),
|
||||
GetCpuResource()->buffers());
|
||||
CpuTensor()->buffers());
|
||||
|
||||
// Reset the Allocator before return to the Cache. Must Sync this background thread to that completion before we do.
|
||||
device->GetD3DDeviceCache()->SyncD3D12ToCPU();
|
||||
|
|
@ -615,7 +618,7 @@ struct TensorBase : TBase {
|
|||
// Ensure that CreateReference is only called when there is 1 buffer.
|
||||
WINML_THROW_HR_IF_TRUE_MSG(
|
||||
E_ILLEGAL_METHOD_CALL,
|
||||
GetCpuResource() != nullptr && GetCpuResource()->num_buffers() != 1, "A single buffer reference cannot be retrieved when the tensor is backed by multiple buffers!");
|
||||
CpuTensor() != nullptr && CpuTensor()->num_buffers() != 1, "A single buffer reference cannot be retrieved when the tensor is backed by multiple buffers!");
|
||||
|
||||
// Create a TensorMemoryBufferReference<T>
|
||||
|
||||
|
|
@ -624,11 +627,11 @@ struct TensorBase : TBase {
|
|||
// "has been closed. In that case, the returned IMemoryBufferReference is already closed."
|
||||
// Creating a TensorMemoryBufferReference<T> with a null pointer is equivalent to creating it as closed.
|
||||
|
||||
auto memoryBufferReference = winrt::make<TensorMemoryBufferReference<T>>(shape_, m_resources);
|
||||
auto memoryBufferReference = winrt::make<TensorMemoryBufferReference<T>>(shape_, resources_);
|
||||
|
||||
// Create and cache a weak reference to the TensorMemoryBufferReference<T>
|
||||
winrt::weak_ref<TensorMemoryBufferReference<T>> weak(memoryBufferReference.as<TensorMemoryBufferReference<T>>());
|
||||
m_outstandingReferences.push_back(weak);
|
||||
outstanding_references_.push_back(weak);
|
||||
|
||||
// Return the strong ref to the caller
|
||||
return memoryBufferReference;
|
||||
|
|
@ -638,7 +641,7 @@ struct TensorBase : TBase {
|
|||
// IMemoryBuffer::Close
|
||||
void Close() try {
|
||||
// Let go of the lifetime of the resources; this will indicate that the memory buffer is closed
|
||||
m_resources = nullptr;
|
||||
resources_ = nullptr;
|
||||
}
|
||||
WINML_CATCH_ALL
|
||||
|
||||
|
|
@ -653,10 +656,10 @@ struct TensorBase : TBase {
|
|||
|
||||
RETURN_HR_IF_NULL_MSG(
|
||||
E_ILLEGAL_METHOD_CALL,
|
||||
m_resources,
|
||||
resources_,
|
||||
"The tensor has been closed and its resources have been detached!");
|
||||
|
||||
return m_resources->GetBuffer(shape_, value, capacity);
|
||||
return resources_->GetBuffer(shape_, value, capacity);
|
||||
}
|
||||
|
||||
// ITensorNative::GetD3D12Resource
|
||||
|
|
@ -667,10 +670,10 @@ struct TensorBase : TBase {
|
|||
RETURN_HR_IF(ERROR_INVALID_FUNCTION, (std::is_same<T, std::string>::value));
|
||||
RETURN_HR_IF_NULL_MSG(
|
||||
E_ILLEGAL_METHOD_CALL,
|
||||
m_resources,
|
||||
resources_,
|
||||
"The tensor has been closed and its resources have been detached!");
|
||||
|
||||
GetGpuResource().copy_to(ppResource);
|
||||
GpuTensor().copy_to(ppResource);
|
||||
return S_OK;
|
||||
}
|
||||
WINML_CATCH_ALL_COM
|
||||
|
|
@ -689,12 +692,11 @@ struct TensorBase : TBase {
|
|||
// owned IVectorView object.
|
||||
|
||||
// Get the raw buffer pointer from the native tensor implementation.
|
||||
auto number_of_elements = GetCpuResource()->number_of_elements();
|
||||
auto buffer = GetCpuResource()->buffer();
|
||||
auto buffer = CpuTensor()->buffer();
|
||||
auto element_data = static_cast<ElementType*>(buffer.data());
|
||||
|
||||
|
||||
// Copy data that will be passed back to caller.
|
||||
auto copy = std::vector<ElementType>(element_data, element_data + number_of_elements);
|
||||
auto copy = std::vector<ElementType>(element_data, element_data + buffer.size());
|
||||
|
||||
// Create IVectorView from copied data.
|
||||
return winrt::single_threaded_vector<ElementViewType>(std::move(copy)).GetView();
|
||||
|
|
@ -707,18 +709,17 @@ struct TensorBase : TBase {
|
|||
// Ensure that this call is being called with the correct template parameters
|
||||
ASSERT_TEMPLATE_PARAMETERS<_winml::Half, float>();
|
||||
|
||||
auto number_of_elements = GetCpuResource()->number_of_elements();
|
||||
auto buffer = GetCpuResource()->buffer();
|
||||
auto buffer = CpuTensor()->buffer();
|
||||
auto element_data = static_cast<_winml::Half*>(buffer.data());
|
||||
|
||||
// Copy the HALFs to floats
|
||||
std::vector<float> float_value(number_of_elements);
|
||||
std::vector<float> float_value(buffer.size());
|
||||
DirectX::PackedVector::XMConvertHalfToFloatStream(
|
||||
float_value.data(),
|
||||
sizeof(float) /* output stride */,
|
||||
reinterpret_cast<DirectX::PackedVector::HALF*>(element_data),
|
||||
sizeof(_winml::Half) /* input stride */,
|
||||
number_of_elements);
|
||||
buffer.size());
|
||||
|
||||
// Create IVectorView from copied data.
|
||||
return winrt::single_threaded_vector<float>(std::move(float_value)).GetView();
|
||||
|
|
@ -731,16 +732,15 @@ struct TensorBase : TBase {
|
|||
// Ensure that this call is being called with the correct template parameters
|
||||
ASSERT_TEMPLATE_PARAMETERS<std::string, winrt::hstring>();
|
||||
|
||||
auto number_of_elements = GetCpuResource()->number_of_elements();
|
||||
auto buffer = GetCpuResource()->buffer();
|
||||
auto element_data = static_cast<std::string*>(buffer.data());
|
||||
auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data());
|
||||
auto& string_vector = string_data->get_backing_vector();
|
||||
|
||||
auto copy = std::vector<winrt::hstring>(number_of_elements, L"");
|
||||
auto copy = std::vector<winrt::hstring>(string_vector.size(), L"");
|
||||
std::generate(
|
||||
copy.begin(),
|
||||
copy.end(),
|
||||
[n = 0, &element_data]() mutable {
|
||||
return _winml::Strings::HStringFromUTF8(element_data[n++]);
|
||||
[n = 0, &string_vector]() mutable {
|
||||
return _winml::Strings::HStringFromUTF8(string_vector[n++]);
|
||||
});
|
||||
|
||||
return winrt::single_threaded_vector<winrt::hstring>(std::move(copy)).GetView();
|
||||
|
|
@ -752,14 +752,13 @@ struct TensorBase : TBase {
|
|||
wfc::IVectorView<uint8_t> GetAsVectorView<int8_t, uint8_t>() try {
|
||||
ASSERT_TEMPLATE_PARAMETERS<int8_t, uint8_t>();
|
||||
|
||||
auto number_of_elements = GetCpuResource()->number_of_elements();
|
||||
auto buffer = GetCpuResource()->buffer();
|
||||
auto buffer = CpuTensor()->buffer();
|
||||
auto element_data = static_cast<int8_t*>(buffer.data());
|
||||
|
||||
// Copy data that will be passed back to caller.
|
||||
|
||||
gsl::span<uint8_t> span(reinterpret_cast<uint8_t*>(element_data), number_of_elements);
|
||||
std::vector<uint8_t> copy(span.begin(), span.begin() + number_of_elements);
|
||||
gsl::span<uint8_t> span(reinterpret_cast<uint8_t*>(element_data), buffer.size());
|
||||
std::vector<uint8_t> copy(span.begin(), span.begin() + buffer.size());
|
||||
|
||||
// Create IVectorView from copied data.
|
||||
return winrt::single_threaded_vector<uint8_t>(std::move(copy)).GetView();
|
||||
|
|
@ -809,10 +808,10 @@ struct TensorBase : TBase {
|
|||
RETURN_HR_IF_NULL(E_POINTER, pIsPlaceHolder);
|
||||
RETURN_HR_IF_NULL_MSG(
|
||||
E_ILLEGAL_METHOD_CALL,
|
||||
m_resources,
|
||||
resources_,
|
||||
"The tensor has been closed and its resources have been detached!");
|
||||
|
||||
*pIsPlaceHolder = GetCpuResource() == nullptr && GetGpuResource() == nullptr;
|
||||
*pIsPlaceHolder = CpuTensor() == nullptr && GpuTensor() == nullptr;
|
||||
return S_OK;
|
||||
}
|
||||
|
||||
|
|
@ -827,7 +826,7 @@ struct TensorBase : TBase {
|
|||
ASSERT_TEMPLATE_PARAMETERS_EXACT<ElementType, ElementViewType>();
|
||||
|
||||
shape_ = shape;
|
||||
GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape, buffers);
|
||||
CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape, buffers);
|
||||
}
|
||||
|
||||
template <>
|
||||
|
|
@ -837,7 +836,7 @@ struct TensorBase : TBase {
|
|||
ASSERT_TEMPLATE_PARAMETERS<_winml::Half, float>();
|
||||
|
||||
shape_ = shape;
|
||||
GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape, buffers);
|
||||
CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape, buffers);
|
||||
}
|
||||
|
||||
template <>
|
||||
|
|
@ -847,7 +846,7 @@ struct TensorBase : TBase {
|
|||
ASSERT_TEMPLATE_PARAMETERS<int8_t, uint8_t>();
|
||||
|
||||
shape_ = shape;
|
||||
GetCpuResource() = std::make_shared<_winml::Tensor<T>>(shape, buffers);
|
||||
CpuTensor() = std::make_shared<_winml::Tensor<T>>(shape, buffers);
|
||||
}
|
||||
|
||||
// Specialized version to convert hstring to string
|
||||
|
|
@ -875,12 +874,12 @@ struct TensorBase : TBase {
|
|||
// Ensure that the Set APIs are only called when there is 1 buffer.
|
||||
// These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
|
||||
// They should always be backed by a single underlying buffer.
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
|
||||
|
||||
// This method accepts data as an array, T[], from the caller.
|
||||
// This is a non-destructive API, so the caller data is
|
||||
// left untouched, and the data is copied into internal buffers.
|
||||
GetCpuResource()->set(data.size(), data.data());
|
||||
CpuTensor()->set(data.size(), data.data());
|
||||
}
|
||||
|
||||
// Specialized version to convert floats to float16
|
||||
|
|
@ -892,13 +891,12 @@ struct TensorBase : TBase {
|
|||
// Ensure that the Set APIs are only called when there is 1 buffer.
|
||||
// These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
|
||||
// They should always be backed by a single underlying buffer.
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
|
||||
|
||||
auto number_of_elements = GetCpuResource()->number_of_elements();
|
||||
auto buffer = GetCpuResource()->buffer();
|
||||
auto buffer = CpuTensor()->buffer();
|
||||
auto element_data = static_cast<_winml::Half*>(buffer.data());
|
||||
|
||||
THROW_HR_IF(E_UNEXPECTED, data.size() != number_of_elements);
|
||||
THROW_HR_IF(E_UNEXPECTED, data.size() != buffer.size());
|
||||
DirectX::PackedVector::XMConvertFloatToHalfStream(
|
||||
reinterpret_cast<DirectX::PackedVector::HALF*>(element_data),
|
||||
sizeof(_winml::Half) /* output stride */,
|
||||
|
|
@ -916,12 +914,12 @@ struct TensorBase : TBase {
|
|||
// Ensure that the Set APIs are only called when there is 1 buffer.
|
||||
// These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
|
||||
// They should always be backed by a single underlying buffer.
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
|
||||
|
||||
auto size = data.size();
|
||||
auto pData = data.data();
|
||||
|
||||
GetCpuResource()->set(size, reinterpret_cast<int8_t*>(const_cast<uint8_t*>(pData)));
|
||||
CpuTensor()->set(size, reinterpret_cast<int8_t*>(const_cast<uint8_t*>(pData)));
|
||||
}
|
||||
|
||||
// Specialized version to convert hstring to string
|
||||
|
|
@ -933,17 +931,16 @@ struct TensorBase : TBase {
|
|||
// Ensure that the Set APIs are only called when there is 1 buffer.
|
||||
// These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
|
||||
// They should always be backed by a single underlying buffer.
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
|
||||
|
||||
auto number_of_elements = GetCpuResource()->number_of_elements();
|
||||
auto buffer = GetCpuResource()->buffer();
|
||||
THROW_HR_IF(E_UNEXPECTED, data.size() > number_of_elements);
|
||||
auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data());
|
||||
auto& string_vector = string_data->get_backing_vector();
|
||||
|
||||
auto element_data = static_cast<std::string*>(buffer.data());
|
||||
THROW_HR_IF(E_UNEXPECTED, data.size() > string_vector.size());
|
||||
|
||||
// Convert and copy into the underlying buffer
|
||||
std::transform(
|
||||
data.begin(), data.end(), element_data,
|
||||
data.begin(), data.end(), std::begin(string_vector),
|
||||
[](auto& element) mutable {
|
||||
return _winml::Strings::UTF8FromHString(element);
|
||||
});
|
||||
|
|
@ -962,9 +959,9 @@ struct TensorBase : TBase {
|
|||
// Ensure that the Set APIs are only called when there is 1 buffer.
|
||||
// These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
|
||||
// They should always be backed by a single underlying buffer.
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
|
||||
|
||||
auto buffer = GetCpuResource()->buffer();
|
||||
auto buffer = CpuTensor()->buffer();
|
||||
auto element_data = static_cast<ElementType*>(buffer.data());
|
||||
|
||||
// This method accepts data as an IVectorView<T>.
|
||||
|
|
@ -983,9 +980,9 @@ struct TensorBase : TBase {
|
|||
// Ensure that the Set APIs are only called when there is 1 buffer.
|
||||
// These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
|
||||
// They should always be backed by a single underlying buffer.
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
|
||||
|
||||
auto buffer = GetCpuResource()->buffer();
|
||||
auto buffer = CpuTensor()->buffer();
|
||||
auto element_data = static_cast<_winml::Half*>(buffer.data());
|
||||
|
||||
// Now that we take in IIterables and not vector views
|
||||
|
|
@ -1009,9 +1006,9 @@ struct TensorBase : TBase {
|
|||
// Ensure that the Set APIs are only called when there is 1 buffer.
|
||||
// These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
|
||||
// They should always be backed by a single underlying buffer.
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
|
||||
|
||||
auto buffer = GetCpuResource()->buffer();
|
||||
auto buffer = CpuTensor()->buffer();
|
||||
auto element_data = static_cast<int8_t*>(buffer.data());
|
||||
std::transform(begin(data), end(data), element_data, [](auto element) { return static_cast<int8_t>(element); });
|
||||
}
|
||||
|
|
@ -1026,39 +1023,39 @@ struct TensorBase : TBase {
|
|||
// Ensure that the Set APIs are only called when there is 1 buffer.
|
||||
// These APIs are only called when the tensor is being constructed from various collection and pointer public APIs.
|
||||
// They should always be backed by a single underlying buffer.
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1);
|
||||
FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1);
|
||||
|
||||
auto buffer = GetCpuResource()->buffer();
|
||||
auto element_data = static_cast<std::string*>(buffer.data());
|
||||
auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data());
|
||||
auto& string_vector = string_data->get_backing_vector();
|
||||
|
||||
// Convert and copy into the underlying buffer
|
||||
std::transform(begin(data), end(data), element_data, [](const auto& element) {
|
||||
std::transform(begin(data), end(data), std::begin(string_vector), [](const auto& element) {
|
||||
return _winml::Strings::UTF8FromHString(element);
|
||||
});
|
||||
}
|
||||
|
||||
std::shared_ptr<_winml::Tensor<T>>& GetCpuResource() {
|
||||
std::shared_ptr<_winml::Tensor<T>>& CpuTensor() {
|
||||
WINML_THROW_HR_IF_NULL_MSG(
|
||||
E_ILLEGAL_METHOD_CALL,
|
||||
m_resources,
|
||||
resources_,
|
||||
"The tensor has been closed and its resources are detached!");
|
||||
|
||||
return m_resources->CpuResource;
|
||||
return resources_->cpu_resource_;
|
||||
}
|
||||
|
||||
winrt::com_ptr<ID3D12Resource>& GetGpuResource() {
|
||||
winrt::com_ptr<ID3D12Resource>& GpuTensor() {
|
||||
WINML_THROW_HR_IF_NULL_MSG(
|
||||
E_ILLEGAL_METHOD_CALL,
|
||||
m_resources,
|
||||
resources_,
|
||||
"The tensor has been closed and its resources are detached!");
|
||||
|
||||
return m_resources->GpuResource;
|
||||
return resources_->gpu_resource_;
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<int64_t> shape_;
|
||||
std::shared_ptr<TensorResources<T>> m_resources;
|
||||
std::vector<winrt::weak_ref<TensorMemoryBufferReference<T>>> m_outstandingReferences;
|
||||
std::shared_ptr<TensorResources<T>> resources_;
|
||||
std::vector<winrt::weak_ref<TensorMemoryBufferReference<T>>> outstanding_references_;
|
||||
bool m_isClosed = false;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,234 +0,0 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "robuffer.h"
|
||||
#include "winrt/Windows.Storage.Streams.h"
|
||||
#include "DisjointBufferHelpers.h"
|
||||
|
||||
namespace _winml {
|
||||
|
||||
// A wss::IBuffer implementation whose storage is a std::vector<BYTE> owned by this object.
// The raw bytes are exposed through IBufferByteAccess::Buffer. Only Capacity() is supported;
// both Length accessors throw E_NOTIMPL.
class VectorBuffer : public winrt::implements<
                         VectorBuffer,
                         wss::IBuffer,
                         Windows::Storage::Streams::IBufferByteAccess> {
 public:
  // Allocates `size` bytes of backing storage.
  // `explicit` so that a bare size_t can never be silently converted into a buffer.
  explicit VectorBuffer(size_t size) : buffer_(size) {}

  // Returns the number of bytes in the backing vector, narrowed to uint32_t.
  uint32_t Capacity() const {
    return static_cast<uint32_t>(buffer_.size());
  }

  // Not implemented: always throws winrt::hresult_error(E_NOTIMPL).
  uint32_t Length() const {
    throw winrt::hresult_error(E_NOTIMPL);
  }

  // Not implemented: always throws winrt::hresult_error(E_NOTIMPL).
  void Length(uint32_t /*value*/) {
    throw winrt::hresult_error(E_NOTIMPL);
  }

  // IBufferByteAccess::Buffer
  // Writes the address of the backing storage to `value`.
  // Returns E_POINTER when `value` is null, S_OK otherwise.
  STDMETHOD(Buffer)
  (uint8_t** value) {
    RETURN_HR_IF_NULL(E_POINTER, value);
    *value = buffer_.data();
    return S_OK;
  }

 private:
  std::vector<BYTE> buffer_;
};
|
||||
|
||||
template <typename T>
|
||||
class TensorBuffer {
|
||||
wss::IBuffer combined_buffer_;
|
||||
std::vector<wss::IBuffer> buffers_;
|
||||
size_t size_;
|
||||
|
||||
TensorBuffer(size_t size) :
|
||||
size_(size),
|
||||
combined_buffer_(winrt::make<VectorBuffer>(size * sizeof(T))),
|
||||
buffers_ { combined_buffer_ } {
|
||||
auto buffer = BufferAt(0);
|
||||
|
||||
// The initial release of WinML (RS5) shipped with behavior that would
|
||||
// zero-initialize uninitialized tensors. After measuring, the performance impact
|
||||
// of memsetting the memory buffer is quite small (<1ms for 3channel 720x720 TensorFloats).
|
||||
// To maintain parity with RS5 behavior, we always zero out the memory buffer.
|
||||
memset(buffer.data(), 0, buffer.size_bytes());
|
||||
}
|
||||
|
||||
TensorBuffer(
|
||||
size_t size,
|
||||
wfc::IIterable<wss::IBuffer> const& buffers) : size_(size),
|
||||
combined_buffer_(nullptr),
|
||||
buffers_(begin(buffers), end(buffers)) {
|
||||
if (buffers_.size() == 1) {
|
||||
combined_buffer_ = buffers_[0];
|
||||
} else {
|
||||
// If there are many buffers, then the combined buffer will be a separately allocated value that combines all of the buffers.
|
||||
// This needs to be lazily done however, as the extra memory should not be allocated when not needed (GPU).
|
||||
}
|
||||
}
|
||||
|
||||
auto CombinedBuffer() {
|
||||
if (combined_buffer_ == nullptr) {
|
||||
combined_buffer_ = winrt::make<VectorBuffer>(size_ * sizeof(T));
|
||||
}
|
||||
return BufferFrom(combined_buffer_);
|
||||
}
|
||||
|
||||
public:
|
||||
static auto Create(size_t size) {
|
||||
return std::shared_ptr<TensorBuffer>(new TensorBuffer(size));
|
||||
}
|
||||
|
||||
static auto Create(
|
||||
size_t size,
|
||||
wss::IBuffer buffer) {
|
||||
return std::shared_ptr<TensorBuffer>(new TensorBuffer(size, buffer));
|
||||
}
|
||||
|
||||
static auto Create(
|
||||
size_t size,
|
||||
wfc::IIterable<wss::IBuffer> const& buffers) {
|
||||
return std::shared_ptr<TensorBuffer>(new TensorBuffer(size, buffers));
|
||||
}
|
||||
|
||||
auto NumElements() {
|
||||
return size_;
|
||||
}
|
||||
|
||||
auto SizeInBytes() {
|
||||
return size_ * sizeof(T);
|
||||
}
|
||||
|
||||
auto NumBuffers() {
|
||||
return buffers_.size();
|
||||
}
|
||||
|
||||
auto& Buffers() {
|
||||
return buffers_;
|
||||
}
|
||||
|
||||
auto Buffer(bool should_sync_buffer) {
|
||||
if (buffers_.size() == 1) {
|
||||
// Single buffer optimization to not create a temporary buffer that concatenates disjoint buffers into one.
|
||||
return BufferAt(0);
|
||||
}
|
||||
auto span = CombinedBuffer();
|
||||
if (should_sync_buffer) {
|
||||
_winml::LoadOrStoreDisjointBuffers(
|
||||
true /*load buffer*/,
|
||||
buffers_.size(),
|
||||
[this](size_t i) { return BufferAt(i); },
|
||||
span);
|
||||
}
|
||||
|
||||
return span;
|
||||
}
|
||||
|
||||
auto Flush() {
|
||||
auto should_flush = buffers_.size() != 1;
|
||||
if (should_flush) {
|
||||
auto span = CombinedBuffer();
|
||||
_winml::LoadOrStoreDisjointBuffers(
|
||||
false /*store buffer*/,
|
||||
buffers_.size(),
|
||||
[this](size_t i) { return BufferAt(i); },
|
||||
span);
|
||||
}
|
||||
return should_flush;
|
||||
}
|
||||
|
||||
auto Set(size_t size_in_bytes, const T* data) {
|
||||
WINML_THROW_HR_IF_FALSE_MSG(
|
||||
E_INVALIDARG,
|
||||
size_in_bytes <= (size_ * sizeof(T)),
|
||||
"Argument size (%llu) exceeds the tensor size (%llu).",
|
||||
static_cast<uint64_t>(size_in_bytes),
|
||||
static_cast<uint64_t>(size_ * sizeof(T)));
|
||||
|
||||
gsl::span<byte> span(reinterpret_cast<byte*>(const_cast<T*>(data)), size_in_bytes);
|
||||
_winml::LoadOrStoreDisjointBuffers(
|
||||
false /*store buffer*/,
|
||||
buffers_.size(),
|
||||
[this](size_t i) { return BufferAt(i); },
|
||||
span);
|
||||
}
|
||||
|
||||
auto Set(std::vector<T>&& moveableData) {
|
||||
Set(moveableData.size() * sizeof(T), moveableData.data());
|
||||
}
|
||||
|
||||
private:
|
||||
auto BufferFrom(wss::IBuffer buffer) {
|
||||
byte* current_data = nullptr;
|
||||
auto bufferByteAccess = buffer.as<Windows::Storage::Streams::IBufferByteAccess>();
|
||||
bufferByteAccess->Buffer(¤t_data);
|
||||
return gsl::span<byte>(
|
||||
current_data,
|
||||
static_cast<size_t>(buffer.Capacity()));
|
||||
}
|
||||
|
||||
auto BufferAt(size_t index) {
|
||||
return BufferFrom(buffers_[index]);
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
class TensorBuffer<std::string> {
|
||||
std::vector<std::string> buffer_;
|
||||
|
||||
TensorBuffer(size_t size) : buffer_(size) {}
|
||||
|
||||
public:
|
||||
static auto Create(size_t size) {
|
||||
return std::shared_ptr<TensorBuffer>(new TensorBuffer(size));
|
||||
}
|
||||
|
||||
auto NumElements() {
|
||||
return buffer_.size();
|
||||
}
|
||||
|
||||
auto SizeInBytes() {
|
||||
return buffer_.size();
|
||||
}
|
||||
|
||||
auto NumBuffers() {
|
||||
return 1;
|
||||
}
|
||||
|
||||
auto Flush() {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto Buffers() -> std::vector<wss::IBuffer>& {
|
||||
WINML_THROW_HR(E_UNEXPECTED);
|
||||
}
|
||||
|
||||
auto BufferAt(size_t index) {
|
||||
WINML_THROW_HR_IF_FALSE_MSG(
|
||||
E_INVALIDARG,
|
||||
index == 0,
|
||||
"TensorString can only be backed by a single buffer!");
|
||||
return gsl::span<byte>(reinterpret_cast<byte*>(buffer_.data()), buffer_.size());
|
||||
}
|
||||
|
||||
auto Buffer(bool /*should_sync_buffer*/) {
|
||||
return BufferAt(0);
|
||||
}
|
||||
|
||||
auto Set(size_t size, std::string_view* data) {
|
||||
WINML_THROW_HR_IF_FALSE_MSG(
|
||||
E_INVALIDARG,
|
||||
size <= buffer_.size(),
|
||||
"Argument size (%d) exceeds the tensor size (%d).",
|
||||
static_cast<int>(size),
|
||||
static_cast<int>(buffer_.size()));
|
||||
|
||||
// Copy
|
||||
std::copy(data, data + size, buffer_.begin());
|
||||
}
|
||||
};
|
||||
} // namespace _winml
|
||||
|
|
@ -29,12 +29,12 @@ struct TensorResources {
|
|||
*capacity = 0;
|
||||
|
||||
// Lazily allocate the cpu resource on call to GetBuffer
|
||||
if (CpuResource == nullptr) {
|
||||
CpuResource = std::make_shared<_winml::Tensor<T>>(shape);
|
||||
if (cpu_resource_ == nullptr) {
|
||||
cpu_resource_ = std::make_shared<_winml::Tensor<T>>(shape);
|
||||
}
|
||||
|
||||
// Get the data pointer and size
|
||||
auto buffer = CpuResource->buffer();
|
||||
auto buffer = cpu_resource_->buffer();
|
||||
|
||||
// Set out parameters
|
||||
*capacity = static_cast<uint32_t>(buffer.size_bytes());
|
||||
|
|
@ -45,8 +45,8 @@ struct TensorResources {
|
|||
}
|
||||
|
||||
// These are accessed directly by TensorMemoryBufferReference<T> and TensorBase
|
||||
std::shared_ptr<_winml::Tensor<T>> CpuResource;
|
||||
winrt::com_ptr<ID3D12Resource> GpuResource;
|
||||
std::shared_ptr<_winml::Tensor<T>> cpu_resource_;
|
||||
winrt::com_ptr<ID3D12Resource> gpu_resource_;
|
||||
};
|
||||
|
||||
// This class holds onto the lifetime of TensorResources<T> so that they can be kept alive by TensorBase AND its active MBRs.
|
||||
|
|
|
|||
Loading…
Reference in a new issue