diff --git a/cmake/winml.cmake b/cmake/winml.cmake index 9d71857473..82f7541f6d 100644 --- a/cmake/winml.cmake +++ b/cmake/winml.cmake @@ -400,15 +400,18 @@ endif(onnxruntime_USE_DML) # Add static library that will be archived/linked for both static/dynamic library add_library(winml_lib_api STATIC ${winml_lib_api_dir}/impl/FeatureCompatibility.h + ${winml_lib_api_dir}/impl/IData.h ${winml_lib_api_dir}/impl/IMapFeatureValue.h ${winml_lib_api_dir}/impl/ISequenceFeatureValue.h ${winml_lib_api_dir}/impl/MapBase.h + ${winml_lib_api_dir}/impl/NumericData.h ${winml_lib_api_dir}/impl/SequenceBase.h + ${winml_lib_api_dir}/impl/StringData.h ${winml_lib_api_dir}/impl/Tensor.h ${winml_lib_api_dir}/impl/TensorBase.h - ${winml_lib_api_dir}/impl/TensorBuffer.h ${winml_lib_api_dir}/impl/TensorKindFrom.h ${winml_lib_api_dir}/impl/TensorMemoryBufferReference.h + ${winml_lib_api_dir}/NumericData.cpp ${winml_lib_api_dir}/ImageFeatureDescriptor.cpp ${winml_lib_api_dir}/ImageFeatureDescriptor.h ${winml_lib_api_dir}/ImageFeatureValue.cpp @@ -429,8 +432,11 @@ add_library(winml_lib_api STATIC ${winml_lib_api_dir}/MapFeatureDescriptor.h ${winml_lib_api_dir}/SequenceFeatureDescriptor.cpp ${winml_lib_api_dir}/SequenceFeatureDescriptor.h + ${winml_lib_api_dir}/StringData.cpp ${winml_lib_api_dir}/TensorFeatureDescriptor.cpp ${winml_lib_api_dir}/TensorFeatureDescriptor.h + ${winml_lib_api_dir}/VectorBackedBuffer.h + ${winml_lib_api_dir}/VectorBackedBuffer.cpp ${winml_lib_api_dir}/pch/pch.h ) diff --git a/winml/lib/Api.Image/DisjointBufferHelpers.cpp b/winml/lib/Api.Image/DisjointBufferHelpers.cpp index 9ce31b0d1c..1a94bd2c16 100644 --- a/winml/lib/Api.Image/DisjointBufferHelpers.cpp +++ b/winml/lib/Api.Image/DisjointBufferHelpers.cpp @@ -3,7 +3,7 @@ namespace _winml { -void LoadOrStoreDisjointBuffers( +static void LoadOrStoreDisjointBuffers( bool should_load_buffer, size_t num_buffers, std::function(size_t)> get_buffer, @@ -31,4 +31,18 @@ void LoadOrStoreDisjointBuffers( } } +void 
LoadSpanFromDisjointBuffers( + size_t num_buffers, + std::function(size_t)> get_buffer, + gsl::span& buffer_span) { + LoadOrStoreDisjointBuffers(true /*load into the span*/, num_buffers, get_buffer, buffer_span); +} + +void StoreSpanIntoDisjointBuffers( + size_t num_buffers, + std::function(size_t)> get_buffer, + gsl::span& buffer_span) { + LoadOrStoreDisjointBuffers(false /*store into buffers*/, num_buffers, get_buffer, buffer_span); +} + } // namespace _winml diff --git a/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp b/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp index b2d3da143b..1f64fa73c9 100644 --- a/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp +++ b/winml/lib/Api.Image/TensorToVideoFrameConverter.cpp @@ -630,8 +630,7 @@ void TensorToVideoFrameConverter::ConvertBatchedDX12TensorToBuffers( byte* readback_buffer = nullptr; WINML_THROW_IF_FAILED(readback_heap_->Map(0, &CD3DX12_RANGE(0, buffer_size_in_bytes), reinterpret_cast(&readback_buffer))); auto readback_buffer_span = gsl::span(readback_buffer, buffer_size_in_bytes); - _winml::LoadOrStoreDisjointBuffers( - false /*load disjoint buffers into*/, + _winml::StoreSpanIntoDisjointBuffers( buffers.size(), [&](size_t i) { byte* buffer_start = nullptr; diff --git a/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp b/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp index 8a70a28b04..20a0ad93f1 100644 --- a/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp +++ b/winml/lib/Api.Image/VideoFrameToTensorConverter.cpp @@ -559,8 +559,7 @@ void VideoFrameToTensorConverter::ConvertBuffersToBatchedGPUTensor( WINML_THROW_IF_FAILED(upload_heap_->Map(0, &CD3DX12_RANGE(0, 0), reinterpret_cast(&gpu_buffer))); auto gpu_buffer_span = gsl::span(gpu_buffer, buffer_size_in_bytes); - _winml::LoadOrStoreDisjointBuffers( - true /*load disjoint buffers into*/, + _winml::LoadSpanFromDisjointBuffers( buffers.size(), [&](size_t i) { byte* buffer_start = nullptr; diff --git 
a/winml/lib/Api.Image/inc/DisjointBufferHelpers.h b/winml/lib/Api.Image/inc/DisjointBufferHelpers.h index eb4ba6a893..9e6c354e43 100644 --- a/winml/lib/Api.Image/inc/DisjointBufferHelpers.h +++ b/winml/lib/Api.Image/inc/DisjointBufferHelpers.h @@ -7,8 +7,12 @@ namespace _winml { -void LoadOrStoreDisjointBuffers( - bool should_load_buffer, +void LoadSpanFromDisjointBuffers( + size_t num_buffers, + std::function(size_t)> get_buffer, + gsl::span& buffer_span); + +void StoreSpanIntoDisjointBuffers( size_t num_buffers, std::function(size_t)> get_buffer, gsl::span& buffer_span); diff --git a/winml/lib/Api/NumericData.cpp b/winml/lib/Api/NumericData.cpp new file mode 100644 index 0000000000..8c09eebb2e --- /dev/null +++ b/winml/lib/Api/NumericData.cpp @@ -0,0 +1,129 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. +#include "pch.h" + +#include "impl/NumericData.h" +#include "VectorBackedBuffer.h" +#include "robuffer.h" +#include "winrt/Windows.Storage.Streams.h" +#include "DisjointBufferHelpers.h" + +namespace _winml { + +std::shared_ptr<_winml::idata> numeric_data::create( + size_t num_elements, + size_t element_size_in_bytes, + wfc::IIterable const& buffers) { + return std::make_shared(num_elements, element_size_in_bytes, buffers); +} + +numeric_data::numeric_data( + size_t num_elements, size_t element_size_in_bytes, wfc::IIterable const& buffers) : + num_elements_(num_elements), + element_size_in_bytes_(element_size_in_bytes), + combined_buffer_(nullptr), + buffers_() { + if (buffers != nullptr) { + buffers_ = { begin(buffers), end(buffers) }; + } + + if (buffers_.size() == 0) { + combined_buffer_ = winrt::make(num_elements * element_size_in_bytes); + buffers_ = { combined_buffer_ }; + auto buffer = buffer_at(0); + + // The initial release of WinML (RS5) shipped with behavior that would + // zero-initialize uninitialized tensors. 
After measuring, the performance impact + // of memsetting the memory buffer is quite small (<1ms for 3channel 720x720 TensorFloats). + // To maintain parity with RS5 behavior, we always zero out the memory buffer. + memset(buffer.data(), 0, buffer.size_bytes()); + } + else if (buffers_.size() == 1) { + combined_buffer_ = buffers_[0]; + } + else { + // If there are many buffers, then the combined buffer will be a separately allocated value that combines all of the buffers. + // This needs to be lazily done however, as the extra memory should not be allocated when not needed (GPU). + } +} + +size_t numeric_data::num_elements() { + return num_elements_; +} + +size_t numeric_data::size_in_bytes() { + return num_elements_ * element_size_in_bytes_; +} + +size_t numeric_data::num_buffers() { + return buffers_.size(); +} + +std::vector& numeric_data::buffers() { + return buffers_; +} + +gsl::span numeric_data::buffer(bool should_sync_buffer) { + if (buffers_.size() == 1) { + // Single buffer optimization to not create a temporary buffer that concatenates disjoint buffers into one. 
+ return buffer_at(0); + } + auto span = combined_buffer(); + if (should_sync_buffer) { + _winml::LoadSpanFromDisjointBuffers( + buffers_.size(), + [this](size_t i) { return buffer_at(i); }, + span); + } + + return span; +} + +bool numeric_data::flush() { + auto should_flush = buffers_.size() != 1; + if (should_flush) { + auto span = combined_buffer(); + _winml::StoreSpanIntoDisjointBuffers( + buffers_.size(), + [this](size_t i) { return buffer_at(i); }, + span); + } + return should_flush; +} + +void numeric_data::set(size_t data_size, const byte* data) { + WINML_THROW_HR_IF_FALSE_MSG( + E_INVALIDARG, + data_size <= (num_elements_ * element_size_in_bytes_), + "Argument size (%llu) exceeds the tensor size (%llu).", + static_cast(data_size), + static_cast(num_elements_ * element_size_in_bytes_)); + + gsl::span span(const_cast(data), data_size); + _winml::StoreSpanIntoDisjointBuffers( + buffers_.size(), + [this](size_t i) { return buffer_at(i); }, + span); +} + +static gsl::span get_span_from_ibuffer(wss::IBuffer buffer) { + byte* current_data = nullptr; + auto bufferByteAccess = buffer.as(); + bufferByteAccess->Buffer(&current_data); + return gsl::span( + current_data, + static_cast(buffer.Capacity())); +} + +gsl::span numeric_data::buffer_at(size_t index) { + return get_span_from_ibuffer(buffers_[index]); +} + +gsl::span numeric_data::combined_buffer() { + if (combined_buffer_ == nullptr) { + combined_buffer_ = winrt::make(num_elements_ * element_size_in_bytes_); + } + return get_span_from_ibuffer(combined_buffer_); +} + +} // namespace _winml \ No newline at end of file diff --git a/winml/lib/Api/StringData.cpp b/winml/lib/Api/StringData.cpp new file mode 100644 index 0000000000..09bc423fdc --- /dev/null +++ b/winml/lib/Api/StringData.cpp @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License.
+ +#include "pch.h" + +#include "impl/StringData.h" + +namespace _winml { + +string_data::string_data(size_t size) : + buffer_(size) {} + +std::shared_ptr<_winml::idata> string_data::create(size_t size) { + return std::make_shared(size); +} + +size_t string_data::num_elements() { + return buffer_.size(); +} + +size_t string_data::size_in_bytes() { + WINML_THROW_HR(E_UNEXPECTED); +} + +size_t string_data::num_buffers() { + return 1; +} + +bool string_data::flush() { + // Vacuously true + return true; +} + +std::vector& string_data::buffers() { + WINML_THROW_HR(E_UNEXPECTED); +} + +gsl::span string_data::buffer(bool /*should_sync_buffer*/) { + return gsl::span(reinterpret_cast(buffer_.data()), buffer_.size()); +} + +void string_data::set(size_t num_elements, const std::string_view* data) { + WINML_THROW_HR_IF_FALSE_MSG( + E_INVALIDARG, + num_elements <= buffer_.size(), + "Argument size (%d) exceeds the tensor size (%d).", + static_cast(num_elements), + static_cast(buffer_.size())); + + // Copy + std::copy(data, data + num_elements, buffer_.begin()); +} + +void string_data::set(size_t /*data_size*/, const byte* /*data*/) { + WINML_THROW_HR(E_UNEXPECTED); +} + +std::vector& string_data::get_backing_vector() { + return buffer_; +} + +} // namespace _winml \ No newline at end of file diff --git a/winml/lib/Api/VectorBackedBuffer.cpp b/winml/lib/Api/VectorBackedBuffer.cpp new file mode 100644 index 0000000000..d12f664956 --- /dev/null +++ b/winml/lib/Api/VectorBackedBuffer.cpp @@ -0,0 +1,29 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+#include "pch.h" + +#include "VectorBackedBuffer.h" + +namespace _winml { + +vector_backed_buffer::vector_backed_buffer(size_t size) : buffer_(size) {} + +uint32_t vector_backed_buffer::Capacity() const { + return static_cast(buffer_.size()); +} + +uint32_t vector_backed_buffer::Length() const { + throw winrt::hresult_error(E_NOTIMPL); +} + +void vector_backed_buffer::Length(uint32_t /*value*/) { + throw winrt::hresult_error(E_NOTIMPL); +} + +STDMETHODIMP vector_backed_buffer::Buffer(uint8_t** value) { + RETURN_HR_IF_NULL(E_POINTER, value); + *value = buffer_.data(); + return S_OK; +} + +} // namespace _winml \ No newline at end of file diff --git a/winml/lib/Api/VectorBackedBuffer.h b/winml/lib/Api/VectorBackedBuffer.h new file mode 100644 index 0000000000..a12b26ffd7 --- /dev/null +++ b/winml/lib/Api/VectorBackedBuffer.h @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "robuffer.h" +#include "winrt/Windows.Storage.Streams.h" + +namespace _winml { + +class vector_backed_buffer : public winrt::implements< + vector_backed_buffer, + wss::IBuffer, + Windows::Storage::Streams::IBufferByteAccess> { + public: + vector_backed_buffer(size_t size); + + uint32_t Capacity() const; + uint32_t Length() const; + void Length(uint32_t /*value*/); + + STDMETHOD(Buffer)(uint8_t** value); + + private: + std::vector buffer_; +}; + +} // namespace _winml \ No newline at end of file diff --git a/winml/lib/Api/impl/IData.h b/winml/lib/Api/impl/IData.h new file mode 100644 index 0000000000..d649f4d5e1 --- /dev/null +++ b/winml/lib/Api/impl/IData.h @@ -0,0 +1,25 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "IEngine.h" + +// ILotusValueProviderPrivate exposes a private Lotus interface to the engine so that it can retrieve tensor +// resources stored in winrt structures. 
+ +namespace _winml { + +struct idata { + virtual ~idata(){} + + virtual size_t num_elements() = 0; + virtual size_t size_in_bytes() = 0; + virtual size_t num_buffers() = 0; + virtual std::vector& buffers() = 0; + virtual gsl::span buffer(bool should_sync_buffer) = 0; + virtual bool flush() = 0; + virtual void set(size_t data_size, const byte* data) = 0; +}; + +} // namespace _winml \ No newline at end of file diff --git a/winml/lib/Api/impl/NumericData.h b/winml/lib/Api/impl/NumericData.h new file mode 100644 index 0000000000..1921c2f422 --- /dev/null +++ b/winml/lib/Api/impl/NumericData.h @@ -0,0 +1,46 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "IData.h" +#include "robuffer.h" +#include "winrt/Windows.Storage.Streams.h" + +namespace _winml { + +class numeric_data : public _winml::idata { + public: + static std::shared_ptr<_winml::idata> create( + size_t num_elements, + size_t element_size_in_bytes, + wfc::IIterable const& buffers); + + // Constructor; prefer create(), as this type should be created as a shared_ptr + numeric_data(size_t num_elements, size_t element_size_in_bytes, wfc::IIterable const& buffers); + gsl::span buffer_at(size_t index); + gsl::span combined_buffer(); + + public: + size_t num_elements() override; + size_t size_in_bytes() override; + size_t num_buffers() override; + + // Buffer accessors + std::vector& buffers() override; + gsl::span buffer(bool should_sync_buffer) override; + + // Flush to buffers API + bool flush() override; + + // Set APIs + void set(size_t data_size, const byte* data) override; + + private: + wss::IBuffer combined_buffer_; + std::vector buffers_; + size_t num_elements_; + size_t element_size_in_bytes_; +}; + +} // namespace _winml \ No newline at end of file diff --git a/winml/lib/Api/impl/StringData.h b/winml/lib/Api/impl/StringData.h new file mode 100644 index 0000000000..17c83d8be1 --- /dev/null +++ b/winml/lib/Api/impl/StringData.h @@ -0,0
+1,40 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "IData.h" +#include "robuffer.h" +#include "winrt/Windows.Storage.Streams.h" + +namespace _winml { + +class string_data : public _winml::idata { + public: + static std::shared_ptr<_winml::idata> create(size_t size); + + string_data(size_t size); + + size_t num_elements() override; + size_t size_in_bytes() override; + size_t num_buffers() override; + + // Buffer accessors + std::vector& buffers() override; + gsl::span buffer(bool should_sync_buffer) override; + + // Flush to buffers API + bool flush() override; + + // Set APIs + void set(size_t data_size, const byte* data) override; + + public: + void set(size_t num_elements, const std::string_view* data); + std::vector& get_backing_vector(); + + private: + std::vector buffer_; +}; + +} // namespace _winml \ No newline at end of file diff --git a/winml/lib/Api/impl/Tensor.h b/winml/lib/Api/impl/Tensor.h index a82a94a30d..a1bb950b35 100644 --- a/winml/lib/Api/impl/Tensor.h +++ b/winml/lib/Api/impl/Tensor.h @@ -3,7 +3,8 @@ #pragma once -#include "TensorBuffer.h" +#include "NumericData.h" +#include "StringData.h" // // the Tensor class is the actual object for CPU memory buffers. 
@@ -12,81 +13,83 @@ // namespace _winml { +inline size_t compute_size_of_shape(const std::vector& shape) { + auto size_of_shape = + static_cast( + std::accumulate( + std::begin(shape), + std::end(shape), + static_cast(1), + std::multiplies())); + return size_of_shape; +} + +template +inline auto create_data( + const std::vector& shape, + const wfc::IIterable& buffers) { + return _winml::numeric_data::create(compute_size_of_shape(shape), sizeof(T), buffers); +} + +template <> +inline auto create_data( + const std::vector& shape, + const wfc::IIterable& /*buffers*/) { + return _winml::string_data::create(compute_size_of_shape(shape)); +} + template class Tensor { private: - std::shared_ptr> buffer_; + std::shared_ptr<_winml::idata> data_; std::vector shape_; - public: + private: Tensor() = delete; - Tensor( - std::vector const& shape, - wfc::IIterable const& buffers) : - shape_(shape), - buffer_(TensorBuffer::Create( - static_cast(std::accumulate( - std::begin(shape), std::end(shape), - static_cast(1), std::multiplies())), - buffers)) {} + public: + Tensor(const std::vector& shape) : + shape_(shape), + data_(create_data(shape, nullptr)) {} Tensor( - std::vector const& shape) : shape_(shape), - buffer_(TensorBuffer::Create( - static_cast(std::accumulate( - std::begin(shape), std::end(shape), - static_cast(1), - std::multiplies())))) {} - - Tensor( - std::vector const&& shape) : shape_(std::move(shape)), - buffer_(TensorBuffer::Create( - static_cast(std::accumulate( - std::begin(shape), std::end(shape), - static_cast(1), - std::multiplies())))) { - } - - auto number_of_elements() const { - return buffer_->NumElements(); - } + const std::vector& shape, + const wfc::IIterable& buffers) : + shape_(shape), + data_(create_data(shape, buffers)) {} auto size_in_bytes() const { - return buffer_->SizeInBytes(); + return data_->size_in_bytes(); } auto num_buffers() { - return buffer_->NumBuffers(); + return data_->num_buffers(); } auto& buffers() { - return buffer_->Buffers(); + 
return data_->buffers(); } - auto buffer(bool should_sync_buffer = true) { - auto span = buffer_->Buffer(should_sync_buffer); - return gsl::span(reinterpret_cast(span.data()), buffer_->NumElements()); + gsl::span buffer(bool should_sync_buffer = true) { + auto span = data_->buffer(should_sync_buffer); + return gsl::span(reinterpret_cast(span.data()), data_->num_elements()); } auto flush() { - return buffer_->Flush(); + return data_->flush(); } - void set(size_t size, const T* pData) { - buffer_->Set(size * sizeof(T), pData); - } - - void set(std::vector&& other) { - buffer_->Set(other); + void set(size_t size, const T* data) { + auto size_in_bytes = size * sizeof(T); + data_->set(size_in_bytes, reinterpret_cast(data)); } const std::vector& shape() const { return shape_; } - auto get_tensor_buffer() { - return buffer_; + auto get_data() { + return data_; } }; } // namespace _winml \ No newline at end of file diff --git a/winml/lib/Api/impl/TensorBase.h b/winml/lib/Api/impl/TensorBase.h index 59aab905a9..181f3f68d4 100644 --- a/winml/lib/Api/impl/TensorBase.h +++ b/winml/lib/Api/impl/TensorBase.h @@ -74,28 +74,28 @@ struct TensorBase : TBase { /// b) TensorBase(winrt::Windows::Foundation::Collections::IIterable const& shape) /// 3) use provided backing gpu memory /// a) TensorBase(std::vector const& shape, ID3D12Resource* pResource) - TensorBase() : m_resources(std::make_shared>()) { + TensorBase() : resources_(std::make_shared>()) { } TensorBase(wfc::IIterable const& shape) : shape_(begin(shape), end(shape)), - m_resources(std::make_shared>()) { - GetCpuResource() = std::make_shared<_winml::Tensor>(shape_); + resources_(std::make_shared>()) { + CpuTensor() = std::make_shared<_winml::Tensor>(shape_); } TensorBase(std::vector const& shape) : shape_(shape), - m_resources(std::make_shared>()) { - GetCpuResource() = std::make_shared<_winml::Tensor>(shape_); + resources_(std::make_shared>()) { + CpuTensor() = std::make_shared<_winml::Tensor>(shape_); } 
TensorBase(std::vector const& shape, ID3D12Resource* resource) : shape_(shape), - m_resources(std::make_shared>()) { + resources_(std::make_shared>()) { // This Api is not supported for TensorString WINML_THROW_HR_IF_TRUE_MSG( E_ILLEGAL_METHOD_CALL, (std::is_same::value), "TensorString objects cannot be created from a ID3D12Resource!"); - GetGpuResource().copy_from(resource); + GpuTensor().copy_from(resource); } HRESULT CreateGPUMLValue(ID3D12Resource* resource, BindingContext& context, IValue** out) { @@ -117,21 +117,21 @@ struct TensorBase : TBase { auto engine = session->GetEngine(); auto should_sync_buffer = context.type == _winml::BindingType::kInput; - if (GetCpuResource() != nullptr) { + if (CpuTensor() != nullptr) { return CreateTensorValueFromExternalBuffer(engine, should_sync_buffer, out); } // If there is no matching cpu resource, then fallback to a gpu resource - if (GetGpuResource() != nullptr) { - return CreateGPUMLValue(GetGpuResource().get(), context, out); + if (GpuTensor() != nullptr) { + return CreateGPUMLValue(GpuTensor().get(), context, out); } WINML_THROW_HR(WINML_ERR_INVALID_BINDING); } HRESULT GPUTensorize(_winml::BindingContext& context, IValue** out) { - if (GetGpuResource() != nullptr) { - return CreateGPUMLValue(GetGpuResource().get(), context, out); + if (GpuTensor() != nullptr) { + return CreateGPUMLValue(GpuTensor().get(), context, out); } // Get engine @@ -142,8 +142,8 @@ struct TensorBase : TBase { auto should_sync_buffer = context.type == _winml::BindingType::kInput; // If there is no matching gpu resource, then fallback to a cpu resource - if (GetCpuResource() != nullptr) { - auto num_backing_buffers = GetCpuResource()->num_buffers(); + if (CpuTensor() != nullptr) { + auto num_backing_buffers = CpuTensor()->num_buffers(); if (num_backing_buffers == 1) { // If we have a single backing cpu buffer, there is no need to create GPU resources. 
// The engine will use the buffer provided, and perform the needed copies into the GPU context as needed. @@ -154,24 +154,24 @@ struct TensorBase : TBase { // If we are binding inputs, then a GPU resource needs to be allocated, and individual buffer contents need // to be copied directly into a gpu resource. - if (GetGpuResource() == nullptr) { - GetGpuResource() = CreateD3D12Resource(session); + if (GpuTensor() == nullptr) { + GpuTensor() = CreateD3D12Resource(session); } _winml::ConverterResourceDescription descriptor = {}; descriptor.pixel_format = static_cast(wgdx::DirectXPixelFormat::Unknown); - descriptor.width = static_cast(GetCpuResource()->size_in_bytes()); + descriptor.width = static_cast(CpuTensor()->size_in_bytes()); descriptor.height = static_cast(1); descriptor.luid = device->GetD3DDevice()->GetAdapterLuid(); // Converted image on GPU context.converter = _winml::PoolObjectWrapper::Create(device->TensorizerStore()->Fetch(descriptor)); context.converter->Get()->Tensorizer->ConvertBuffersToBatchedGPUTensor( - GetCpuResource()->buffers(), - GetCpuResource()->size_in_bytes(), + CpuTensor()->buffers(), + CpuTensor()->size_in_bytes(), *device->GetD3DDeviceCache(), - GetGpuResource().get()); + GpuTensor().get()); - return CreateGPUMLValue(GetGpuResource().get(), context, out); + return CreateGPUMLValue(GpuTensor().get(), context, out); } else if (context.type == _winml::BindingType::kOutput) { // If we are binding outputs, then the buffers do not need to bound. If the engine produces a output on the gpu @@ -179,8 +179,8 @@ struct TensorBase : TBase { // into the output buffers without temporary intermediary buffers! No binding here is necessary. // If the output produces a cpu buffer (even in the GPU case), we will already have a cpu buffer, and just need // to copy back to the output buffers, no binding is necessary. 
- GetGpuResource() = CreateD3D12Resource(session); - return CreateGPUMLValue(GetGpuResource().get(), context, out); + GpuTensor() = CreateD3D12Resource(session); + return CreateGPUMLValue(GpuTensor().get(), context, out); } } } @@ -188,11 +188,11 @@ struct TensorBase : TBase { if (TensorKind() == winml::TensorKind::String) { // Lazily allocate the cpu TensorString resource // TensorStrings are CPU only, and so a gpu resource cannot be allocated for them. - GetCpuResource() = std::make_shared<_winml::Tensor>(shape_); + CpuTensor() = std::make_shared<_winml::Tensor>(shape_); return CreateTensorValueFromExternalBuffer(engine, should_sync_buffer, out); } else { - GetGpuResource() = CreateD3D12Resource(session); - return CreateGPUMLValue(GetGpuResource().get(), context, out); + GpuTensor() = CreateD3D12Resource(session); + return CreateGPUMLValue(GpuTensor().get(), context, out); } } @@ -242,8 +242,8 @@ struct TensorBase : TBase { void EnsureBufferNotInUse() { auto isBufferInUse = std::any_of( - m_outstandingReferences.begin(), - m_outstandingReferences.end(), + outstanding_references_.begin(), + outstanding_references_.end(), [](auto weakRef) { return weakRef.get() != nullptr; }); WINML_THROW_HR_IF_TRUE_MSG(WINML_ERR_INVALID_BINDING, isBufferInUse, "The tensor has outstanding memory buffer references that must be closed prior to evaluation!"); @@ -254,7 +254,7 @@ struct TensorBase : TBase { (_winml::BindingContext& context, IValue** out) { RETURN_HR_IF_NULL_MSG( WINML_ERR_INVALID_BINDING, - m_resources, + resources_, "The tensor has been closed and its resources have been detached!"); EnsureBufferNotInUse(); @@ -289,7 +289,7 @@ struct TensorBase : TBase { // the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met. 
ASSERT_TEMPLATE_PARAMETERS(); - GetCpuResource()->set(size, reinterpret_cast(data)); + CpuTensor()->set(size, reinterpret_cast(data)); } template <> @@ -297,7 +297,8 @@ struct TensorBase : TBase { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS(); - GetCpuResource()->get_tensor_buffer()->Set(size, reinterpret_cast(data)); + auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data()); + string_data->set(size, reinterpret_cast(data)); } template @@ -307,8 +308,8 @@ struct TensorBase : TBase { ASSERT_TEMPLATE_PARAMETERS(); RETURN_IF_FAILED_MSG(engine->CreateTensorValueFromExternalBuffer( - GetCpuResource()->buffer(sync_buffer).data(), GetCpuResource()->size_in_bytes(), GetCpuResource()->shape().data(), - GetCpuResource()->shape().size(), TensorKind(), value), + CpuTensor()->buffer(sync_buffer).data(), CpuTensor()->size_in_bytes(), CpuTensor()->shape().data(), + CpuTensor()->shape().size(), TensorKind(), value), "Failed to prepare buffer for copy back from device resource."); return S_OK; } @@ -318,17 +319,19 @@ struct TensorBase : TBase { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS(); + auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data()); + auto& string_vector = string_data->get_backing_vector(); + std::vector raw_values; - auto string_array = static_cast(GetCpuResource()->buffer().data()); std::transform( - string_array, - string_array + GetCpuResource()->number_of_elements(), + std::begin(string_vector), + std::end(string_vector), std::back_inserter(raw_values), [&](auto& str) { return str.c_str(); }); RETURN_IF_FAILED_MSG(engine->CreateStringTensorValueFromDataWithCopy( - raw_values.data(), raw_values.size(), GetCpuResource()->shape().data(), - GetCpuResource()->shape().size(), value), + raw_values.data(), raw_values.size(), CpuTensor()->shape().data(), + 
CpuTensor()->shape().size(), value), "Failed to prepare buffer for copy back from device resource."); return S_OK; } @@ -338,7 +341,7 @@ struct TensorBase : TBase { (BindingContext& context, IValue* value) { RETURN_HR_IF_NULL_MSG( E_ILLEGAL_METHOD_CALL, - m_resources, + resources_, "The tensor has been closed and its resources have been detached during evaluation!"); _winml::Resource updated_resource; @@ -348,14 +351,14 @@ struct TensorBase : TBase { RETURN_IF_FAILED_MSG(value->GetTensorShape(shape_), "Failed to get the tensor shape from resource!"); // make sure we always have a CPU resource - if (GetCpuResource() == nullptr) { - GetCpuResource() = std::make_shared<_winml::Tensor>(shape_); + if (CpuTensor() == nullptr) { + CpuTensor() = std::make_shared<_winml::Tensor>(shape_); } bool is_cpu; if (SUCCEEDED(value->IsCpu(&is_cpu)) && is_cpu) { // Get the data pointer and size - auto buffer = GetCpuResource()->buffer(false); + auto buffer = CpuTensor()->buffer(false); if (updated_resource.get() != reinterpret_cast(buffer.data())) { // Only copy the data if the source and destination are not the same! @@ -366,7 +369,7 @@ struct TensorBase : TBase { } else { // If the engine wrote to the data directly, it is possible that the underlying data was held by many buffers // In that case the underlying buffers will not match the engine output, and they need to be flushed. - GetCpuResource()->flush(); + CpuTensor()->flush(); } } else { // If we got a gpu resource, we should move the data to the cpu so accessors can retrieve the data. 
@@ -377,7 +380,7 @@ struct TensorBase : TBase { auto device = session->Device().as(); auto engine = session->GetEngine(); - if (GetCpuResource()->num_buffers() == 1) { + if (CpuTensor()->num_buffers() == 1) { winrt::com_ptr dest; RETURN_IF_FAILED_MSG(CreateTensorValueFromExternalBuffer(engine, false, dest.put()), "Failed to prepare buffer for copy back from device resource."); @@ -395,7 +398,7 @@ struct TensorBase : TBase { d3dResource, buffer_size_in_bytes, *device->GetD3DDeviceCache(), - GetCpuResource()->buffers()); + CpuTensor()->buffers()); // Reset the Allocator before return to the Cache. Must Sync this background thread to that completion before we do. device->GetD3DDeviceCache()->SyncD3D12ToCPU(); @@ -615,7 +618,7 @@ struct TensorBase : TBase { // Ensure that CreateReference is only called when there is 1 buffer. WINML_THROW_HR_IF_TRUE_MSG( E_ILLEGAL_METHOD_CALL, - GetCpuResource() != nullptr && GetCpuResource()->num_buffers() != 1, "A single buffer reference cannot be retrieved when the tensor is backed by multiple buffers!"); + CpuTensor() != nullptr && CpuTensor()->num_buffers() != 1, "A single buffer reference cannot be retrieved when the tensor is backed by multiple buffers!"); // Create a TensorMemoryBufferReference @@ -624,11 +627,11 @@ struct TensorBase : TBase { // "has been closed. In that case, the returned IMemoryBufferReference is already closed." // Creating a TensorMemoryBufferReference with a null pointer is equivalent to creating it as closed. 
- auto memoryBufferReference = winrt::make>(shape_, m_resources); + auto memoryBufferReference = winrt::make>(shape_, resources_); // Create and cache a weak reference to the TensorMemoryBufferReference winrt::weak_ref> weak(memoryBufferReference.as>()); - m_outstandingReferences.push_back(weak); + outstanding_references_.push_back(weak); // Return the strong ref to the caller return memoryBufferReference; @@ -638,7 +641,7 @@ struct TensorBase : TBase { // IMemoryBuffer::Close void Close() try { // Let go of the lifetime of the resources, this is will indicate that the memorybuffer is closed - m_resources = nullptr; + resources_ = nullptr; } WINML_CATCH_ALL @@ -653,10 +656,10 @@ struct TensorBase : TBase { RETURN_HR_IF_NULL_MSG( E_ILLEGAL_METHOD_CALL, - m_resources, + resources_, "The tensor has been closed and its resources have been detached!"); - return m_resources->GetBuffer(shape_, value, capacity); + return resources_->GetBuffer(shape_, value, capacity); } // ITensorNative::GetD3D12Resource @@ -667,10 +670,10 @@ struct TensorBase : TBase { RETURN_HR_IF(ERROR_INVALID_FUNCTION, (std::is_same::value)); RETURN_HR_IF_NULL_MSG( E_ILLEGAL_METHOD_CALL, - m_resources, + resources_, "The tensor has been closed and its resources have been detached!"); - GetGpuResource().copy_to(ppResource); + GpuTensor().copy_to(ppResource); return S_OK; } WINML_CATCH_ALL_COM @@ -689,12 +692,11 @@ struct TensorBase : TBase { // owned IVectorView object. // Get the raw buffer pointer from the native tensor implementation. - auto number_of_elements = GetCpuResource()->number_of_elements(); - auto buffer = GetCpuResource()->buffer(); + auto buffer = CpuTensor()->buffer(); auto element_data = static_cast(buffer.data()); - + // Copy data that will be passed back to caller. - auto copy = std::vector(element_data, element_data + number_of_elements); + auto copy = std::vector(element_data, element_data + buffer.size()); // Create IVectorView from copied data. 
return winrt::single_threaded_vector(std::move(copy)).GetView(); @@ -707,18 +709,17 @@ struct TensorBase : TBase { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS<_winml::Half, float>(); - auto number_of_elements = GetCpuResource()->number_of_elements(); - auto buffer = GetCpuResource()->buffer(); + auto buffer = CpuTensor()->buffer(); auto element_data = static_cast<_winml::Half*>(buffer.data()); // Copy the HALFs to floats - std::vector float_value(number_of_elements); + std::vector float_value(buffer.size()); DirectX::PackedVector::XMConvertHalfToFloatStream( float_value.data(), sizeof(float) /* output stride */, reinterpret_cast(element_data), sizeof(_winml::Half) /* input stride */, - number_of_elements); + buffer.size()); // Create IVectorView from copied data. return winrt::single_threaded_vector(std::move(float_value)).GetView(); @@ -731,16 +732,15 @@ struct TensorBase : TBase { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS(); - auto number_of_elements = GetCpuResource()->number_of_elements(); - auto buffer = GetCpuResource()->buffer(); - auto element_data = static_cast(buffer.data()); + auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data()); + auto& string_vector = string_data->get_backing_vector(); - auto copy = std::vector(number_of_elements, L""); + auto copy = std::vector(string_vector.size(), L""); std::generate( copy.begin(), copy.end(), - [n = 0, &element_data]() mutable { - return _winml::Strings::HStringFromUTF8(element_data[n++]); + [n = 0, &string_vector]() mutable { + return _winml::Strings::HStringFromUTF8(string_vector[n++]); }); return winrt::single_threaded_vector(std::move(copy)).GetView(); @@ -752,14 +752,13 @@ struct TensorBase : TBase { wfc::IVectorView GetAsVectorView() try { ASSERT_TEMPLATE_PARAMETERS(); - auto number_of_elements = GetCpuResource()->number_of_elements(); - auto buffer = 
GetCpuResource()->buffer(); + auto buffer = CpuTensor()->buffer(); auto element_data = static_cast(buffer.data()); // Copy data that will be passed back to caller. - gsl::span span(reinterpret_cast(element_data), number_of_elements); - std::vector copy(span.begin(), span.begin() + number_of_elements); + gsl::span span(reinterpret_cast(element_data), buffer.size()); + std::vector copy(span.begin(), span.begin() + buffer.size()); // Create IVectorView from copied data. return winrt::single_threaded_vector(std::move(copy)).GetView(); @@ -809,10 +808,10 @@ struct TensorBase : TBase { RETURN_HR_IF_NULL(E_POINTER, pIsPlaceHolder); RETURN_HR_IF_NULL_MSG( E_ILLEGAL_METHOD_CALL, - m_resources, + resources_, "The tensor has been closed and its resources have been detached!"); - *pIsPlaceHolder = GetCpuResource() == nullptr && GetGpuResource() == nullptr; + *pIsPlaceHolder = CpuTensor() == nullptr && GpuTensor() == nullptr; return S_OK; } @@ -827,7 +826,7 @@ struct TensorBase : TBase { ASSERT_TEMPLATE_PARAMETERS_EXACT(); shape_ = shape; - GetCpuResource() = std::make_shared<_winml::Tensor>(shape, buffers); + CpuTensor() = std::make_shared<_winml::Tensor>(shape, buffers); } template <> @@ -837,7 +836,7 @@ struct TensorBase : TBase { ASSERT_TEMPLATE_PARAMETERS<_winml::Half, float>(); shape_ = shape; - GetCpuResource() = std::make_shared<_winml::Tensor>(shape, buffers); + CpuTensor() = std::make_shared<_winml::Tensor>(shape, buffers); } template <> @@ -847,7 +846,7 @@ struct TensorBase : TBase { ASSERT_TEMPLATE_PARAMETERS(); shape_ = shape; - GetCpuResource() = std::make_shared<_winml::Tensor>(shape, buffers); + CpuTensor() = std::make_shared<_winml::Tensor>(shape, buffers); } // Specialized version to convert hstring to string @@ -875,12 +874,12 @@ struct TensorBase : TBase { // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. 
// They should always be backed by a single underlying buffer. - FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1); + FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); // This method accepts data as an array, T[], from the caller. // This is a non-destructive API, so the caller data is // left untouched, and the data is copied into internal buffers. - GetCpuResource()->set(data.size(), data.data()); + CpuTensor()->set(data.size(), data.data()); } // Specialized version to convert floats to float16 @@ -892,13 +891,12 @@ struct TensorBase : TBase { // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. - FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1); + FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); - auto number_of_elements = GetCpuResource()->number_of_elements(); - auto buffer = GetCpuResource()->buffer(); + auto buffer = CpuTensor()->buffer(); auto element_data = static_cast<_winml::Half*>(buffer.data()); - THROW_HR_IF(E_UNEXPECTED, data.size() != number_of_elements); + THROW_HR_IF(E_UNEXPECTED, data.size() != buffer.size()); DirectX::PackedVector::XMConvertFloatToHalfStream( reinterpret_cast(element_data), sizeof(_winml::Half) /* output stride */, @@ -916,12 +914,12 @@ struct TensorBase : TBase { // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. 
- FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1); + FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); auto size = data.size(); auto pData = data.data(); - GetCpuResource()->set(size, reinterpret_cast(const_cast(pData))); + CpuTensor()->set(size, reinterpret_cast(const_cast(pData))); } // Specialized version to convert hstring to string @@ -933,17 +931,16 @@ struct TensorBase : TBase { // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. - FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1); + FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); - auto number_of_elements = GetCpuResource()->number_of_elements(); - auto buffer = GetCpuResource()->buffer(); - THROW_HR_IF(E_UNEXPECTED, data.size() > number_of_elements); + auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data()); + auto& string_vector = string_data->get_backing_vector(); - auto element_data = static_cast(buffer.data()); + THROW_HR_IF(E_UNEXPECTED, data.size() > string_vector.size()); // Convert and copy into the underlying buffer std::transform( - data.begin(), data.end(), element_data, + data.begin(), data.end(), std::begin(string_vector), [](auto& element) mutable { return _winml::Strings::UTF8FromHString(element); }); @@ -962,9 +959,9 @@ struct TensorBase : TBase { // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. 
- FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1); + FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); - auto buffer = GetCpuResource()->buffer(); + auto buffer = CpuTensor()->buffer(); auto element_data = static_cast(buffer.data()); // This method accepts data as an IVectorView. @@ -983,9 +980,9 @@ struct TensorBase : TBase { // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. - FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1); + FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); - auto buffer = GetCpuResource()->buffer(); + auto buffer = CpuTensor()->buffer(); auto element_data = static_cast<_winml::Half*>(buffer.data()); // Now that we take in IIterables and not vector views @@ -1009,9 +1006,9 @@ struct TensorBase : TBase { // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. - FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1); + FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); - auto buffer = GetCpuResource()->buffer(); + auto buffer = CpuTensor()->buffer(); auto element_data = static_cast(buffer.data()); std::transform(begin(data), end(data), element_data, [](auto element) { return static_cast(element); }); } @@ -1026,39 +1023,39 @@ struct TensorBase : TBase { // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. 
- FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, GetCpuResource()->num_buffers() != 1); + FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); - auto buffer = GetCpuResource()->buffer(); - auto element_data = static_cast(buffer.data()); + auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data()); + auto& string_vector = string_data->get_backing_vector(); // Convert and copy into the underlying buffer - std::transform(begin(data), end(data), element_data, [](const auto& element) { + std::transform(begin(data), end(data), std::begin(string_vector), [](const auto& element) { return _winml::Strings::UTF8FromHString(element); }); } - std::shared_ptr<_winml::Tensor>& GetCpuResource() { + std::shared_ptr<_winml::Tensor>& CpuTensor() { WINML_THROW_HR_IF_NULL_MSG( E_ILLEGAL_METHOD_CALL, - m_resources, + resources_, "The tensor has been closed and its resources are detached!"); - return m_resources->CpuResource; + return resources_->cpu_resource_; } - winrt::com_ptr& GetGpuResource() { + winrt::com_ptr& GpuTensor() { WINML_THROW_HR_IF_NULL_MSG( E_ILLEGAL_METHOD_CALL, - m_resources, + resources_, "The tensor has been closed and its resources are detached!"); - return m_resources->GpuResource; + return resources_->gpu_resource_; } private: std::vector shape_; - std::shared_ptr> m_resources; - std::vector>> m_outstandingReferences; + std::shared_ptr> resources_; + std::vector>> outstanding_references_; bool m_isClosed = false; }; diff --git a/winml/lib/Api/impl/TensorBuffer.h b/winml/lib/Api/impl/TensorBuffer.h deleted file mode 100644 index 5d9611cdc5..0000000000 --- a/winml/lib/Api/impl/TensorBuffer.h +++ /dev/null @@ -1,234 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
- -#pragma once - -#include "robuffer.h" -#include "winrt/Windows.Storage.Streams.h" -#include "DisjointBufferHelpers.h" - -namespace _winml { - -class VectorBuffer : public winrt::implements< - VectorBuffer, - wss::IBuffer, - Windows::Storage::Streams::IBufferByteAccess> { - public: - VectorBuffer(size_t size) : buffer_(size) {} - - uint32_t Capacity() const { - return static_cast(buffer_.size()); - } - - uint32_t Length() const { - throw winrt::hresult_error(E_NOTIMPL); - } - - void Length(uint32_t /*value*/) { - throw winrt::hresult_error(E_NOTIMPL); - } - - STDMETHOD(Buffer) - (uint8_t** value) { - RETURN_HR_IF_NULL(E_POINTER, value); - *value = buffer_.data(); - return S_OK; - } - - private: - std::vector buffer_; -}; - -template -class TensorBuffer { - wss::IBuffer combined_buffer_; - std::vector buffers_; - size_t size_; - - TensorBuffer(size_t size) : - size_(size), - combined_buffer_(winrt::make(size * sizeof(T))), - buffers_ { combined_buffer_ } { - auto buffer = BufferAt(0); - - // The initial release of WinML (RS5) shipped with behavior that would - // zero-initialize uninitialized tensors. After measuring, the performance impact - // of memsetting the memory buffer is quite small (<1ms for 3channel 720x720 TensorFloats). - // To maintain parity with RS5 behavior, we always zero out the memory buffer. - memset(buffer.data(), 0, buffer.size_bytes()); - } - - TensorBuffer( - size_t size, - wfc::IIterable const& buffers) : size_(size), - combined_buffer_(nullptr), - buffers_(begin(buffers), end(buffers)) { - if (buffers_.size() == 1) { - combined_buffer_ = buffers_[0]; - } else { - // If there are many buffers, then the combined buffer will be a separately allocated value that combines all of the buffers. - // This needs to be lazily done however, as the extra memory should not be allocated when not needed (GPU). 
- } - } - - auto CombinedBuffer() { - if (combined_buffer_ == nullptr) { - combined_buffer_ = winrt::make(size_ * sizeof(T)); - } - return BufferFrom(combined_buffer_); - } - - public: - static auto Create(size_t size) { - return std::shared_ptr(new TensorBuffer(size)); - } - - static auto Create( - size_t size, - wss::IBuffer buffer) { - return std::shared_ptr(new TensorBuffer(size, buffer)); - } - - static auto Create( - size_t size, - wfc::IIterable const& buffers) { - return std::shared_ptr(new TensorBuffer(size, buffers)); - } - - auto NumElements() { - return size_; - } - - auto SizeInBytes() { - return size_ * sizeof(T); - } - - auto NumBuffers() { - return buffers_.size(); - } - - auto& Buffers() { - return buffers_; - } - - auto Buffer(bool should_sync_buffer) { - if (buffers_.size() == 1) { - // Single buffer optimization to not create a temporary buffer that concatenates disjoint buffers into one. - return BufferAt(0); - } - auto span = CombinedBuffer(); - if (should_sync_buffer) { - _winml::LoadOrStoreDisjointBuffers( - true /*load buffer*/, - buffers_.size(), - [this](size_t i) { return BufferAt(i); }, - span); - } - - return span; - } - - auto Flush() { - auto should_flush = buffers_.size() != 1; - if (should_flush) { - auto span = CombinedBuffer(); - _winml::LoadOrStoreDisjointBuffers( - false /*store buffer*/, - buffers_.size(), - [this](size_t i) { return BufferAt(i); }, - span); - } - return should_flush; - } - - auto Set(size_t size_in_bytes, const T* data) { - WINML_THROW_HR_IF_FALSE_MSG( - E_INVALIDARG, - size_in_bytes <= (size_ * sizeof(T)), - "Argument size (%llu) exceeds the tensor size (%llu).", - static_cast(size_in_bytes), - static_cast(size_ * sizeof(T))); - - gsl::span span(reinterpret_cast(const_cast(data)), size_in_bytes); - _winml::LoadOrStoreDisjointBuffers( - false /*store buffer*/, - buffers_.size(), - [this](size_t i) { return BufferAt(i); }, - span); - } - - auto Set(std::vector&& moveableData) { - Set(moveableData.size() * 
sizeof(T), moveableData.data()); - } - - private: - auto BufferFrom(wss::IBuffer buffer) { - byte* current_data = nullptr; - auto bufferByteAccess = buffer.as(); - bufferByteAccess->Buffer(¤t_data); - return gsl::span( - current_data, - static_cast(buffer.Capacity())); - } - - auto BufferAt(size_t index) { - return BufferFrom(buffers_[index]); - } -}; - -template <> -class TensorBuffer { - std::vector buffer_; - - TensorBuffer(size_t size) : buffer_(size) {} - - public: - static auto Create(size_t size) { - return std::shared_ptr(new TensorBuffer(size)); - } - - auto NumElements() { - return buffer_.size(); - } - - auto SizeInBytes() { - return buffer_.size(); - } - - auto NumBuffers() { - return 1; - } - - auto Flush() { - return false; - } - - auto Buffers() -> std::vector& { - WINML_THROW_HR(E_UNEXPECTED); - } - - auto BufferAt(size_t index) { - WINML_THROW_HR_IF_FALSE_MSG( - E_INVALIDARG, - index == 0, - "TensorString can only be backed by a single buffer!"); - return gsl::span(reinterpret_cast(buffer_.data()), buffer_.size()); - } - - auto Buffer(bool /*should_sync_buffer*/) { - return BufferAt(0); - } - - auto Set(size_t size, std::string_view* data) { - WINML_THROW_HR_IF_FALSE_MSG( - E_INVALIDARG, - size <= buffer_.size(), - "Argument size (%d) exceeds the tensor size (%d).", - static_cast(size), - static_cast(buffer_.size())); - - // Copy - std::copy(data, data + size, buffer_.begin()); - } -}; -} // namespace _winml \ No newline at end of file diff --git a/winml/lib/Api/impl/TensorMemoryBufferReference.h b/winml/lib/Api/impl/TensorMemoryBufferReference.h index 61e5f5613c..dcf2f12a64 100644 --- a/winml/lib/Api/impl/TensorMemoryBufferReference.h +++ b/winml/lib/Api/impl/TensorMemoryBufferReference.h @@ -29,12 +29,12 @@ struct TensorResources { *capacity = 0; // Lazily allocate the cpu resource on call to GetBuffer - if (CpuResource == nullptr) { - CpuResource = std::make_shared<_winml::Tensor>(shape); + if (cpu_resource_ == nullptr) { + cpu_resource_ = 
std::make_shared<_winml::Tensor>(shape); } // Get the data pointer and size - auto buffer = CpuResource->buffer(); + auto buffer = cpu_resource_->buffer(); // Set out parameters *capacity = static_cast(buffer.size_bytes()); @@ -45,8 +45,8 @@ struct TensorResources { } // Theses are access directly by TensorMemoryBufferReference and TensorBase - std::shared_ptr<_winml::Tensor> CpuResource; - winrt::com_ptr GpuResource; + std::shared_ptr<_winml::Tensor> cpu_resource_; + winrt::com_ptr gpu_resource_; }; // This class holds onto the lifetime of TensorResources so that they can be kept alive by TensorBase AND its active MBRs.