// Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #pragma once #include "robuffer.h" #include "winrt/Windows.Storage.Streams.h" #include "DisjointBufferHelpers.h" namespace _winml { class VectorBuffer : public winrt::implements< VectorBuffer, wss::IBuffer, Windows::Storage::Streams::IBufferByteAccess> { public: VectorBuffer(size_t size) : buffer_(size) {} uint32_t Capacity() const { return static_cast(buffer_.size()); } uint32_t Length() const { throw winrt::hresult_error(E_NOTIMPL); } void Length(uint32_t /*value*/) { throw winrt::hresult_error(E_NOTIMPL); } STDMETHOD(Buffer) (uint8_t** value) { RETURN_HR_IF_NULL(E_POINTER, value); *value = buffer_.data(); return S_OK; } private: std::vector buffer_; }; template class TensorBuffer { wss::IBuffer combined_buffer_; std::vector buffers_; size_t size_; TensorBuffer(size_t size) : size_(size), combined_buffer_(winrt::make(size * sizeof(T))), buffers_ { combined_buffer_ } { auto buffer = BufferAt(0); // The initial release of WinML (RS5) shipped with behavior that would // zero-initialize uninitialized tensors. After measuring, the performance impact // of memsetting the memory buffer is quite small (<1ms for 3channel 720x720 TensorFloats). // To maintain parity with RS5 behavior, we always zero out the memory buffer. memset(buffer.data(), 0, buffer.size_bytes()); } TensorBuffer( size_t size, wfc::IIterable const& buffers) : size_(size), combined_buffer_(nullptr), buffers_(begin(buffers), end(buffers)) { if (buffers_.size() == 1) { combined_buffer_ = buffers_[0]; } else { // If there are many buffers, then the combined buffer will be a separately allocated value that combines all of the buffers. // This needs to be lazily done however, as the extra memory should not be allocated when not needed (GPU). } } auto CombinedBuffer() { if (combined_buffer_ == nullptr) { combined_buffer_ = winrt::make(size_ * sizeof(T)); } return BufferFrom(combined_buffer_); } public: static auto Create(size_t size) { return std::shared_ptr(new TensorBuffer(size)); } static auto Create( size_t size, wss::IBuffer buffer) { return std::shared_ptr(new TensorBuffer(size, buffer)); } static auto Create( size_t size, wfc::IIterable const& buffers) { return std::shared_ptr(new TensorBuffer(size, buffers)); } auto NumElements() { return size_; } auto SizeInBytes() { return size_ * sizeof(T); } auto NumBuffers() { return buffers_.size(); } auto& Buffers() { return buffers_; } auto Buffer(bool should_sync_buffer) { if (buffers_.size() == 1) { // Single buffer optimization to not create a temporary buffer that concatenates disjoint buffers into one. return BufferAt(0); } auto span = CombinedBuffer(); if (should_sync_buffer) { _winml::LoadOrStoreDisjointBuffers( true /*load buffer*/, buffers_.size(), [this](size_t i) { return BufferAt(i); }, span); } return span; } auto Flush() { auto should_flush = buffers_.size() != 1; if (should_flush) { auto span = CombinedBuffer(); _winml::LoadOrStoreDisjointBuffers( false /*store buffer*/, buffers_.size(), [this](size_t i) { return BufferAt(i); }, span); } return should_flush; } auto Set(size_t size_in_bytes, const T* data) { WINML_THROW_HR_IF_FALSE_MSG( E_INVALIDARG, size_in_bytes <= (size_ * sizeof(T)), "Argument size (%llu) exceeds the tensor size (%llu).", static_cast(size_in_bytes), static_cast(size_ * sizeof(T))); gsl::span span(reinterpret_cast(const_cast(data)), size_in_bytes); _winml::LoadOrStoreDisjointBuffers( false /*store buffer*/, buffers_.size(), [this](size_t i) { return BufferAt(i); }, span); } auto Set(std::vector&& moveableData) { Set(moveableData.size() * sizeof(T), moveableData.data()); } private: auto BufferFrom(wss::IBuffer buffer) { byte* current_data = nullptr; auto bufferByteAccess = buffer.as(); bufferByteAccess->Buffer(¤t_data); return gsl::span( current_data, static_cast(buffer.Capacity())); } auto BufferAt(size_t index) { return BufferFrom(buffers_[index]); } }; template <> class TensorBuffer { std::vector buffer_; TensorBuffer(size_t size) : buffer_(size) {} public: static auto Create(size_t size) { return std::shared_ptr(new TensorBuffer(size)); } auto NumElements() { return buffer_.size(); } auto SizeInBytes() { return buffer_.size(); } auto NumBuffers() { return 1; } auto Flush() { return false; } auto Buffers() -> std::vector& { WINML_THROW_HR(E_UNEXPECTED); } auto BufferAt(size_t index) { WINML_THROW_HR_IF_FALSE_MSG( E_INVALIDARG, index == 0, "TensorString can only be backed by a single buffer!"); return gsl::span(reinterpret_cast(buffer_.data()), buffer_.size()); } auto Buffer(bool /*should_sync_buffer*/) { return BufferAt(0); } auto Set(size_t size, std::string_view* data) { WINML_THROW_HR_IF_FALSE_MSG( E_INVALIDARG, size <= buffer_.size(), "Argument size (%d) exceeds the tensor size (%d).", static_cast(size), static_cast(buffer_.size())); // Copy std::copy(data, data + size, buffer_.begin()); } }; } // namespace _winml