// Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. #pragma once #pragma warning(push) #pragma warning(disable : 6387) #ifdef _WIN32 #pragma warning(push) #pragma warning(disable : 26814) #endif #include "LearningModelBinding.h" #include "LearningModelDevice.h" #include "LearningModelSession.h" #include "TensorKindFrom.h" #include "TensorMemoryBufferReference.h" #include "core/session/onnxruntime_c_api.h" #include "ConverterResourceStore.h" #include "VideoFrameToTensorConverter.h" #include "TensorToVideoFrameConverter.h" #include "D3DDeviceCache.h" namespace _winml { // TensorBase // // This is the base class for all data based Tensor types. It exposes array and IVectorView // based getter and setters. // // Look in FeatureValue.h to see where all of them actually get created with CREATE_TENSOR() // // Supported derived classes: // Float, Int8, UInt8, UInt16, Int16, Int32, Int64, Boolean, Double, UInt32, UInt64 // // Unsupported types // Float16 and String have different access patterns and Int8, Complex64, Complex128 are unsupported // template struct TensorBase : TBase { template static void ASSERT_TEMPLATE_PARAMETERS() { // This adds compile time checks that ensure that the API can only be called when: // 1) the first template parameter matches the internal type (T), // since the api attempts copy the tensor memory of type T into a vector of type ElementType. // 2) the second template parameter matches the return type static_assert( std::is_same::value, "This API can only be called with template parameters that match its internal data type T." ); static_assert( std::is_same::value, "This API can only be called with template parameters that match its internal data type T." ); } template static void ASSERT_TEMPLATE_PARAMETERS_EXACT() { // This adds compile time checks that ensure that the API can only be called when: // 1) the conditions of ASSERT_TEMPLATE_PARAMETERS() are met. // 2) the ABI type (ViewT) matches the internal type (t). ASSERT_TEMPLATE_PARAMETERS(); static_assert( std::is_same::value, "This API can only be called with matching T and ViewT. Explicit specialization is required." ); } /// On creation, tensors can either: /// 1) act as a placeholder without any backing memory (output tensors, chained values). In this case we /// create the backing memory when the buffer is accessed. The buffer is allocated one of there scenarios: /// GPUTensorize during binding (used to create DML resources for chaining) /// UpdateSourceResourceData after eval (used for output placeholder tensors or unbound outputs) /// GetBuffer when accessed by users /// a) TensorBase() /// 2) allocate backing cpu memory (when a shape is provided) /// a) TensorBase(std::vector const& shape) /// b) TensorBase(winrt::Windows::Foundation::Collections::IIterable const& shape) /// 3) use provided backing gpu memory /// a) TensorBase(std::vector const& shape, ID3D12Resource* pResource) TensorBase() : resources_(std::make_shared>()) {} TensorBase(wfc::IIterable const& shape) : shape_(begin(shape), end(shape)), resources_(std::make_shared>()) { CpuTensor() = std::make_shared<_winml::Tensor>(shape_); } TensorBase(std::vector const& shape) : shape_(shape), resources_(std::make_shared>()) { CpuTensor() = std::make_shared<_winml::Tensor>(shape_); } TensorBase(std::vector const& shape, ID3D12Resource* resource) : shape_(shape), resources_(std::make_shared>()) { // This Api is not supported for TensorString WINML_THROW_HR_IF_TRUE_MSG( E_ILLEGAL_METHOD_CALL, (std::is_same::value), "TensorString objects cannot be created from a ID3D12Resource!" ); GpuTensor().copy_from(resource); } HRESULT CreateGPUMLValue(ID3D12Resource* resource, BindingContext& context, IValue** out) { THROW_HR_IF_NULL(E_INVALIDARG, resource); auto session = context.session.as(); auto device = session->Device().as(); WINML_THROW_HR_IF_TRUE_MSG( WINML_ERR_INVALID_BINDING, device->IsCpuDevice(), "Cannot create GPU tensor on CPU device" ); auto engine = session->GetEngine(); RETURN_IF_FAILED( engine->CreateTensorValueFromExternalD3DResource(resource, shape_.data(), shape_.size(), TensorKind(), out) ); return S_OK; } HRESULT CPUTensorize(_winml::BindingContext& context, IValue** out) { auto session = context.session.as(); auto engine = session->GetEngine(); auto should_sync_buffer = context.type == _winml::BindingType::kInput; if (CpuTensor() != nullptr) { return CreateTensorValueFromExternalBuffer(engine, should_sync_buffer, out); } // If there is no matching cpu resource, then fallback to a gpu resource if (GpuTensor() != nullptr) { return CreateGPUMLValue(GpuTensor().get(), context, out); } WINML_THROW_HR(WINML_ERR_INVALID_BINDING); } HRESULT GPUTensorize(_winml::BindingContext& context, IValue** out) { if (GpuTensor() != nullptr) { return CreateGPUMLValue(GpuTensor().get(), context, out); } // Get engine auto session = context.session.as(); auto device = session->Device().as(); auto engine = session->GetEngine(); auto should_sync_buffer = context.type == _winml::BindingType::kInput; // If there is no matching gpu resource, then fallback to a cpu resource if (CpuTensor() != nullptr) { auto num_backing_buffers = CpuTensor()->num_buffers(); if (num_backing_buffers == 1) { // If we have a single backing cpu buffer, there is no need to create GPU resources. // The engine will use the buffer provided, and perform the needed copies into the GPU context as needed. return CreateTensorValueFromExternalBuffer(engine, should_sync_buffer, out); } else { // If we have multiple backing cpu buffers, then... if (context.type == _winml::BindingType::kInput) { // If we are binding inputs, then a GPU resource needs to be allocated, and individual buffer contents need // to be copied directly into a gpu resource. if (GpuTensor() == nullptr) { GpuTensor() = CreateD3D12Resource(session); } _winml::ConverterResourceDescription descriptor = {}; descriptor.pixel_format = static_cast(wgdx::DirectXPixelFormat::Unknown); descriptor.width = static_cast(CpuTensor()->size_in_bytes()); descriptor.height = static_cast(1); descriptor.luid = device->GetD3DDevice()->GetAdapterLuid(); // Converted image on GPU context.converter = _winml::PoolObjectWrapper::Create(device->TensorizerStore()->Fetch(descriptor)); context.converter->Get()->Tensorizer->ConvertBuffersToBatchedGPUTensor( CpuTensor()->buffers(), CpuTensor()->size_in_bytes(), *device->GetD3DDeviceCache(), GpuTensor().get() ); return CreateGPUMLValue(GpuTensor().get(), context, out); } else if (context.type == _winml::BindingType::kOutput) { // If we are binding outputs, then the buffers do not need to bound. If the engine produces a output on the gpu // we already have a detensorize path which will need to copy the allocated output d3d12resource // into the output buffers without temporary intermediary buffers! No binding here is necessary. // If the output produces a cpu buffer (even in the GPU case), we will already have a cpu buffer, and just need // to copy back to the output buffers, no binding is necessary. GpuTensor() = CreateD3D12Resource(session); return CreateGPUMLValue(GpuTensor().get(), context, out); } } } if (TensorKind() == winml::TensorKind::String) { // Lazily allocate the cpu TensorString resource // TensorStrings are CPU only, and so a gpu resource cannot be allocated for them. CpuTensor() = std::make_shared<_winml::Tensor>(shape_); return CreateTensorValueFromExternalBuffer(engine, should_sync_buffer, out); } else { GpuTensor() = CreateD3D12Resource(session); return CreateGPUMLValue(GpuTensor().get(), context, out); } } winrt::com_ptr CreateD3D12Resource(winrt::com_ptr session) { // Try to allocate the backing memory for the caller auto bufferSize = std::accumulate(std::begin(shape_), std::end(shape_), static_cast(1), std::multiplies()); auto bufferByteSize = sizeof(T) * bufferSize; // DML needs the resources' sizes to be a multiple of 4 bytes if (bufferByteSize % 4 != 0) { bufferByteSize += 4 - (bufferByteSize % 4); } D3D12_HEAP_PROPERTIES heapProperties = { D3D12_HEAP_TYPE_DEFAULT, D3D12_CPU_PAGE_PROPERTY_UNKNOWN, D3D12_MEMORY_POOL_UNKNOWN, 0, 0 }; D3D12_RESOURCE_DESC resourceDesc = { D3D12_RESOURCE_DIMENSION_BUFFER, 0, static_cast(bufferByteSize), 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_ALLOW_UNORDERED_ACCESS }; auto device = session->Device().as(); winrt::com_ptr gpu_resource = nullptr; device->GetD3DDevice()->CreateCommittedResource( &heapProperties, D3D12_HEAP_FLAG_NONE, &resourceDesc, D3D12_RESOURCE_STATE_COMMON, nullptr, __uuidof(ID3D12Resource), gpu_resource.put_void() ); return gpu_resource; } void EnsureBufferNotInUse() { auto isBufferInUse = std::any_of(outstanding_references_.begin(), outstanding_references_.end(), [](auto weakRef) { return weakRef.get() != nullptr; }); WINML_THROW_HR_IF_TRUE_MSG( WINML_ERR_INVALID_BINDING, isBufferInUse, "The tensor has outstanding memory buffer references that must be closed prior to evaluation!" ); } // ILotusValueProviderPrivate::GetOrtValue STDMETHOD(GetValue) (_winml::BindingContext& context, IValue** out) { RETURN_HR_IF_NULL_MSG( WINML_ERR_INVALID_BINDING, resources_, "The tensor has been closed and its resources have been detached!" ); EnsureBufferNotInUse(); auto spSession = context.session.as(); auto spDevice = spSession->Device().as(); if (spDevice->IsCpuDevice()) { RETURN_IF_FAILED(CPUTensorize(context, out)); } else { RETURN_IF_FAILED(GPUTensorize(context, out)); } return S_OK; } static int64_t ShapeSize(std::vector shape) { // for each dim int64_t size = 1; for (size_t i = 0; i < shape.size(); i++) { // find out it's total size size *= shape[i]; // make sure there are no invalid dimensions (-1 or any invalid shape) THROW_HR_IF(E_INVALIDARG, shape[i] <= 0); } return size; } template void SetBufferFromValueResourceBuffer(size_t size, void* data) { // This adds compile time checks that ensure that the API can only be called when // the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met. ASSERT_TEMPLATE_PARAMETERS(); CpuTensor()->set(size, reinterpret_cast(data)); } template <> void SetBufferFromValueResourceBuffer(size_t size, void* data) { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS(); auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data()); string_data->set(size, reinterpret_cast(data)); } template HRESULT CreateTensorValueFromExternalBuffer(_winml::IEngine* engine, bool sync_buffer, IValue** value) { // This adds compile time checks that ensure that the API can only be called when // the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met. ASSERT_TEMPLATE_PARAMETERS(); RETURN_IF_FAILED_MSG( engine->CreateTensorValueFromExternalBuffer( CpuTensor()->buffer(sync_buffer).data(), CpuTensor()->size_in_bytes(), CpuTensor()->shape().data(), CpuTensor()->shape().size(), TensorKind(), value ), "Failed to prepare buffer for copy back from device resource." ); return S_OK; } template <> HRESULT CreateTensorValueFromExternalBuffer( _winml::IEngine* engine, bool /*sync_buffer*/, IValue** value ) { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS(); auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data()); auto& string_vector = string_data->get_backing_vector(); std::vector raw_values; std::transform(std::begin(string_vector), std::end(string_vector), std::back_inserter(raw_values), [&](auto& str) { return str.c_str(); }); RETURN_IF_FAILED_MSG( engine->CreateStringTensorValueFromDataWithCopy( raw_values.data(), raw_values.size(), CpuTensor()->shape().data(), CpuTensor()->shape().size(), value ), "Failed to prepare buffer for copy back from device resource." ); return S_OK; } bool GetDisableTensorCpuSyncFromMetadata(const wfc::IPropertySet& properties) { if (properties != nullptr && properties.HasKey(L"DisableTensorCpuSync")) { if (auto disableTensorCpuSyncInspectable = properties.Lookup(L"DisableTensorCpuSync")) { auto disableTensorCpuSyncValue = disableTensorCpuSyncInspectable.as(); return disableTensorCpuSyncValue.GetBoolean(); } } return false; } // ILotusValueProviderPrivate::UpdateSourceResourceData STDMETHOD(UpdateSourceResourceData) (BindingContext& context, IValue* value) { RETURN_HR_IF_NULL_MSG( E_ILLEGAL_METHOD_CALL, resources_, "The tensor has been closed and its resources have been detached during evaluation!" ); _winml::Resource updated_resource; RETURN_IF_FAILED(value->GetResource(updated_resource)); // get the shape RETURN_IF_FAILED_MSG(value->GetTensorShape(shape_), "Failed to get the tensor shape from resource!"); bool is_cpu; bool isCpuOutput = SUCCEEDED(value->IsCpu(&is_cpu)) && is_cpu; bool disableTensorCpuSyncProperty = GetDisableTensorCpuSyncFromMetadata(context.properties); bool disableCpuSync = !isCpuOutput && disableTensorCpuSyncProperty; // make sure we always have a CPU resource if (!disableCpuSync && CpuTensor() == nullptr) { CpuTensor() = std::make_shared<_winml::Tensor>(shape_); } if (isCpuOutput) { // Get the data pointer and size auto buffer = CpuTensor()->buffer(false); if (updated_resource.get() != reinterpret_cast(buffer.data())) { // Only copy the data if the source and destination are not the same! // The engine provided buffer will not match the tensor buffer when // the tensor is created as a placeholder output, or as an unbound output. auto shape_size = static_cast(ShapeSize(shape_)); SetBufferFromValueResourceBuffer(shape_size, updated_resource.get()); } else { // If the engine wrote to the data directly, it is possible that the underlying data was held by many buffers // In that case the underlying buffers will not match the engine output, and they need to be flushed. CpuTensor()->flush(); } } else if (!disableCpuSync) { // If we got a gpu resource, we should move the data to the cpu so accessors can retrieve the data. // We don't need to copy the engine provided dx resource into a local copy since we always preallocate gpu // resources for tensors. Therefore we are certain that the returned dxresource is the same as the one we passed in // and was updated in place. auto session = context.session.as(); auto device = session->Device().as(); auto engine = session->GetEngine(); if (CpuTensor()->num_buffers() == 1) { winrt::com_ptr dest; RETURN_IF_FAILED_MSG( CreateTensorValueFromExternalBuffer(engine, false, dest.put()), "Failed to prepare buffer for copy back from device resource." ); RETURN_IF_FAILED(engine->CopyValueAcrossDevices(value, dest.get())); } else { auto buffer_size_in_bytes = static_cast(ShapeSize(shape_)) * sizeof(T); _winml::ConverterResourceDescription descriptor = {}; descriptor.pixel_format = static_cast(wgdx::DirectXPixelFormat::Unknown); descriptor.luid = device->GetD3DDevice()->GetAdapterLuid(); // Converted image on GPU auto pooled_converter = _winml::PoolObjectWrapper::Create(device->DetensorizerStore()->Fetch(descriptor)); auto d3dResource = reinterpret_cast(updated_resource.get()); pooled_converter->Get()->Detensorizer->ConvertBatchedDX12TensorToBuffers( d3dResource, buffer_size_in_bytes, *device->GetD3DDeviceCache(), CpuTensor()->buffers() ); // Reset the Allocator before return to the Cache. Must Sync this background thread to that completion before we do. device->GetD3DDeviceCache()->SyncD3D12ToCPU(); pooled_converter->Get()->Detensorizer->ResetAllocator(); } } // release any converter resources and return to the pool context.converter = nullptr; return S_OK; } /// /// Tensor Creation Patterns /// // ITensor::Create static typename TBase::class_type Create() try { return winrt::make(); } WINML_CATCH_ALL // ITensor::Create static typename TBase::class_type Create(wfc::IIterable const& shape) try { typename TBase::class_type tensorValue = winrt::make(); auto tensorValueImpl = tensorValue.as(); tensorValueImpl->shape_ = std::vector(begin(shape), end(shape)); return tensorValue; } WINML_CATCH_ALL // ITensor::CreateFromIterable static typename TBase::class_type CreateFromIterable( wfc::IIterable shape, wfc::IIterable const& data ) try { std::vector vecShape(begin(shape), end(shape)); if (HasFreeDimensions(vecShape)) { // If the tensor is being created with a free dimension, the data needs to // provide its actual size so that the free dimension can be computed. // In the case of IIterable, there is no Size accessor, and so we require that // in this case the underlying object also implement IVectorView, so that we may // efficiently query the size of the data. if (auto vectorView = data.try_as>()) { vecShape = GetAdjustedShape(vecShape, vectorView.Size()); } } typename TBase::class_type tensorValue = winrt::make(vecShape); auto tensorValueImpl = tensorValue.as(); tensorValueImpl->SetBufferFromIterable(data); return tensorValue; } WINML_CATCH_ALL // ITensor::CreateFromArray static typename TBase::class_type CreateFromArray( wfc::IIterable shape, winrt::array_view data ) try { std::vector vecShape(begin(shape), end(shape)); return CreateFromArrayInternal(vecShape, data); } WINML_CATCH_ALL // ITensor::CreateFromShapeArrayAndDataArray static typename TBase::class_type CreateFromShapeArrayAndDataArray( winrt::array_view shape, winrt::array_view data ) try { std::vector vecShape(shape.begin(), shape.end()); return CreateFromArrayInternal(vecShape, data); } WINML_CATCH_ALL static typename TBase::class_type CreateFromArrayInternal( std::vector shape, winrt::array_view data ) { if (HasFreeDimensions(shape)) { shape = GetAdjustedShape(shape, data.size()); } typename TBase::class_type tensorValue = winrt::make(shape); auto tensorValueImpl = tensorValue.as(); tensorValueImpl->SetBufferFromArray(data); return tensorValue; } // ITensor::CreateFromBuffer static typename TBase::class_type CreateFromBuffer( winrt::array_view shape, wss::IBuffer const& buffer ) try { std::vector vec_shape(std::begin(shape), std::end(shape)); auto buffers = winrt::single_threaded_vector(); buffers.Append(buffer); return TensorBase::CreateFromBatchedBuffersInternal(vec_shape, buffers); } WINML_CATCH_ALL // ITensor::CreateFromBatchedBuffers static typename TBase::class_type CreateFromBatchedBuffers( wfc::IIterable shape, wfc::IIterable const& buffers ) try { std::vector vec_shape(begin(shape), end(shape)); return TensorBase::CreateFromBatchedBuffersInternal(vec_shape, buffers); } WINML_CATCH_ALL // ITensor::CreateFromBatchedBuffersInternal static typename TBase::class_type CreateFromBatchedBuffersInternal( std::vector shape, wfc::IIterable const& buffers ) { if (HasFreeDimensions(shape)) { // If the tensor is being created with a free dimension, the data needs to // provide its actual size so that the free dimension can be computed. // In the case of IIterable, there is no Size accessor, and so we require that // in this case the underlying object also implement IVectorView, so that we may // efficiently query the size of the data. auto size_in_bytes = 0; for (auto buffer : buffers) { size_in_bytes += buffer.Capacity(); } auto num_elements = size_in_bytes / sizeof(T); shape = GetAdjustedShape(shape, num_elements); } typename TBase::class_type tensorValue = winrt::make(); auto tensorValueImpl = tensorValue.as(); tensorValueImpl->SetBufferFromIterableOfBuffers(shape, buffers); return tensorValue; } // ITensorNative::CreateFromD3D12Resource static HRESULT CreateFromD3D12Resource(ID3D12Resource* value, __int64* shape, int shapeCount, IUnknown** result) { try { // make sure they gave us a valid shape THROW_HR_IF(E_INVALIDARG, shape == nullptr); THROW_HR_IF(E_INVALIDARG, shapeCount == 0); // turn the shape into a vector<> std::vector shapeVector(shape, shape + shapeCount); // for each dim UINT64 width = ShapeSize(shapeVector) * sizeof(T); // make sure they gave us a valid value THROW_HR_IF(E_INVALIDARG, value == nullptr); // make sure it's a d3d12 buffer (!texture) auto desc = value->GetDesc(); THROW_HR_IF(E_INVALIDARG, desc.Dimension != D3D12_RESOURCE_DIMENSION_BUFFER); // make sure it's big enough THROW_HR_IF(E_INVALIDARG, desc.Width < width); // make the underlying winrt object typename TBase::class_type tensorValue = winrt::make(shapeVector, value); // return it (the caller owns the ref) *result = tensorValue.as().detach(); return S_OK; } WINML_CATCH_ALL_COM } static std::vector GetAdjustedShape(std::vector shape, uint64_t actualSize) { auto shapeSize = std::accumulate( std::begin(shape), std::end(shape), static_cast(1), [](const auto& accumulatedValue, const auto& next) { if (next == -1) { return accumulatedValue; } else { return accumulatedValue * next; } } ); THROW_HR_IF(E_INVALIDARG, actualSize % shapeSize != 0); auto foundIt = std::find_if(std::begin(shape), std::end(shape), [](auto dim) { return dim == -1; }); auto iFreeDimension = std::distance(std::begin(shape), foundIt); shape[iFreeDimension] = static_cast(actualSize / shapeSize); return shape; } static bool HasFreeDimensions(std::vector const& shape) { // Ensure that all dimension values are either -1, or positive auto unsupportedIt = std::find_if(begin(shape), end(shape), [](const auto& dim) { return dim < -1; }); THROW_HR_IF(E_INVALIDARG, unsupportedIt != end(shape)); auto nFreeDimensions = std::count(begin(shape), end(shape), -1); if (nFreeDimensions == 0) { return false; } else if (nFreeDimensions == 1) { return true; } else { throw winrt::hresult_invalid_argument(); } } /// /// Tensor Data Buffer Accessor APIs /// // IMemoryBuffer::CreateReference wf::IMemoryBufferReference CreateReference() try { // Ensure that CreateReference is only called when there is 1 buffer. WINML_THROW_HR_IF_TRUE_MSG( E_ILLEGAL_METHOD_CALL, CpuTensor() != nullptr && CpuTensor()->num_buffers() != 1, "A single buffer reference cannot be retrieved when the tensor is backed by multiple buffers!" ); // Create a TensorMemoryBufferReference // Per IMemoryBuffer.CreateReference (https://docs.microsoft.com/en-us/uwp/api/windows.foundation.imemorybuffer.createreference) // "This method always successfully returns a new IMemoryBufferReference object even after the IMemoryBuffer // "has been closed. In that case, the returned IMemoryBufferReference is already closed." // Creating a TensorMemoryBufferReference with a null pointer is equivalent to creating it as closed. auto memoryBufferReference = winrt::make>(shape_, resources_); // Create and cache a weak reference to the TensorMemoryBufferReference winrt::weak_ref> weak(memoryBufferReference.as>()); outstanding_references_.push_back(weak); // Return the strong ref to the caller return memoryBufferReference; } WINML_CATCH_ALL // IMemoryBuffer::Close void Close() try { // Let go of the lifetime of the resources, this is will indicate that the memorybuffer is closed resources_ = nullptr; } WINML_CATCH_ALL // ITensorNative::GetBuffer STDMETHOD(GetBuffer) (BYTE** value, UINT32* capacity) { // This Api is not supported for TensorString RETURN_HR_IF_MSG( ERROR_INVALID_FUNCTION, (std::is_same_v), "TensorString objects cannot return byte buffers!" ); RETURN_HR_IF_NULL_MSG( E_ILLEGAL_METHOD_CALL, resources_, "The tensor has been closed and its resources have been detached!" ); return resources_->GetBuffer(shape_, value, capacity); } // ITensorNative::GetD3D12Resource STDMETHOD(GetD3D12Resource) (ID3D12Resource** ppResource) { try { // This Api is not supported for TensorString RETURN_HR_IF(ERROR_INVALID_FUNCTION, (std::is_same::value)); RETURN_HR_IF_NULL_MSG( E_ILLEGAL_METHOD_CALL, resources_, "The tensor has been closed and its resources have been detached!" ); GpuTensor().copy_to(ppResource); return S_OK; } WINML_CATCH_ALL_COM } // ITensor::GetAsVectorView template wfc::IVectorView GetAsVectorView() try { // This adds compile time checks that ensure that the API can only be called when: // 1) the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met. // 2) the signature of the method conforms to the ABI signature and the return value matches the ABI Return Type (ViewT). ASSERT_TEMPLATE_PARAMETERS_EXACT(); // This method returns the raw tensor data as an IVectorView. // This is a slow API that performs a buffer copy into a caller // owned IVectorView object. // Get the raw buffer pointer from the native tensor implementation. auto buffer = CpuTensor()->buffer(); auto element_data = static_cast(buffer.data()); // Copy data that will be passed back to caller. auto copy = std::vector(element_data, element_data + buffer.size()); // Create IVectorView from copied data. return winrt::single_threaded_vector(std::move(copy)).GetView(); } WINML_CATCH_ALL // Specialized version to convert float16 to float template <> wfc::IVectorView GetAsVectorView<_winml::Half, float>() try { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS<_winml::Half, float>(); auto buffer = CpuTensor()->buffer(); auto element_data = static_cast<_winml::Half*>(buffer.data()); // Copy the HALFs to floats std::vector float_value(buffer.size()); DirectX::PackedVector::XMConvertHalfToFloatStream( float_value.data(), sizeof(float) /* output stride */, reinterpret_cast(element_data), sizeof(_winml::Half) /* input stride */, buffer.size() ); // Create IVectorView from copied data. return winrt::single_threaded_vector(std::move(float_value)).GetView(); } WINML_CATCH_ALL // Specialized version to convert string to hstring template <> wfc::IVectorView GetAsVectorView() try { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS(); auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data()); auto& string_vector = string_data->get_backing_vector(); auto copy = std::vector(string_vector.size(), L""); std::generate(copy.begin(), copy.end(), [n = 0, &string_vector]() mutable { return _winml::Strings::HStringFromUTF8(string_vector[n++]); }); return winrt::single_threaded_vector(std::move(copy)).GetView(); } WINML_CATCH_ALL // Specialized version to convert int8_t to uint8_t template <> wfc::IVectorView GetAsVectorView() try { ASSERT_TEMPLATE_PARAMETERS(); auto buffer = CpuTensor()->buffer(); auto element_data = static_cast(buffer.data()); // Copy data that will be passed back to caller. gsl::span span(reinterpret_cast(element_data), buffer.size()); std::vector copy(span.begin(), span.begin() + buffer.size()); // Create IVectorView from copied data. return winrt::single_threaded_vector(std::move(copy)).GetView(); } WINML_CATCH_ALL /// /// Tensor Property Accessors /// // ILearningModelFeatureValue implementation winml::LearningModelFeatureKind Kind() try { return winml::LearningModelFeatureKind::Tensor; } WINML_CATCH_ALL // ITensor::TensorKind winml::TensorKind TensorKind() try { return TensorKindFrom::Type; } WINML_CATCH_ALL // ITensor::Shape wfc::IVectorView Shape() try { std::vector copy(shape_.cbegin(), shape_.cend()); return winrt::single_threaded_vector(std::move(copy)).GetView(); } WINML_CATCH_ALL // ILotusValueProviderPrivate::AbiRepresentation STDMETHOD(AbiRepresentation) (wf::IInspectable& abiRepresentation) { using ABIType = typename TBase::class_type; ABIType to = nullptr; RETURN_IF_FAILED(this->QueryInterface(winrt::guid_of(), reinterpret_cast(winrt::put_abi(to)))); to.as(abiRepresentation); return S_OK; } // ILotusValueProviderPrivate::IsPlaceholder STDMETHOD(IsPlaceholder) (bool* pIsPlaceHolder) { RETURN_HR_IF_NULL(E_POINTER, pIsPlaceHolder); RETURN_HR_IF_NULL_MSG( E_ILLEGAL_METHOD_CALL, resources_, "The tensor has been closed and its resources have been detached!" ); *pIsPlaceHolder = CpuTensor() == nullptr && GpuTensor() == nullptr; return S_OK; } private: /// /// SetBufferFromArray and parameterized specializations for MLFloat16, int8_t, and std::string /// template void SetBufferFromIterableOfBuffers(const std::vector& shape, wfc::IIterable const& buffers) { // This adds compile time checks that ensure that the API can only be called when // the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met. ASSERT_TEMPLATE_PARAMETERS_EXACT(); shape_ = shape; CpuTensor() = std::make_shared<_winml::Tensor>(shape, buffers); } template <> void SetBufferFromIterableOfBuffers<_winml::Half, float>( const std::vector& shape, wfc::IIterable const& buffers ) { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS<_winml::Half, float>(); shape_ = shape; CpuTensor() = std::make_shared<_winml::Tensor>(shape, buffers); } template <> void SetBufferFromIterableOfBuffers( const std::vector& shape, wfc::IIterable const& buffers ) { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS(); shape_ = shape; CpuTensor() = std::make_shared<_winml::Tensor>(shape, buffers); } // Specialized version to convert hstring to string template <> void SetBufferFromIterableOfBuffers< std::string, winrt::hstring>(const std::vector& /*shape*/, wfc::IIterable const& /*buffers*/) { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS(); WINML_THROW_HR_IF_TRUE_MSG( E_ILLEGAL_METHOD_CALL, (std::is_same::value), "TensorString objects cannot be created from IBuffers!" ); } /// /// SetBufferFromArray and parameterized specializations for MLFloat16, int8_t, and std::string /// template void SetBufferFromArray(winrt::array_view data) { // This adds compile time checks that ensure that the API can only be called when // the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met. ASSERT_TEMPLATE_PARAMETERS_EXACT(); // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); // This method accepts data as an array, T[], from the caller. // This is a non-destructive API, so the caller data is // left untouched, and the data is copied into internal buffers. CpuTensor()->set(data.size(), data.data()); } // Specialized version to convert floats to float16 template <> void SetBufferFromArray<_winml::Half, float>(winrt::array_view data) { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS<_winml::Half, float>(); // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); auto buffer = CpuTensor()->buffer(); auto element_data = static_cast<_winml::Half*>(buffer.data()); THROW_HR_IF(E_UNEXPECTED, data.size() != buffer.size()); DirectX::PackedVector::XMConvertFloatToHalfStream( reinterpret_cast(element_data), sizeof(_winml::Half) /* output stride */, data.data(), sizeof(float) /* input stride */, data.size() ); } // Specialized version to convert uint8_t to int8_t template <> void SetBufferFromArray(winrt::array_view data) { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS(); // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); auto size = data.size(); auto pData = data.data(); CpuTensor()->set(size, reinterpret_cast(const_cast(pData))); } // Specialized version to convert hstring to string template <> void SetBufferFromArray(winrt::array_view data) { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS(); // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data()); auto& string_vector = string_data->get_backing_vector(); THROW_HR_IF(E_UNEXPECTED, data.size() > string_vector.size()); // Convert and copy into the underlying buffer std::transform(data.begin(), data.end(), std::begin(string_vector), [](auto& element) mutable { return _winml::Strings::UTF8FromHString(element); }); } /// /// SetBufferFromIterable and parameterized specializations for MLFloat16, int8_t, and std::string /// template void SetBufferFromIterable(wfc::IIterable const& data) { // This adds compile time checks that ensure that the API can only be called when // the conditions of ASSERT_TEMPLATE_PARAMETERS_EXACT() are met. ASSERT_TEMPLATE_PARAMETERS_EXACT(); // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); auto buffer = CpuTensor()->buffer(); auto element_data = static_cast(buffer.data()); // This method accepts data as an IVectorView. // This is a non-destructive API, so the caller data is // left untouched, and the data is copied into internal buffers. std::copy(begin(data), end(data), element_data); } // Specialized version to convert floats to float16 template <> void SetBufferFromIterable<_winml::Half, float>(wfc::IIterable const& data) { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS<_winml::Half, float>(); // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); auto buffer = CpuTensor()->buffer(); auto element_data = static_cast<_winml::Half*>(buffer.data()); // Now that we take in IIterables and not vector views // how do we validate size??? // THROW_HR_IF(E_UNEXPECTED, data.Size() != size); std::transform( begin(data), end(data), reinterpret_cast(element_data), DirectX::PackedVector::XMConvertFloatToHalf ); } // Specialized version to convert uint8_t to int8_t template <> void SetBufferFromIterable(wfc::IIterable const& data) { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS(); // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); auto buffer = CpuTensor()->buffer(); auto element_data = static_cast(buffer.data()); std::transform(begin(data), end(data), element_data, [](auto element) { return static_cast(element); }); } // Specialized version to convert hstring to string template <> void SetBufferFromIterable(wfc::IIterable const& data) { // Ensure that this call is being called with the correct template parameters ASSERT_TEMPLATE_PARAMETERS(); // Ensure that the Set APIs are only called when there is 1 buffer. // These APIs are only called when the tensor is being constructed from various collection and pointer public APIs. // They should always be backed by a single underlying buffer. FAIL_FAST_HR_IF(E_ILLEGAL_METHOD_CALL, CpuTensor()->num_buffers() != 1); auto string_data = std::static_pointer_cast<_winml::string_data>(CpuTensor()->get_data()); auto& string_vector = string_data->get_backing_vector(); // Convert and copy into the underlying buffer std::transform(begin(data), end(data), std::begin(string_vector), [](const auto& element) { return _winml::Strings::UTF8FromHString(element); }); } std::shared_ptr<_winml::Tensor>& CpuTensor() { WINML_THROW_HR_IF_NULL_MSG( E_ILLEGAL_METHOD_CALL, resources_, "The tensor has been closed and its resources are detached!" ); return resources_->cpu_resource_; } winrt::com_ptr& GpuTensor() { WINML_THROW_HR_IF_NULL_MSG( E_ILLEGAL_METHOD_CALL, resources_, "The tensor has been closed and its resources are detached!" ); return resources_->gpu_resource_; } private: std::vector shape_; std::shared_ptr> resources_; std::vector>> outstanding_references_; bool m_isClosed = false; }; } // namespace _winml #ifdef _WIN32 #pragma warning(pop) #endif #pragma warning(pop)