2019-08-15 22:27:05 +00:00
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.
# include "pch.h"
# include "ImageFeatureValue.h"
# include "LearningModelBinding.h"
# include "LearningModelDevice.h"
# include "LearningModelSession.h"
# include <windows.media.h>
# include <wrl\wrappers\corewrappers.h>
# include "LearningModelBinding.h"
# include "LearningModelSession.h"
# include "LearningModelDevice.h"
# include "ImageConversionTypes.h"
# include "ConverterResourceStore.h"
# include "ImageFeatureDescriptor.h"
# include "core/providers/dml/DmlExecutionProvider/inc/DmlExecutionProvider.h"
# include "core/session/onnxruntime_c_api.h"
# include "D3DDeviceCache.h"
# include "TensorFeatureDescriptor.h"
// Uncomment to enable DEBUG_IMAGE_TENSOR_RESOURCE and
// allow debugging the content of the resource
//#define DEBUG_IMAGE_TENSOR_RESOURCE
using namespace WinML ;
using namespace winrt : : Windows : : Graphics : : Imaging ;
using namespace winrt : : Windows : : Graphics : : DirectX : : Direct3D11 ;
using namespace winrt : : Windows : : Graphics : : DirectX ;
using namespace Windows : : AI : : MachineLearning : : Internal ;
using namespace winrt : : Windows : : Foundation : : Collections ;
namespace winrt : : Windows : : AI : : MachineLearning : : implementation {
struct ImageFeatureValue : : ImageResourceMetadata {
std : : vector < Windows : : Graphics : : Imaging : : BitmapBounds > Bounds ;
: : Windows : : AI : : MachineLearning : : Internal : : ImageTensorDescription TensorDescriptor ;
} ;
#ifdef ENABLE_IMAGE_FEATURE_VALUE_TENSOR_DUMP
// Debug-only helper. Copies the contents of a GPU tensor resource into a
// readback buffer, maps it, breaks into the debugger so the bytes can be
// inspected, then converts the data into a 720x720 Bgra8 frame and encodes it
// to an image file on disk.
//
// pResource              - GPU buffer whose contents are dumped.
// spSession              - session providing the device, queue and execution provider.
// tensorDescriptor       - description of the tensor stored in pResource.
// tensorToImageConverter - converter used to turn the mapped bytes into a frame.
//
// Throws if creating, recording into, or mapping the readback buffer fails.
static void DumpResourceToCPU(
    ID3D12Resource* pResource,
    com_ptr<LearningModelSession> spSession,
    ImageTensorDescription tensorDescriptor,
    ::Windows::AI::MachineLearning::Internal::TensorToVideoFrameConverter* tensorToImageConverter) {
  auto spDevice = spSession->Device().as<LearningModelDevice>();
  auto spD3DDevice = spDevice->GetD3DDevice();
  auto spCommandQueue = spDevice->GetDeviceQueue();
  auto pProvider = spSession->GetExecutionProvider();

  const UINT64 bufferByteSize = pResource->GetDesc().Width;

  // Flush the DML execution provider's context before queuing the copy.
  Dml::FlushContext(pProvider);

  D3D12_HEAP_PROPERTIES heapProperties = {
      D3D12_HEAP_TYPE_READBACK,
      D3D12_CPU_PAGE_PROPERTY_UNKNOWN,
      D3D12_MEMORY_POOL_UNKNOWN,
      0,
      0};

  D3D12_RESOURCE_DESC resourceDesc = {
      D3D12_RESOURCE_DIMENSION_BUFFER,
      0,
      bufferByteSize,
      1,
      1,
      1,
      DXGI_FORMAT_UNKNOWN,
      {1, 0},
      D3D12_TEXTURE_LAYOUT_ROW_MAJOR,
      D3D12_RESOURCE_FLAG_NONE};

  // Owned by a com_ptr so the readback buffer is released on every exit path.
  com_ptr<ID3D12Resource> spCPUResource;
  THROW_IF_FAILED(spD3DDevice->CreateCommittedResource(
      &heapProperties,
      D3D12_HEAP_FLAG_NONE,
      &resourceDesc,
      D3D12_RESOURCE_STATE_COPY_DEST,
      nullptr,
      IID_PPV_ARGS(spCPUResource.put())));

  {
    ScopedCommandList scopedCommandList(spSession);
    // Record and submit the GPU -> readback copy.
    scopedCommandList.get()->CopyResource(spCPUResource.get(), pResource);
    THROW_IF_FAILED(scopedCommandList.get()->Close());
    ID3D12CommandList* pCommandLists[] = {scopedCommandList.get()};
    spCommandQueue->ExecuteCommandLists(ARRAYSIZE(pCommandLists), pCommandLists);

    // The copy runs asynchronously; wait for it before the CPU reads the
    // readback buffer and before the command list goes out of scope.
    // NOTE(review): assumes SyncD3D12ToCPU blocks until the device queue is
    // idle, as its use in UpdateSourceResourceData suggests - confirm.
    spDevice->GetD3DDeviceCache()->SyncD3D12ToCPU();
  }

  D3D12_RANGE range = {0, static_cast<SIZE_T>(bufferByteSize)};
  void* pData = nullptr;
  THROW_IF_FAILED(spCPUResource->Map(0, &range, &pData));

  DebugBreak();

  SoftwareBitmap bitmap(BitmapPixelFormat::Bgra8, 720, 720);
  Windows::Media::VideoFrame frame = Windows::Media::VideoFrame::CreateWithSoftwareBitmap(bitmap);
  tensorToImageConverter->SoftwareTensorToVideoFrame(
      spSession.as<winrt::Windows::AI::MachineLearning::LearningModelSession>(),
      reinterpret_cast<BYTE*>(pData),
      tensorDescriptor,
      frame);

  auto folder = Windows::Storage::StorageFolder::GetFolderFromPathAsync(L" C: \\ ").get();
  auto imagefile = folder.CreateFileAsync(L" out.png ", Windows::Storage::CreationCollisionOption::ReplaceExisting).get();
  auto stream = imagefile.OpenAsync(Windows::Storage::FileAccessMode::ReadWrite).get();
  // Use the PNG encoder so the encoded content matches the file name.
  auto encoder = BitmapEncoder::CreateAsync(BitmapEncoder::PngEncoderId(), stream).get();
  encoder.SetSoftwareBitmap(frame.SoftwareBitmap());
  // Wait for the flush; otherwise the function can return before the file is written.
  encoder.FlushAsync().get();

  // Unmap the resource that was mapped (the readback buffer, not the GPU
  // resource). An empty written range tells D3D12 the CPU wrote nothing.
  range.End = 0;
  spCPUResource->Unmap(0, &range);
}
#endif
// Builds an ImageFeatureValue backed by `batchSize` newly created software
// bitmaps, each using the requested pixel format, width and height.
Windows::AI::MachineLearning::ImageFeatureValue ImageFeatureValue::Create(
    uint32_t batchSize,
    BitmapPixelFormat format,
    uint32_t width,
    uint32_t height) {
  std::vector<Windows::Media::VideoFrame> frames;
  for (uint32_t created = 0; created != batchSize; ++created) {
    SoftwareBitmap backingBitmap(format, width, height);
    frames.emplace_back(Windows::Media::VideoFrame::CreateWithSoftwareBitmap(backingBitmap));
  }
  return make<ImageFeatureValue>(winrt::single_threaded_vector(std::move(frames)));
}
// Wraps a single VideoFrame in a new ImageFeatureValue.
// Exceptions are handled by WINML_CATCH_ALL.
Windows::AI::MachineLearning::ImageFeatureValue ImageFeatureValue::CreateFromVideoFrame(
    Windows::Media::VideoFrame const& image) try {
  auto featureValue = make<ImageFeatureValue>(image);
  return featureValue;
}
WINML_CATCH_ALL
void ImageFeatureValue : : Initialize ( ) {
m_batchSize = m_videoFrames . Size ( ) ;
for ( auto videoFrame : m_videoFrames ) {
// TODO: Check all videoFrames come from either CPU or GPU.
if ( auto surface = videoFrame . Direct3DSurface ( ) ) {
Direct3DSurfaceDescription description = surface . Description ( ) ;
m_widths . emplace_back ( description . Width ) ;
m_heights . emplace_back ( description . Height ) ;
} else {
ISoftwareBitmap softwarebitmap ( videoFrame . SoftwareBitmap ( ) ) ;
m_widths . emplace_back ( softwarebitmap . PixelWidth ( ) ) ;
m_heights . emplace_back ( softwarebitmap . PixelHeight ( ) ) ;
}
}
}
// Constructs a feature value holding exactly one frame.
ImageFeatureValue::ImageFeatureValue(Windows::Media::VideoFrame const& image) {
  std::vector<Windows::Media::VideoFrame> singleFrame(1, image);
  m_videoFrames = winrt::single_threaded_vector(std::move(singleFrame));
  Initialize();
}
// Constructs a feature value that uses the supplied vector as its frame
// collection, then caches the per-frame dimensions.
ImageFeatureValue::ImageFeatureValue(IVector<Windows::Media::VideoFrame> const& images)
    : m_videoFrames(images) {
  Initialize();
}
// Constructs a feature value from a read-only view by copying each frame
// reference, in order, into a new single-threaded vector.
ImageFeatureValue::ImageFeatureValue(IVectorView<Windows::Media::VideoFrame> const& images) {
  std::vector<Windows::Media::VideoFrame> copiedFrames;
  uint32_t index = 0;
  while (index < images.Size()) {
    copiedFrames.emplace_back(images.GetAt(index));
    ++index;
  }

  m_videoFrames = winrt::single_threaded_vector(std::move(copiedFrames));
  Initialize();
}
static std : : optional < BitmapPixelFormat > GetBitmapPixelFormatFromMetadata ( const IPropertySet & properties ) {
if ( properties ! = nullptr & & properties . HasKey ( L " BitmapPixelFormat " ) ) {
if ( auto pixelFormatInspectable = properties . Lookup ( L " BitmapPixelFormat " ) ) {
auto pixelFormatValue = pixelFormatInspectable . as < Windows : : Foundation : : IPropertyValue > ( ) ;
auto pixelFormat = static_cast < BitmapPixelFormat > ( pixelFormatValue . GetInt32 ( ) ) ;
WINML_THROW_HR_IF_FALSE_MSG (
WINML_ERR_INVALID_BINDING ,
pixelFormat = = BitmapPixelFormat : : Rgba8 | |
pixelFormat = = BitmapPixelFormat : : Bgra8 | |
pixelFormat = = BitmapPixelFormat : : Gray8 ,
" BitmapPixelFormat must be either Rgba8, Bgra8, or Gray8 " ) ;
return pixelFormat ;
}
}
return { } ;
}
static std : : optional < BitmapBounds > GetBoundsFromMetadata ( const IPropertySet & properties ) {
if ( properties ! = nullptr & & properties . HasKey ( L " BitmapBounds " ) ) {
if ( auto boundsInspectable = properties . Lookup ( L " BitmapBounds " ) ) {
auto boundsPropertyValue = boundsInspectable . as < Windows : : Foundation : : IPropertyValue > ( ) ;
WINML_THROW_HR_IF_FALSE_MSG (
WINML_ERR_INVALID_BINDING ,
boundsPropertyValue . Type ( ) = = Windows : : Foundation : : PropertyType : : UInt32Array ,
" BitmapBounds must reference a property value with type UInt32Array with 4 elements. " ) ;
com_array < uint32_t > bounds ;
boundsPropertyValue . GetUInt32Array ( bounds ) ;
WINML_THROW_HR_IF_FALSE_MSG (
WINML_ERR_INVALID_BINDING ,
bounds . size ( ) = = 4 ,
" BitmapBounds must reference a property value with type UInt32Array with 4 elements. " ) ;
return Windows : : Graphics : : Imaging : : BitmapBounds { bounds [ 0 ] , bounds [ 1 ] , bounds [ 2 ] , bounds [ 3 ] } ;
}
}
return { } ;
}
// Computes a centered crop of frame `idx` whose aspect ratio matches
// desiredWidth / desiredHeight. The crop keeps the full height when the frame
// is wider than required and the full width otherwise.
// Throws WINML_ERR_INVALID_BINDING if the computed origin lies outside the frame.
BitmapBounds ImageFeatureValue::CenterAndCropBounds(
    uint32_t idx,
    uint32_t desiredWidth,
    uint32_t desiredHeight) {
  const auto frameWidth = m_widths[idx];
  const auto frameHeight = m_heights[idx];
  const float targetAspect = static_cast<float>(desiredWidth) / static_cast<float>(desiredHeight);

  BitmapBounds crop = {};
  const bool frameTooWide = targetAspect * frameHeight < frameWidth;
  if (frameTooWide) {
    // Trim equal amounts from the left and right edges (width rounded to nearest).
    crop.Width = std::min(static_cast<UINT>(targetAspect * frameHeight + 0.5f), frameWidth);
    crop.Height = frameHeight;
    crop.X = (frameWidth - crop.Width) / 2;
    crop.Y = 0;
  } else {
    // Trim equal amounts from the top and bottom edges (height rounded to nearest).
    crop.Width = frameWidth;
    crop.Height = std::min(static_cast<UINT>(frameWidth / targetAspect + 0.5f), frameHeight);
    crop.X = 0;
    crop.Y = (frameHeight - crop.Height) / 2;
  }

  // TODO: Do we allow smaller images?
  const bool originInsideFrame =
      (crop.X >= 0 && crop.X <= frameWidth) &&
      (crop.Y >= 0 && crop.Y <= frameHeight);
  WINML_THROW_HR_IF_FALSE_MSG(
      WINML_ERR_INVALID_BINDING,
      originInsideFrame,
      " Failed to center crop the provided input image. The calculated bounds exceed the dimensions of the image, or do not match the model inputs dimensions. ");

  return crop;
}
// Maps a TensorKind to the matching image tensor data type.
// Only Float and Float16 are accepted; any other kind raises
// WINML_ERR_INVALID_BINDING.
static ImageTensorDataType GetTensorDataTypeFromTensorKind(TensorKind kind) {
  if (kind == TensorKind::Float) {
    return kImageTensorDataTypeFloat32;
  }
  if (kind == TensorKind::Float16) {
    return kImageTensorDataTypeFloat16;
  }

  WINML_THROW_HR_IF_FALSE_MSG(WINML_ERR_INVALID_BINDING, false, " Model image inputs must have tensor type of Float or Float16. ");
  // Fallback in case the throw above ever returns.
  FAIL_FAST_HR(E_INVALIDARG);
}
// Returns the size in bytes of one element of the given image tensor data
// type: sizeof(float) for Float32 and sizeof(uint16_t) for Float16.
// Any other type raises WINML_ERR_INVALID_BINDING.
static unsigned GetSizeFromTensorDataType(ImageTensorDataType type) {
  if (type == kImageTensorDataTypeFloat32) {
    return sizeof(float);
  }
  if (type == kImageTensorDataTypeFloat16) {
    return sizeof(uint16_t);
  }

  WINML_THROW_HR_IF_FALSE_MSG(WINML_ERR_INVALID_BINDING, false, " Model image inputs must have tensor type of Float or Float16. ");
  // Fallback in case the throw above ever returns.
  FAIL_FAST_HR(E_INVALIDARG);
}
// Builds the tensor description for a batch of images. The four sizes are
// filled in as { batchSize, channels, height, width }, where channels is 3 for
// Rgba8 / Bgra8 and 1 for Gray8.
// Throws E_NOTIMPL for any other pixel format; an unsupported tensor kind is
// rejected by GetTensorDataTypeFromTensorKind.
static ImageTensorDescription CreateImageTensorDescriptor(TensorKind tensorKind, BitmapPixelFormat pixelFormat, uint32_t batchSize, uint32_t width, uint32_t height) {
  ImageTensorDescription description = {};
  description.dataType = GetTensorDataTypeFromTensorKind(tensorKind);

  switch (pixelFormat) {
    case Windows::Graphics::Imaging::BitmapPixelFormat::Rgba8:
      description.channelType = kImageTensorChannelTypeRGB8;
      description.sizes[1] = 3;
      break;
    case Windows::Graphics::Imaging::BitmapPixelFormat::Bgra8:
      description.channelType = kImageTensorChannelTypeBGR8;
      description.sizes[1] = 3;
      break;
    case Windows::Graphics::Imaging::BitmapPixelFormat::Gray8:
      description.channelType = kImageTensorChannelTypeGRAY8;
      description.sizes[1] = 1;
      break;
    default:
      THROW_HR(E_NOTIMPL);
  }

  description.sizes[0] = batchSize;
  description.sizes[2] = height;
  description.sizes[3] = width;
  return description;
}
static void CPUTensorize (
Windows : : Media : : IVideoFrame videoFrame ,
BitmapBounds bounds ,
ImageTensorDescription tensorDescriptor ,
com_ptr < LearningModelSession > spSession ,
void * pResource ) {
auto spDevice = spSession - > Device ( ) . as < LearningModelDevice > ( ) ;
ConverterResourceDescription descriptor = { } ;
descriptor . pixel_format = static_cast < DWORD > ( BitmapPixelFormat : : Bgra8 ) ;
2019-11-08 21:23:44 +00:00
descriptor . width = static_cast < int > ( tensorDescriptor . sizes [ 3 ] ) ;
descriptor . height = static_cast < int > ( tensorDescriptor . sizes [ 2 ] ) ;
2019-08-15 22:27:05 +00:00
descriptor . luid = { } ; // Converted image on CPU
auto pooledConverter = PoolObjectWrapper : : Create ( spDevice - > TensorizerStore ( ) - > Fetch ( descriptor ) ) ;
//apply tensorization
pooledConverter - > Get ( ) - > Tensorizer - > VideoFrameToSoftwareTensor (
videoFrame ,
bounds ,
tensorDescriptor ,
reinterpret_cast < BYTE * > ( pResource ) ) ;
// Software tensorization doesnt need to hold onto any resources beyond its scope, so we can
// return the converter to the pool on tensorization completion.
// (This happens automatically in the destruction of PoolObjectWrapper)
}
static void CPUTensorize (
IVector < Windows : : Media : : VideoFrame > videoFrames ,
std : : vector < BitmapBounds > bounds ,
ImageTensorDescription tensorDescriptor ,
com_ptr < LearningModelSession > spSession ,
void * pResource ,
unsigned int singleFrameBufferSize ) {
// Tensorize video frames one by one without extra copy.
BYTE * tempPResource = reinterpret_cast < BYTE * > ( pResource ) ;
for ( uint32_t batchIdx = 0 ; batchIdx < videoFrames . Size ( ) ; + + batchIdx ) {
CPUTensorize ( videoFrames . GetAt ( batchIdx ) , bounds [ batchIdx ] , tensorDescriptor , spSession , tempPResource ) ;
tempPResource + = singleFrameBufferSize ;
}
}
static void GPUTensorize (
IVector < Windows : : Media : : VideoFrame > videoFrames ,
std : : vector < BitmapBounds > bounds ,
ImageTensorDescription tensorDescriptor ,
com_ptr < LearningModelSession > spSession ,
void * pAllocatedResource ,
WinML : : BindingContext & context ) {
auto d3dResource =
2019-11-08 00:50:24 +00:00
_winmla : : GetD3D12ResourceFromAllocation (
spSession - > GetExecutionProvider ( ) ,
2019-08-15 22:27:05 +00:00
pAllocatedResource ) ;
auto spDevice = spSession - > Device ( ) . as < LearningModelDevice > ( ) ;
ConverterResourceDescription descriptor = { } ;
descriptor . pixel_format = static_cast < DWORD > ( DirectXPixelFormat : : B8G8R8X8UIntNormalized ) ;
2019-11-08 21:23:44 +00:00
descriptor . width = static_cast < int > ( tensorDescriptor . sizes [ 3 ] ) ;
descriptor . height = static_cast < int > ( tensorDescriptor . sizes [ 2 ] ) ;
2019-08-15 22:27:05 +00:00
descriptor . luid = spDevice - > GetD3DDevice ( ) - > GetAdapterLuid ( ) ; // Converted image on GPU
// Tensorize video frames one by one without extra copy.
for ( uint32_t batchIdx = 0 ; batchIdx < videoFrames . Size ( ) ; + + batchIdx ) {
auto pooledConverter = PoolObjectWrapper : : Create ( spDevice - > TensorizerStore ( ) - > Fetch ( descriptor ) ) ;
{
// Apply tensorization
auto session = spSession . as < winrt : : Windows : : AI : : MachineLearning : : LearningModelSession > ( ) ;
pooledConverter - > Get ( ) - > Tensorizer - > VideoFrameToDX12Tensor (
batchIdx ,
session ,
videoFrames . GetAt ( batchIdx ) ,
bounds [ batchIdx ] ,
tensorDescriptor ,
d3dResource ) ;
// Tensorization to a GPU tensor will run asynchronously and associated resources
// need to be kept alive until the gpu resources have been used in the queue.
//
// The PoolObjectWrapper needs to stay alive so that the underlying resources are
// not released to the cache.
//
// This object will be returned to the cache when evaluate has completed. So we cache this
// on the binding context.
context . converter = pooledConverter ;
}
}
# ifdef DEBUG_IMAGE_TENSOR_RESOURCE
DumpResourceToCPU ( d3dResource , spSession , tensorDescriptor ) ;
# endif
}
// Creates an OrtValue wrapping a new tensor whose shape is taken from
// tensorDescriptor.sizes and whose element kind is Float for Float32
// descriptors and Float16 otherwise. The tensor is created against the
// session's execution provider.
static OrtValue CreateMLValue(
    com_ptr<LearningModelSession> spSession,
    const ImageTensorDescription& tensorDescriptor) {
  const TensorKind elementKind =
      (tensorDescriptor.dataType == kImageTensorDataTypeFloat32) ? TensorKind::Float : TensorKind::Float16;
  const auto dimensionCount = sizeof(tensorDescriptor.sizes) / sizeof(tensorDescriptor.sizes[0]);

  auto pTensor = _winmla::CreateTensor(
      elementKind,
      &(tensorDescriptor.sizes[0]),
      dimensionCount,
      spSession->GetExecutionProvider());

  // The OrtValue takes the tensor together with the delete function of its ML type.
  auto tensorType = onnxruntime::DataTypeImpl::GetType<onnxruntime::Tensor>();
  OrtValue value;
  value.Init(pTensor, tensorType, tensorType->GetDeleteFunc());
  return value;
}
std : : optional < ImageFeatureValue : : ImageResourceMetadata > ImageFeatureValue : : GetInputMetadata ( const WinML : : BindingContext & context ) {
uint32_t descriptorWidth ;
uint32_t descriptorHeight ;
TensorKind tensorKind = TensorKind : : Undefined ;
auto spImageDescriptor = context . descriptor . try_as < ImageFeatureDescriptor > ( ) ;
auto spTensorDescriptor = context . descriptor . try_as < TensorFeatureDescriptor > ( ) ;
// Set up descriptorWidth and descriptorHeight
if ( spImageDescriptor ) {
// If model expects free dimensions the descritpr will have MAXUINT32, and we use the supplied image
// If the width or height in model metadata is -1, which means free dimension.
// The the widths and heights of input data must be the same. Or the
// tensorDescriptor cannot describ the shape of the inputs.
if ( spImageDescriptor - > Width ( ) = = MAXUINT32 & &
! ( std : : adjacent_find ( m_widths . begin ( ) , m_widths . end ( ) , std : : not_equal_to < uint32_t > ( ) ) = = m_widths . end ( ) ) ) {
THROW_HR ( E_INVALIDARG ) ;
}
if ( spImageDescriptor - > Height ( ) = = MAXUINT32 & &
! ( std : : adjacent_find ( m_heights . begin ( ) , m_heights . end ( ) , std : : not_equal_to < uint32_t > ( ) ) = = m_heights . end ( ) ) ) {
THROW_HR ( E_INVALIDARG ) ;
}
descriptorWidth = ( spImageDescriptor - > Width ( ) = = MAXUINT32 ) ? m_widths [ 0 ] : spImageDescriptor - > Width ( ) ;
descriptorHeight = ( spImageDescriptor - > Height ( ) = = MAXUINT32 ) ? m_heights [ 0 ] : spImageDescriptor - > Height ( ) ;
tensorKind = spImageDescriptor - > TensorKind ( ) ;
} else if ( spTensorDescriptor ) {
// If model expects a tensor, use its shape
auto shape = spTensorDescriptor - > Shape ( ) ;
if ( shape . Size ( ) ! = 4 ) {
return { } ;
}
bool hasAccecptableChannelSize = ( shape . GetAt ( 1 ) = = 3 | | shape . GetAt ( 1 ) = = 1 ) ;
if ( ! hasAccecptableChannelSize ) {
return { } ;
}
if ( - 1 = = shape . GetAt ( 3 ) & &
! ( std : : adjacent_find ( m_widths . begin ( ) , m_widths . end ( ) , std : : not_equal_to < uint32_t > ( ) ) = = m_widths . end ( ) ) ) {
THROW_HR ( E_INVALIDARG ) ;
}
if ( - 1 = = shape . GetAt ( 2 ) & &
! ( std : : adjacent_find ( m_heights . begin ( ) , m_heights . end ( ) , std : : not_equal_to < uint32_t > ( ) ) = = m_heights . end ( ) ) ) {
THROW_HR ( E_INVALIDARG ) ;
}
descriptorWidth = ( - 1 = = shape . GetAt ( 3 ) ) ? m_widths [ 0 ] : static_cast < uint32_t > ( shape . GetAt ( 3 ) ) ;
descriptorHeight = ( - 1 = = shape . GetAt ( 2 ) ) ? m_heights [ 0 ] : static_cast < uint32_t > ( shape . GetAt ( 2 ) ) ;
tensorKind = spTensorDescriptor - > TensorKind ( ) ;
} else {
return { } ;
}
// Set up BitmapBounds
// For batch of images with different sizes, like { {1, 3, 1080, 1080}, {1, 3, 720, 720} },
// a vector of bounds is to record the result after cropped.
std : : vector < BitmapBounds > bounds = { } ;
for ( uint32_t i = 0 ; i < m_batchSize ; + + i ) {
auto tempBounds = GetBoundsFromMetadata ( context . properties ) ;
if ( ! tempBounds . has_value ( ) ) {
// If the user has not specified bounds, we need to infer the bounds
// from the combination of descriptor, and input value or output value
if ( context . type = = BindingType : : kInput ) {
// If unspecified output, get the crop with correct aspect ratio
tempBounds = CenterAndCropBounds ( i , descriptorWidth , descriptorHeight ) ;
} else {
// If given an unspecified output region, write into the top left portion of the output image.
tempBounds = BitmapBounds { 0 , 0 , m_widths [ i ] , m_heights [ i ] } ;
}
}
bounds . emplace_back ( tempBounds . value ( ) ) ;
}
// TODO: Validate Bounds
// Set up BitmapPixelFormat
auto pixelFormat = std : : optional < BitmapPixelFormat > { } ;
pixelFormat = GetBitmapPixelFormatFromMetadata ( context . properties ) ;
if ( ! pixelFormat . has_value ( ) & & spImageDescriptor ) {
pixelFormat = spImageDescriptor - > BitmapPixelFormat ( ) ;
} else if ( ! pixelFormat . has_value ( ) & & spTensorDescriptor ) {
auto shape = spTensorDescriptor - > Shape ( ) ;
int channelCount = static_cast < uint32_t > ( shape . GetAt ( 1 ) ) ;
if ( channelCount = = 1 ) {
// Assume Gray if no image descriptor is given and channelcount 1
pixelFormat = BitmapPixelFormat : : Gray8 ;
} else if ( channelCount = = 3 ) {
// Assume Bgra8 if no image descriptor is given
pixelFormat = BitmapPixelFormat : : Bgra8 ;
} else {
THROW_HR ( WINML_ERR_SIZE_MISMATCH ) ;
}
}
//NCHW layout
auto imageTensorDescriptor = CreateImageTensorDescriptor ( tensorKind , pixelFormat . value ( ) , m_batchSize , descriptorWidth , descriptorHeight ) ;
return ImageResourceMetadata { bounds , imageTensorDescriptor } ;
}
// Creates the OrtValue backing this feature value for the binding in `context`
// and, for input bindings, tensorizes the frames into it (on the CPU or the
// GPU depending on the session's device).
//
// Returns E_INVALIDARG when no metadata can be resolved for the descriptor or
// when an input binding has an empty batch. Fails fast on a null out-pointer
// or a frame with a zero width or height. Exceptions are translated by
// WINML_CATCH_ALL_COM.
HRESULT ImageFeatureValue::GetOrtValue(WinML::BindingContext& context, OrtValue* p_ort_value) try {
  FAIL_FAST_IF(p_ort_value == nullptr);

  const auto isNonZero = [](uint32_t dimension) { return dimension != 0; };
  FAIL_FAST_IF(!(std::all_of(m_widths.begin(), m_widths.end(), isNonZero)));
  FAIL_FAST_IF(!(std::all_of(m_heights.begin(), m_heights.end(), isNonZero)));

  // Get image metadata from the binding context
  auto metadata = GetInputMetadata(context);
  RETURN_HR_IF(E_INVALIDARG, !metadata);
  ImageResourceMetadata resourceMetadata = metadata.value();

  const bool isInput = (context.type == BindingType::kInput);
  // The per-frame buffer size below divides by the batch size, so reject an
  // empty input batch before allocating anything.
  RETURN_HR_IF(E_INVALIDARG, isInput && m_batchSize == 0);

  // Get the session
  auto spSession = context.session.as<LearningModelSession>();
  auto spDevice = spSession->Device().as<LearningModelDevice>();

  *p_ort_value = CreateMLValue(spSession, resourceMetadata.TensorDescriptor);

  // Get the tensor
  auto& tensor = p_ort_value->Get<onnxruntime::Tensor>();
  auto pAllocatedResource = const_cast<void*>(tensor.DataRaw());

  if (isInput) {
    // Only tensorize inputs
    auto bufferSize = std::accumulate(
        std::begin(resourceMetadata.TensorDescriptor.sizes),
        std::end(resourceMetadata.TensorDescriptor.sizes),
        static_cast<int64_t>(1),
        std::multiplies<int64_t>());
    auto bufferByteSize = GetSizeFromTensorDataType(resourceMetadata.TensorDescriptor.dataType) * bufferSize;
    auto singleFrameBufferSize = bufferByteSize / m_batchSize;

    if (spDevice->IsCpuDevice()) {
      CPUTensorize(m_videoFrames, resourceMetadata.Bounds, resourceMetadata.TensorDescriptor, spSession, pAllocatedResource, static_cast<unsigned int>(singleFrameBufferSize));
    } else {
      GPUTensorize(m_videoFrames, resourceMetadata.Bounds, resourceMetadata.TensorDescriptor, spSession, pAllocatedResource, context);
    }
  }

  return S_OK;
}
WINML_CATCH_ALL_COM
// Reports through pIsPlaceHolder that an ImageFeatureValue is never a
// placeholder. Fails fast when the out-pointer is null.
HRESULT ImageFeatureValue::IsPlaceholder(bool* pIsPlaceHolder) {
  FAIL_FAST_IF_NULL(pIsPlaceHolder);

  constexpr bool kIsPlaceholder = false;
  *pIsPlaceHolder = kIsPlaceholder;
  return S_OK;
}
// Writes the contents of an evaluated tensor (`mlValue`) back into this
// feature value's video frames, one frame per batch entry, and then clears the
// converter cached on the binding context.
//
// Tensors located in CPU memory are detensorized in software; all others are
// treated as D3D12 resources and detensorized on the GPU.
//
// Returns E_INVALIDARG when no metadata can be resolved for the descriptor or
// when a CPU tensor is paired with an empty batch. Exceptions are translated
// by WINML_CATCH_ALL_COM.
HRESULT ImageFeatureValue::UpdateSourceResourceData(BindingContext& context, OrtValue& mlValue) try {
  // Get the device
  auto spSession = context.session.as<LearningModelSession>();
  auto spDevice = spSession->Device().as<LearningModelDevice>();

  // Get the output tensor
  auto& tensor = mlValue.Get<onnxruntime::Tensor>();
  auto pAllocatedResource = const_cast<void*>(tensor.DataRaw());

  // Get the run context. Check the optional before use, as GetOrtValue does.
  auto metadata = GetInputMetadata(context);
  RETURN_HR_IF(E_INVALIDARG, !metadata);
  ImageResourceMetadata resourceMetadata = metadata.value();

  ConverterResourceDescription descriptor = {};
  descriptor.width = static_cast<int>(resourceMetadata.TensorDescriptor.sizes[3]);
  descriptor.height = static_cast<int>(resourceMetadata.TensorDescriptor.sizes[2]);

  const bool tensorIsOnCpu =
      !strcmp(tensor.Location().name, onnxruntime::CPU) ||
      tensor.Location().mem_type == ::OrtMemType::OrtMemTypeCPUOutput ||
      tensor.Location().mem_type == ::OrtMemType::OrtMemTypeCPUInput;

  if (tensorIsOnCpu) {
    // The per-frame byte size below divides by the batch size.
    RETURN_HR_IF(E_INVALIDARG, m_batchSize == 0);

    descriptor.pixel_format = static_cast<DWORD>(BitmapPixelFormat::Bgra8);
    descriptor.luid = {};  // Converted image on CPU

    auto pooledConverter = PoolObjectWrapper::Create(spDevice->DetensorizerStore()->Fetch(descriptor));

    auto bufferSize = std::accumulate(
        std::begin(resourceMetadata.TensorDescriptor.sizes),
        std::end(resourceMetadata.TensorDescriptor.sizes),
        static_cast<int64_t>(1),
        std::multiplies<int64_t>());
    auto bufferByteSize = GetSizeFromTensorDataType(resourceMetadata.TensorDescriptor.dataType) * bufferSize / m_batchSize;

    BYTE* tempPAllocatedResource = reinterpret_cast<BYTE*>(pAllocatedResource);
    for (uint32_t batchIdx = 0; batchIdx < m_batchSize; ++batchIdx) {
      // Convert Software Tensor to VideoFrame one by one based on the buffer size.
      auto videoFrame = m_videoFrames.GetAt(batchIdx);
      pooledConverter->Get()->Detensorizer->SoftwareTensorToVideoFrame(context.session, tempPAllocatedResource, resourceMetadata.TensorDescriptor, videoFrame);
      tempPAllocatedResource += bufferByteSize;
    }
  } else {
    descriptor.pixel_format = static_cast<DWORD>(DirectXPixelFormat::B8G8R8X8UIntNormalized);
    descriptor.luid = spDevice->GetD3DDevice()->GetAdapterLuid();  // Converted image on GPU

    auto pooledConverter = PoolObjectWrapper::Create(spDevice->DetensorizerStore()->Fetch(descriptor));

    auto pProvider = spSession->GetExecutionProvider();
    auto d3dResource = _winmla::GetD3D12ResourceFromAllocation(pProvider, pAllocatedResource);

    for (uint32_t batchIdx = 0; batchIdx < m_batchSize; ++batchIdx) {
      auto videoFrame = m_videoFrames.GetAt(batchIdx);
      pooledConverter->Get()->Detensorizer->DX12TensorToVideoFrame(
          batchIdx,
          context.session,
          d3dResource,
          resourceMetadata.TensorDescriptor,
          videoFrame);

      // Reset the Allocator before return to the Cache. Must Sync this background thread to that completion before we do.
      spDevice->GetD3DDeviceCache()->SyncD3D12ToCPU();
      pooledConverter->Get()->Detensorizer->ResetAllocator();
    }
#ifdef DEBUG_IMAGE_TENSOR_RESOURCE
    // NOTE(review): DumpResourceToCPU as defined in this file takes a fourth
    // converter argument and is compiled under a different macro - confirm
    // before enabling this path.
    DumpResourceToCPU(d3dResource, spSession, resourceMetadata.TensorDescriptor);
#endif
  }

  // Release any converters back to the pool by nulling out the wrapper.
  context.converter = nullptr;
  return S_OK;
}
WINML_CATCH_ALL_COM
// Produces the object handed back to callers for this value: the frame
// collection itself when IsBatch() is true, otherwise this object queried for
// its projected ImageFeatureValue interface.
// Returns the failing HRESULT if that QueryInterface fails.
HRESULT ImageFeatureValue::AbiRepresentation(winrt::Windows::Foundation::IInspectable& abiRepresentation) {
  if (!IsBatch()) {
    winrt::Windows::AI::MachineLearning::ImageFeatureValue projected = nullptr;
    RETURN_IF_FAILED(this->QueryInterface(
        winrt::guid_of<winrt::Windows::AI::MachineLearning::ImageFeatureValue>(),
        reinterpret_cast<void**>(winrt::put_abi(projected))));
    projected.as(abiRepresentation);
    return S_OK;
  }

  m_videoFrames.as(abiRepresentation);
  return S_OK;
}
// Identifies this feature value as an image feature.
// Exceptions are handled by WINML_CATCH_ALL.
Windows::AI::MachineLearning::LearningModelFeatureKind ImageFeatureValue::Kind() try {
  const auto kind = LearningModelFeatureKind::Image;
  return kind;
}
WINML_CATCH_ALL
// Returns the first frame of the collection (index 0).
// Exceptions are handled by WINML_CATCH_ALL.
Windows::Media::VideoFrame ImageFeatureValue::VideoFrame() try {
  constexpr uint32_t kFirstFrame = 0;
  return m_videoFrames.GetAt(kFirstFrame);
}
WINML_CATCH_ALL
// Exposes the frame collection as an IIterable. try_as is used, so the result
// is null if the collection does not provide that interface.
// Exceptions are handled by WINML_CATCH_ALL.
IIterable<Windows::Media::VideoFrame> ImageFeatureValue::VideoFrames() try {
  auto iterable = m_videoFrames.try_as<IIterable<Windows::Media::VideoFrame>>();
  return iterable;
}
WINML_CATCH_ALL
} // namespace winrt::Windows::AI::MachineLearning::implementation