onnxruntime/winml/adapter/DmlOrtSessionBuilder.cpp

// Copyright (c) Microsoft Corporation.
// Licensed under the MIT License.

#include "pch.h"

#ifdef USE_DML

// Needed to work around the fact that OnnxRuntime defines ERROR
#ifdef ERROR
#undef ERROR
#endif
#include "core/session/inference_session.h"
// Restore ERROR define
#define ERROR 0

#include "DmlOrtSessionBuilder.h"

// winml includes
#include "core/providers/dml/GraphTransformers/GraphTransformerHelpers.h"
#include "CustomRegistryHelper.h"
#include "core/providers/dml/DmlExecutionProvider/inc/DmlExecutionProvider.h"
#include "LearningModelDevice.h"
#include "core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h"

// ort includes
#include "core/framework/op_kernel.h"
#include "core/framework/op_node_proto_helper.h"
#include "core/framework/customRegistry.h"
#include "core/framework/data_transfer.h"
#include "core/session/abi_session_options_impl.h"

using namespace Windows::AI::MachineLearning;

namespace Windows::AI::MachineLearning::Adapter {

// Builds a session builder bound to a D3D12 device and command queue.
// Both pointers are stored in the device_ / queue_ members through copy_from;
// CreateSession later reads them back to create the DML device and provider.
DmlOrtSessionBuilder::DmlOrtSessionBuilder(ID3D12Device* device, ID3D12CommandQueue* queue) {
  device_.copy_from(device);
  queue_.copy_from(queue);
}

// Creates an OrtSessionOptions configured for the DML execution provider and
// hands it to the caller through `options`.
//
// options: receives the newly created session options on success. It is set
//          to nullptr on entry so a failure never leaves a stale pointer.
//
// Returns E_POINTER when `options` is null and S_OK on success. Exceptions
// raised while creating or configuring the options are handled by
// WINML_CATCH_ALL_COM (presumably converted to a failure HRESULT — confirm
// against the macro definition).
HRESULT
DmlOrtSessionBuilder::CreateSessionOptions(
    OrtSessionOptions** options) try {
  RETURN_HR_IF_NULL(E_POINTER, options);
  *options = nullptr;

  OrtSessionOptions* raw_options = nullptr;
  Ort::ThrowOnError(Ort::GetApi().CreateSessionOptions(&raw_options));

  // Stack-allocated wrapper: it owns raw_options while we configure it, so the
  // handle is freed automatically if any of the setters below throws, and no
  // heap-allocated wrapper object is leaked.
  Ort::SessionOptions session_options(raw_options);

  // set the graph optimization level to all (used to be called level 3)
  session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);

  // Disable the mem pattern session option for DML. It will cause problems with how memory is allocated.
  session_options.DisableMemPattern();

  // Detach the handle from the wrapper so it is not freed when the wrapper
  // goes out of scope, and publish it to the caller only now that
  // configuration has fully succeeded.
  *options = session_options.release();
  return S_OK;
}
WINML_CATCH_ALL_COM

// Registers every custom registry derived from `registry` with `p_session`.
//
// A null `registry` means there is nothing to register and yields S_OK.
// With a non-null `registry`, a null `p_session` yields E_POINTER.
// A failing RegisterCustomRegistry call is raised through ORT_THROW_IF_ERROR.
static HRESULT
RegisterCustomRegistry(
    onnxruntime::InferenceSession* p_session,
    IMLOperatorRegistry* registry) {
  // No operator registry supplied: nothing to do.
  if (registry == nullptr) {
    return S_OK;
  }

  RETURN_HR_IF_NULL(E_POINTER, p_session);

  auto lotus_registries = GetLotusCustomRegistries(registry);
  for (auto& lotus_registry : lotus_registries) {
    ORT_THROW_IF_ERROR(p_session->RegisterCustomRegistry(lotus_registry));
  }

  return S_OK;
}

// Creates an IDMLDevice on top of `d3d12Device`.
//
// DirectML.dll is loaded at runtime and DMLCreateDevice1 is resolved by name,
// so WinML carries no static dependency on DirectML.dll. Throws (via the WIL
// THROW_* macros) if the DLL cannot be loaded, the entry point is missing, or
// device creation fails.
Microsoft::WRL::ComPtr<IDMLDevice> CreateDmlDevice(ID3D12Device* d3d12Device) {
  using DmlCreateDevice1Fn = decltype(&DMLCreateDevice1);

  wil::unique_hmodule directml_module(LoadLibraryW(L"DirectML.dll"));
  THROW_LAST_ERROR_IF(!directml_module);

  const auto create_device = reinterpret_cast<DmlCreateDevice1Fn>(
      GetProcAddress(directml_module.get(), "DMLCreateDevice1"));
  THROW_LAST_ERROR_IF(!create_device);

  DML_CREATE_DEVICE_FLAGS create_flags = DML_CREATE_DEVICE_FLAG_NONE;

  // In DEBUG builds, turn on the DML debug layer when the D3D12 device exposes
  // its debug interface (i.e. the D3D12 debug layer is enabled as well).
#if _DEBUG
  {
    // The probe pointer lives only inside this scope, so the debug interface
    // is released again before the DML device is created.
    Microsoft::WRL::ComPtr<ID3D12DebugDevice> debug_probe;
    const HRESULT probe_result = d3d12Device->QueryInterface(IID_PPV_ARGS(&debug_probe));
    if (SUCCEEDED(probe_result)) {
      create_flags |= DML_CREATE_DEVICE_FLAG_DEBUG;
    }
  }
#endif

  Microsoft::WRL::ComPtr<IDMLDevice> dml_device;
  THROW_IF_FAILED(create_device(d3d12Device, create_flags, DML_FEATURE_LEVEL_2_0, IID_PPV_ARGS(&dml_device)));

  // Intentionally leak the module handle so DirectML.dll stays loaded; this
  // mirrors what would happen had the DLL been delay-loaded.
  directml_module.release();

  return dml_device;
}

// Creates an inference session backed by a DML execution provider that runs on
// this builder's D3D12 device and command queue.
//
// options:     session options used to construct the onnxruntime session.
// p_session:   receives the new session wrapped as winmla::IInferenceSession.
// pp_provider: receives a raw, non-owning pointer to the DML execution
//              provider; ownership of the provider is moved into the session.
//              Must point to a null pointer on entry.
//
// Returns E_POINTER when any argument is null or *pp_provider is already set,
// and S_OK on success. Exceptions are handled by WINML_CATCH_ALL_COM.
// On any failure *pp_provider is left untouched (null), so the caller never
// sees a pointer to a provider that was destroyed along with the session.
HRESULT DmlOrtSessionBuilder::CreateSession(
    OrtSessionOptions* options,
    winmla::IInferenceSession** p_session,
    onnxruntime::IExecutionProvider** pp_provider) try {
  RETURN_HR_IF_NULL(E_POINTER, p_session);
  RETURN_HR_IF_NULL(E_POINTER, pp_provider);
  RETURN_HR_IF(E_POINTER, *pp_provider != nullptr);
  // options is dereferenced below, so reject null up front.
  RETURN_HR_IF_NULL(E_POINTER, options);

  auto p_d3d_device = device_.get();
  auto p_queue = queue_.get();

  Microsoft::WRL::ComPtr<IDMLDevice> dmlDevice = CreateDmlDevice(p_d3d_device);

  std::unique_ptr<onnxruntime::IExecutionProvider> gpu_provider = Dml::CreateExecutionProvider(dmlDevice.Get(), p_queue);
  auto session = std::make_unique<onnxruntime::InferenceSession>(options->value);

  // Remember the provider's raw pointer before ownership moves into the
  // session, but do not publish it until everything below has succeeded.
  onnxruntime::IExecutionProvider* const p_provider = gpu_provider.get();

  ORT_THROW_IF_ERROR(session->RegisterExecutionProvider(std::move(gpu_provider)));

  // assign the session to the out parameter
  // NOTE(review): the raw session pointer is released into the wrapper's
  // constructor; if MakeOrThrow itself fails the session may leak — confirm
  // against winmla::InferenceSession's ownership contract.
  auto sessionptr = wil::MakeOrThrow<winmla::InferenceSession>(session.release());
  RETURN_IF_FAILED(sessionptr.CopyTo(__uuidof(winmla::IInferenceSession), reinterpret_cast<void**>(p_session)));

  // Success: hand out the cached provider pointer.
  *pp_provider = p_provider;

  return S_OK;
}
WINML_CATCH_ALL_COM

// Initializes `p_session` with allocator rounding temporarily disabled on
// `p_provider`, then flushes the provider's pending D3D12 work.
//
// p_session:  session wrapper whose underlying session is initialized.
// p_provider: DML execution provider registered with that session.
//
// Returns E_INVALIDARG when either argument is null and S_OK on success.
// Exceptions are handled by WINML_CATCH_ALL_COM. The rounding mode is set back
// to Enabled on both the success and the failure path.
HRESULT DmlOrtSessionBuilder::Initialize(
    winmla::IInferenceSession* p_session,
    onnxruntime::IExecutionProvider* p_provider) try {
  RETURN_HR_IF_NULL(E_INVALIDARG, p_session);
  RETURN_HR_IF_NULL(E_INVALIDARG, p_provider);

  // OnnxRuntime uses the default rounding mode when calling the session's allocator.
  // During initialization, OnnxRuntime allocates weights, which are permanent across session
  // lifetime and can be large, so shouldn't be rounded.
  Dml::SetDefaultRoundingMode(p_provider, AllocatorRoundingMode::Disabled);

  try {
    ORT_THROW_IF_ERROR(p_session->get()->Initialize());
  } catch (...) {
    // Do not leave the provider stuck in the non-rounding mode when
    // initialization fails; restore it and let the outer handler deal with
    // the original error.
    Dml::SetDefaultRoundingMode(p_provider, AllocatorRoundingMode::Enabled);
    throw;
  }

  Dml::SetDefaultRoundingMode(p_provider, AllocatorRoundingMode::Enabled);

  // Flush the D3D12 work from the DML execution provider
  Dml::FlushContext(p_provider);

  return S_OK;
}
WINML_CATCH_ALL_COM

} // Windows::AI::MachineLearning::Adapter

#endif USE_DML
Initial Commit 2019-08-15 22:27:05 +00:00			`// Copyright (c) Microsoft Corporation.`
			`// Licensed under the MIT License.`

			`#include "pch.h"`

Layer dev paulm (#2533) * commetns for dml graph transformer fixed ort value passing using the allocatir info * fixed and coded maps and sequences across the abi * cleaned up w4's cleaned up the model info ABI delayload directml.dll from winml * cleaned up namepsace aliases. renamed _winmla to winmla this was good PR feedback from tiago a while back. * moved files from inc to lib\api.core cleaned up some of the cmake * staged changes 2019-12-03 23:31:22 +00:00			`#ifdef USE_DML`

Initial Commit 2019-08-15 22:27:05 +00:00			`// Needed to work around the fact that OnnxRuntime defines ERROR`
			`#ifdef ERROR`
			`#undef ERROR`
			`#endif`
			`#include "core/session/inference_session.h"`
			`// Restore ERROR define`
			`#define ERROR 0`

			`#include "DmlOrtSessionBuilder.h"`

			`// winml includes`
			`#include "core/providers/dml/GraphTransformers/GraphTransformerHelpers.h"`
Layer dev paulm (#2533) * commetns for dml graph transformer fixed ort value passing using the allocatir info * fixed and coded maps and sequences across the abi * cleaned up w4's cleaned up the model info ABI delayload directml.dll from winml * cleaned up namepsace aliases. renamed _winmla to winmla this was good PR feedback from tiago a while back. * moved files from inc to lib\api.core cleaned up some of the cmake * staged changes 2019-12-03 23:31:22 +00:00			`#include "CustomRegistryHelper.h"`
Initial Commit 2019-08-15 22:27:05 +00:00			`#include "core/providers/dml/DmlExecutionProvider/inc/DmlExecutionProvider.h"`
			`#include "LearningModelDevice.h"`
			`#include "core/providers/dml/DmlExecutionProvider/src/MLOperatorAuthorImpl.h"`

			`// ort includes`
			`#include "core/framework/op_kernel.h"`
			`#include "core/framework/op_node_proto_helper.h"`
			`#include "core/framework/customRegistry.h"`
			`#include "core/framework/data_transfer.h"`
Moved SessionOptions over to the abi 2019-11-20 02:15:47 +00:00			`#include "core/session/abi_session_options_impl.h"`
Initial Commit 2019-08-15 22:27:05 +00:00
			`using namespace Windows::AI::MachineLearning;`

LearningModelSession is cleaned up to use the adapter, and parts of b… (#2382) this is a big PR. we are going to move it up to layer_dev , which is still a L3 so we are still safe to do work there agile. we are going to move this into the L3 so that ryan can start doing intergration testing. we will pause for a full code review and integration test result prior to going into the L2. >>>> raw comments from previous commits >>> * LearningModelSession is cleaned up to use the adapter, and parts of binding are. * moved everything in the winmladapter made it all nano-com using, WRL to construct objects in the ORT side. base interfaces for everythign for winml to call cleaned up a bunch of winml to use the base interfaces. * more pieces * GetData across the abi. * renamed some namepsace cleaned up OrtValue cleaned up Tensor cleaned up custom ops. everything but learnignmodel should be clean * make sure it's building. winml.dll is still a monolith. 2019-11-15 01:44:07 +00:00			`namespace Windows::AI::MachineLearning::Adapter {`

Initial Commit 2019-08-15 22:27:05 +00:00			`DmlOrtSessionBuilder::DmlOrtSessionBuilder(`
LearningModelSession is cleaned up to use the adapter, and parts of b… (#2382) this is a big PR. we are going to move it up to layer_dev , which is still a L3 so we are still safe to do work there agile. we are going to move this into the L3 so that ryan can start doing intergration testing. we will pause for a full code review and integration test result prior to going into the L2. >>>> raw comments from previous commits >>> * LearningModelSession is cleaned up to use the adapter, and parts of binding are. * moved everything in the winmladapter made it all nano-com using, WRL to construct objects in the ORT side. base interfaces for everythign for winml to call cleaned up a bunch of winml to use the base interfaces. * more pieces * GetData across the abi. * renamed some namepsace cleaned up OrtValue cleaned up Tensor cleaned up custom ops. everything but learnignmodel should be clean * make sure it's building. winml.dll is still a monolith. 2019-11-15 01:44:07 +00:00			`ID3D12Device* device,`
			`ID3D12CommandQueue* queue){`
more snipping to get core into ort 2019-11-08 21:23:44 +00:00			`device_.copy_from(device);`
			`queue_.copy_from(queue);`
			`}`
Initial Commit 2019-08-15 22:27:05 +00:00
			`HRESULT`
			`DmlOrtSessionBuilder::CreateSessionOptions(`
Handle exception thrown from all apis in WinMLAdapter (#2539) 2019-12-04 22:08:16 +00:00			`OrtSessionOptions** options) try {`
Moved SessionOptions over to the abi 2019-11-20 02:15:47 +00:00			`RETURN_HR_IF_NULL(E_POINTER, options);`
Initial Commit 2019-08-15 22:27:05 +00:00
Moved SessionOptions over to the abi 2019-11-20 02:15:47 +00:00			`Ort::ThrowOnError(Ort::GetApi().CreateSessionOptions(options));`
			`std::unique_ptr<Ort::SessionOptions> session_options = std::make_unique<Ort::SessionOptions>(*options);`
Layer dev paulm (#2423) * model moved over. everything builds clean. step ! * weak ref comment * added a wrapper for RoGetActivationFactory to hook back into winml for creating winml objects. fixes model load. * fixed some lifetime management. fixed the debug build. squeezenet passes using winmlrunner for CPU and GPU 2019-11-18 17:51:39 +00:00
Moved SessionOptions over to the abi 2019-11-20 02:15:47 +00:00			`// set the graph optimization level to all (used to be called level 3)`
			`session_options->SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);`
Initial Commit 2019-08-15 22:27:05 +00:00
			`// Disable the mem pattern session option for DML. It will cause problems with how memory is allocated.`
Moved SessionOptions over to the abi 2019-11-20 02:15:47 +00:00			`session_options->DisableMemPattern();`
Initial Commit 2019-08-15 22:27:05 +00:00
Moved SessionOptions over to the abi 2019-11-20 02:15:47 +00:00			`// all done with the smart ptr`
			`session_options.release();`
Initial Commit 2019-08-15 22:27:05 +00:00			`return S_OK;`
			`}`
Handle exception thrown from all apis in WinMLAdapter (#2539) 2019-12-04 22:08:16 +00:00			`WINML_CATCH_ALL_COM`
Initial Commit 2019-08-15 22:27:05 +00:00
			`static HRESULT`
			`RegisterCustomRegistry(`
			`onnxruntime::InferenceSession* p_session,`
			`IMLOperatorRegistry* registry) {`
			`if (registry != nullptr) {`
			`RETURN_HR_IF_NULL(E_POINTER, p_session);`

			`auto custom_registries = GetLotusCustomRegistries(registry);`

			`// Register`
			`for (auto& custom_registry : custom_registries) {`
Task 23998197: add winml_lib_core into onnnxruntime.dll (#2368) * Task 23998197: add winml_lib_core into onnnxruntime.dll * PR feedback build break on perf_test 2019-11-11 22:34:19 +00:00			`ORT_THROW_IF_ERROR(p_session->RegisterCustomRegistry(custom_registry));`
Initial Commit 2019-08-15 22:27:05 +00:00			`}`
			`}`

			`return S_OK;`
			`}`

			`Microsoft::WRL::ComPtr<IDMLDevice> CreateDmlDevice(ID3D12Device* d3d12Device) {`
			`// Dynamically load DML to avoid WinML taking a static dependency on DirectML.dll`
			`wil::unique_hmodule dmlDll(LoadLibraryW(L"DirectML.dll"));`
			`THROW_LAST_ERROR_IF(!dmlDll);`

			`auto dmlCreateDevice1Fn = reinterpret_cast<decltype(&DMLCreateDevice1)>(`
			`GetProcAddress(dmlDll.get(), "DMLCreateDevice1"));`
			`THROW_LAST_ERROR_IF(!dmlCreateDevice1Fn);`

			`DML_CREATE_DEVICE_FLAGS dmlFlags = DML_CREATE_DEVICE_FLAG_NONE;`

			`// Enable the DML debug layer in DEBUG builds, if the D3D12 debug layer is also enabled`
			`#if _DEBUG`
			`Microsoft::WRL::ComPtr<ID3D12DebugDevice> d3d12DebugDevice;`
			`if (SUCCEEDED(d3d12Device->QueryInterface(IID_PPV_ARGS(&d3d12DebugDevice)))) {`
			`d3d12DebugDevice = nullptr;`
			`dmlFlags \|= DML_CREATE_DEVICE_FLAG_DEBUG;`
			`}`
			`#endif`

			`Microsoft::WRL::ComPtr<IDMLDevice> dmlDevice;`
			`THROW_IF_FAILED(dmlCreateDevice1Fn(d3d12Device, dmlFlags, DML_FEATURE_LEVEL_2_0, IID_PPV_ARGS(&dmlDevice)));`

			`// Keep DirectML.dll loaded by leaking the handle. This is equivalent behavior to if we delay-loaded the DLL.`
			`dmlDll.release();`

			`return dmlDevice;`
			`}`

			`HRESULT DmlOrtSessionBuilder::CreateSession(`
Moved SessionOptions over to the abi 2019-11-20 02:15:47 +00:00			`OrtSessionOptions* options,`
Layer dev paulm (#2507) * commetns for dml graph transformer fixed ort value passing using the allocatir info * fixed and coded maps and sequences across the abi * cleaned up w4's cleaned up the model info ABI delayload directml.dll from winml * cleaned up namepsace aliases. renamed _winmla to winmla this was good PR feedback from tiago a while back. 2019-11-27 23:50:49 +00:00			`winmla::IInferenceSession** p_session,`
Handle exception thrown from all apis in WinMLAdapter (#2539) 2019-12-04 22:08:16 +00:00			`onnxruntime::IExecutionProvider** pp_provider) try {`
Initial Commit 2019-08-15 22:27:05 +00:00			`RETURN_HR_IF_NULL(E_POINTER, p_session);`
			`RETURN_HR_IF_NULL(E_POINTER, pp_provider);`
			`RETURN_HR_IF(E_POINTER, *pp_provider != nullptr);`

more snipping to get core into ort 2019-11-08 21:23:44 +00:00			`auto p_d3d_device = device_.get();`
			`auto p_queue = queue_.get();`
Initial Commit 2019-08-15 22:27:05 +00:00
			`Microsoft::WRL::ComPtr<IDMLDevice> dmlDevice = CreateDmlDevice(p_d3d_device);`

			`std::unique_ptr<onnxruntime::IExecutionProvider> gpu_provider = Dml::CreateExecutionProvider(dmlDevice.Get(), p_queue);`
Moved SessionOptions over to the abi 2019-11-20 02:15:47 +00:00			`auto session = std::make_unique<onnxruntime::InferenceSession>(options->value);`
Initial Commit 2019-08-15 22:27:05 +00:00
			`// Cache the provider's raw pointer`
			`*pp_provider = gpu_provider.get();`

Task 23998197: add winml_lib_core into onnnxruntime.dll (#2368) * Task 23998197: add winml_lib_core into onnnxruntime.dll * PR feedback build break on perf_test 2019-11-11 22:34:19 +00:00			`ORT_THROW_IF_ERROR(session->RegisterExecutionProvider(std::move(gpu_provider)));`
Initial Commit 2019-08-15 22:27:05 +00:00
LearningModelSession is cleaned up to use the adapter, and parts of b… (#2382) this is a big PR. we are going to move it up to layer_dev , which is still a L3 so we are still safe to do work there agile. we are going to move this into the L3 so that ryan can start doing intergration testing. we will pause for a full code review and integration test result prior to going into the L2. >>>> raw comments from previous commits >>> * LearningModelSession is cleaned up to use the adapter, and parts of binding are. * moved everything in the winmladapter made it all nano-com using, WRL to construct objects in the ORT side. base interfaces for everythign for winml to call cleaned up a bunch of winml to use the base interfaces. * more pieces * GetData across the abi. * renamed some namepsace cleaned up OrtValue cleaned up Tensor cleaned up custom ops. everything but learnignmodel should be clean * make sure it's building. winml.dll is still a monolith. 2019-11-15 01:44:07 +00:00			`// assign the session to the out parameter`
Layer dev paulm (#2507) * commetns for dml graph transformer fixed ort value passing using the allocatir info * fixed and coded maps and sequences across the abi * cleaned up w4's cleaned up the model info ABI delayload directml.dll from winml * cleaned up namepsace aliases. renamed _winmla to winmla this was good PR feedback from tiago a while back. 2019-11-27 23:50:49 +00:00			`auto sessionptr = wil::MakeOrThrow<winmla::InferenceSession>(session.release());`
			`RETURN_IF_FAILED(sessionptr.CopyTo(_uuidof(winmla::IInferenceSession), (void**)p_session));`
Initial Commit 2019-08-15 22:27:05 +00:00
			`return S_OK;`
			`}`
Handle exception thrown from all apis in WinMLAdapter (#2539) 2019-12-04 22:08:16 +00:00			`WINML_CATCH_ALL_COM`
Initial Commit 2019-08-15 22:27:05 +00:00
			`HRESULT DmlOrtSessionBuilder::Initialize(`
Layer dev paulm (#2507) * commetns for dml graph transformer fixed ort value passing using the allocatir info * fixed and coded maps and sequences across the abi * cleaned up w4's cleaned up the model info ABI delayload directml.dll from winml * cleaned up namepsace aliases. renamed _winmla to winmla this was good PR feedback from tiago a while back. 2019-11-27 23:50:49 +00:00			`winmla::IInferenceSession* p_session,`
Handle exception thrown from all apis in WinMLAdapter (#2539) 2019-12-04 22:08:16 +00:00			`onnxruntime::IExecutionProvider* p_provider) try {`
Initial Commit 2019-08-15 22:27:05 +00:00			`RETURN_HR_IF_NULL(E_INVALIDARG, p_session);`
			`RETURN_HR_IF_NULL(E_INVALIDARG, p_provider);`

			`// OnnxRuntime uses the default rounding mode when calling the session's allocator.`
			`// During initialization, OnnxRuntime allocates weights, which are permanent across session`
			`// lifetime and can be large, so shouldn't be rounded.`
			`Dml::SetDefaultRoundingMode(p_provider, AllocatorRoundingMode::Disabled);`

LearningModelSession is cleaned up to use the adapter, and parts of b… (#2382) this is a big PR. we are going to move it up to layer_dev , which is still a L3 so we are still safe to do work there agile. we are going to move this into the L3 so that ryan can start doing intergration testing. we will pause for a full code review and integration test result prior to going into the L2. >>>> raw comments from previous commits >>> * LearningModelSession is cleaned up to use the adapter, and parts of binding are. * moved everything in the winmladapter made it all nano-com using, WRL to construct objects in the ORT side. base interfaces for everythign for winml to call cleaned up a bunch of winml to use the base interfaces. * more pieces * GetData across the abi. * renamed some namepsace cleaned up OrtValue cleaned up Tensor cleaned up custom ops. everything but learnignmodel should be clean * make sure it's building. winml.dll is still a monolith. 2019-11-15 01:44:07 +00:00			`ORT_THROW_IF_ERROR(p_session->get()->Initialize());`
Initial Commit 2019-08-15 22:27:05 +00:00
			`Dml::SetDefaultRoundingMode(p_provider, AllocatorRoundingMode::Enabled);`

			`// Flush the D3D12 work from the DML execution provider`
			`Dml::FlushContext(p_provider);`

			`return S_OK;`
LearningModelSession is cleaned up to use the adapter, and parts of b… (#2382) this is a big PR. we are going to move it up to layer_dev , which is still a L3 so we are still safe to do work there agile. we are going to move this into the L3 so that ryan can start doing intergration testing. we will pause for a full code review and integration test result prior to going into the L2. >>>> raw comments from previous commits >>> * LearningModelSession is cleaned up to use the adapter, and parts of binding are. * moved everything in the winmladapter made it all nano-com using, WRL to construct objects in the ORT side. base interfaces for everythign for winml to call cleaned up a bunch of winml to use the base interfaces. * more pieces * GetData across the abi. * renamed some namepsace cleaned up OrtValue cleaned up Tensor cleaned up custom ops. everything but learnignmodel should be clean * make sure it's building. winml.dll is still a monolith. 2019-11-15 01:44:07 +00:00			`}`
Handle exception thrown from all apis in WinMLAdapter (#2539) 2019-12-04 22:08:16 +00:00			`WINML_CATCH_ALL_COM`
LearningModelSession is cleaned up to use the adapter, and parts of b… (#2382) this is a big PR. we are going to move it up to layer_dev , which is still a L3 so we are still safe to do work there agile. we are going to move this into the L3 so that ryan can start doing intergration testing. we will pause for a full code review and integration test result prior to going into the L2. >>>> raw comments from previous commits >>> * LearningModelSession is cleaned up to use the adapter, and parts of binding are. * moved everything in the winmladapter made it all nano-com using, WRL to construct objects in the ORT side. base interfaces for everythign for winml to call cleaned up a bunch of winml to use the base interfaces. * more pieces * GetData across the abi. * renamed some namepsace cleaned up OrtValue cleaned up Tensor cleaned up custom ops. everything but learnignmodel should be clean * make sure it's building. winml.dll is still a monolith. 2019-11-15 01:44:07 +00:00
Layer dev paulm (#2533) * commetns for dml graph transformer fixed ort value passing using the allocatir info * fixed and coded maps and sequences across the abi * cleaned up w4's cleaned up the model info ABI delayload directml.dll from winml * cleaned up namepsace aliases. renamed _winmla to winmla this was good PR feedback from tiago a while back. * moved files from inc to lib\api.core cleaned up some of the cmake * staged changes 2019-12-03 23:31:22 +00:00			`} // Windows::AI::MachineLearning::Adapter`

			`#endif USE_DML`