mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-19 21:32:23 +00:00
### Description This PR is to refactor ExecutionProvider API for memory management, which is to move allocators from EP level to SessionState level and indexed by OrtDevice ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> This PR is to refactor ExecutionProvider API for memory management, which is to move allocators from EP level to SessionState level and indexed by OrtDevice. By this change, EP level will shift the burden of maintaining allocators, which will be user friendly for EP developers --------- Co-authored-by: Lei Cao <leca@microsoft.com@orttrainingdev8.d32nl1ml4oruzj4qz3bqlggovf.px.internal.cloudapp.net>
348 lines
12 KiB
C++
348 lines
12 KiB
C++
// Copyright (c) Microsoft Corporation. All rights reserved.
|
|
// Licensed under the MIT License.
|
|
#include "core/graph/onnx_protobuf.h"
|
|
|
|
#include "core/session/inference_session.h"
|
|
|
|
#include <algorithm>
|
|
#include <functional>
|
|
#include <iterator>
|
|
#include <thread>
|
|
|
|
#include "core/common/logging/logging.h"
|
|
#include "core/framework/customregistry.h"
|
|
#include "core/framework/execution_provider.h"
|
|
#include "core/framework/op_kernel.h"
|
|
#include "core/framework/session_state.h"
|
|
#include "core/graph/graph_viewer.h"
|
|
#include "core/graph/model.h"
|
|
#include "core/graph/op.h"
|
|
#include "core/graph/schema_registry.h"
|
|
#include "core/providers/cpu/cpu_execution_provider.h"
|
|
#include "core/providers/cpu/math/element_wise_ops.h"
|
|
#include "core/framework/tensorprotoutils.h"
|
|
#include "test/capturing_sink.h"
|
|
#include "test/test_environment.h"
|
|
#include "test/util/include/asserts.h"
|
|
#include "test_utils.h"
|
|
#include "gtest/gtest.h"
|
|
|
|
using namespace ONNX_NAMESPACE;
|
|
using namespace onnxruntime::common;
|
|
|
|
namespace onnxruntime {
|
|
namespace test {
|
|
|
|
// Foo kernel which is doing Add
|
|
template <typename T>
|
|
class FooKernel : public OpKernel {
|
|
public:
|
|
FooKernel(const OpKernelInfo& info) : OpKernel(info) {}
|
|
|
|
Status Compute(OpKernelContext* context) const {
|
|
const auto* X = context->Input<Tensor>(0);
|
|
const auto* W = context->Input<Tensor>(1);
|
|
|
|
auto X_Data = X->Data<T>();
|
|
auto W_Data = W->Data<T>();
|
|
|
|
auto shape = X->Shape().GetDims();
|
|
|
|
auto* Y = context->Output(0, shape);
|
|
auto* Y_Data = Y->MutableData<T>();
|
|
|
|
size_t size = 1;
|
|
for (size_t i = 0; i < shape.size(); i++) {
|
|
size *= shape[i];
|
|
}
|
|
|
|
for (size_t i = 0; i < size; i++) {
|
|
Y_Data[i] = X_Data[i] + W_Data[i];
|
|
}
|
|
|
|
return Status::OK();
|
|
}
|
|
};
|
|
|
|
ONNX_NAMESPACE::OpSchema GetFooSchema() {
|
|
ONNX_NAMESPACE::OpSchema schema("Foo", "unknown", 0);
|
|
schema.SetDomain("test");
|
|
schema.Input(0,
|
|
"A",
|
|
"First operand, should share the type with the second operand.",
|
|
"T");
|
|
schema.Input(
|
|
1,
|
|
"B",
|
|
"Second operand. With broadcasting can be of smaller size than A. "
|
|
"If broadcasting is disabled it should be of the same size.",
|
|
"T");
|
|
schema.Output(0, "C", "Result, has same dimensions and type as A", "T");
|
|
schema.TypeConstraint(
|
|
"T",
|
|
OpSchema::numeric_types_for_math_reduction(),
|
|
"Constrain input and output types to high-precision numeric tensors.");
|
|
schema.SinceVersion(1);
|
|
return schema;
|
|
}
|
|
|
|
KernelDefBuilder FooKernelDef() {
|
|
KernelDefBuilder def;
|
|
def.SetName("Foo")
|
|
.SetDomain("test")
|
|
.SinceVersion(1)
|
|
.Provider(onnxruntime::kCpuExecutionProvider)
|
|
.TypeConstraint("T", DataTypeImpl::GetTensorType<float>());
|
|
return def;
|
|
}
|
|
|
|
Status CreateFooKernel(FuncManager&, const OpKernelInfo& kernel_info, std::unique_ptr<OpKernel>& out) {
|
|
out = std::make_unique<FooKernel<float>>(kernel_info);
|
|
return Status::OK();
|
|
}
|
|
|
|
// kernel with optional outputs
|
|
KernelDefBuilder OptionalKernelDef() {
|
|
KernelDefBuilder def;
|
|
def.SetName("OptionalOp")
|
|
.SetDomain("test")
|
|
.SinceVersion(1)
|
|
.Provider(onnxruntime::kCpuExecutionProvider)
|
|
.TypeConstraint("T", DataTypeImpl::GetTensorType<float>());
|
|
return def;
|
|
}
|
|
|
|
ONNX_NAMESPACE::OpSchema GetOptionalOpSchema() {
|
|
ONNX_NAMESPACE::OpSchema schema("OptionalOp", "unknown", 0);
|
|
schema.SetDomain("test");
|
|
schema.Input(0,
|
|
"X",
|
|
"First operand, should share the type with the second operand.",
|
|
"T");
|
|
schema.Input(
|
|
1,
|
|
"W",
|
|
"Second operand. If provided, add it to the output",
|
|
"T",
|
|
OpSchema::Optional);
|
|
schema.Output(0, "Y", "Result, has same dimensions and type as A", "T");
|
|
schema.Output(1, "Y2", "Result, has same dimensions and type as A", "T", OpSchema::Optional);
|
|
schema.TypeConstraint(
|
|
"T",
|
|
OpSchema::numeric_types_for_math_reduction(),
|
|
"Constrain input and output types to high-precision numeric tensors.");
|
|
schema.SinceVersion(1);
|
|
return schema;
|
|
}
|
|
|
|
template <typename T>
|
|
class OptionalOpKernel : public OpKernel {
|
|
public:
|
|
OptionalOpKernel(const OpKernelInfo& info) : OpKernel(info) {}
|
|
|
|
Status Compute(OpKernelContext* context) const {
|
|
const auto* X = context->Input<Tensor>(0);
|
|
const auto* W = context->Input<Tensor>(1);
|
|
|
|
auto* X_Data = X->Data<T>();
|
|
auto shape = X->Shape().GetDims();
|
|
auto* Y = context->Output(0, shape);
|
|
auto* Y_Data = Y->MutableData<T>();
|
|
size_t size = 1;
|
|
for (size_t i = 0; i < shape.size(); i++) {
|
|
size *= shape[i];
|
|
}
|
|
|
|
for (size_t i = 0; i < size; i++) {
|
|
Y_Data[i] = X_Data[i];
|
|
}
|
|
|
|
auto* Y2 = context->Output(1, shape);
|
|
// Y2 is used or not
|
|
if (Y2) {
|
|
auto Y2_Data = Y2->MutableData<T>();
|
|
for (size_t i = 0; i < size; i++) {
|
|
Y2_Data[i] = X_Data[i];
|
|
}
|
|
}
|
|
|
|
// W is used or not
|
|
if (W) {
|
|
auto* W_Data = W->Data<T>();
|
|
for (size_t i = 0; i < size; i++) {
|
|
Y_Data[i] += W_Data[i];
|
|
}
|
|
if (Y2) {
|
|
auto* Y2_Data = Y2->MutableData<T>();
|
|
for (size_t i = 0; i < size; i++) {
|
|
Y2_Data[i] += W_Data[i];
|
|
}
|
|
}
|
|
}
|
|
|
|
return Status::OK();
|
|
}
|
|
};
|
|
|
|
Status CreateOptionalOpKernel(FuncManager&, const OpKernelInfo& kernel_info, std::unique_ptr<OpKernel>& out) {
|
|
out = std::make_unique<OptionalOpKernel<float>>(kernel_info);
|
|
return Status::OK();
|
|
}
|
|
|
|
static const std::string MUL_MODEL_URI = "testdata/mul_1.onnx";
|
|
static const std::string FOO_MODEL_URI = "testdata/foo_1.onnx";
|
|
static const std::string FOO_CLIP_MODEL_URI = "testdata/foo_1_clip_11.onnx";
|
|
static const std::string OPTIONAL_MODEL1_URI = "testdata/optional_1.onnx";
|
|
|
|
void RunSession(InferenceSession& session_object,
|
|
std::vector<int64_t>& dims_x,
|
|
std::vector<float>& values_x,
|
|
std::vector<int64_t>& dims_y,
|
|
std::vector<float>& values_y) {
|
|
// prepare inputs
|
|
OrtValue ml_value;
|
|
CreateMLValue<float>(TestCPUExecutionProvider()->CreatePreferredAllocators()[0], dims_x, values_x, &ml_value);
|
|
NameMLValMap feeds;
|
|
feeds.insert(std::make_pair("X", ml_value));
|
|
|
|
// prepare outputs
|
|
std::vector<std::string> output_names;
|
|
output_names.push_back("Y");
|
|
std::vector<OrtValue> fetches;
|
|
|
|
// Now run
|
|
EXPECT_STATUS_OK(session_object.Run(RunOptions{}, feeds, output_names, &fetches));
|
|
ASSERT_EQ(1u, fetches.size());
|
|
auto& rtensor = fetches.front().Get<Tensor>();
|
|
TensorShape expected_shape(dims_y);
|
|
EXPECT_EQ(expected_shape, rtensor.Shape());
|
|
const std::vector<float> found(rtensor.Data<float>(), rtensor.Data<float>() + expected_shape.Size());
|
|
ASSERT_EQ(values_y, found);
|
|
}
|
|
|
|
// This tests that a custom op can override an ONNX operator implemented by ORT.
|
|
TEST(CustomKernelTests, CustomKernelWithBuiltInSchema) {
|
|
SessionOptions so;
|
|
so.session_logid = "CustomKernelWithBuiltInSchema";
|
|
|
|
// Register a custom kernel that matches the ONNX Mul but is implemented to do an Add so we can validate the
|
|
// custom kernel overrides the ORT Mul kernel
|
|
KernelDefBuilder def;
|
|
def.SetName("Mul")
|
|
.SetDomain(onnxruntime::kOnnxDomain)
|
|
.SinceVersion(7)
|
|
.Provider(onnxruntime::kCpuExecutionProvider)
|
|
.TypeConstraint("T", DataTypeImpl::GetTensorType<float>());
|
|
|
|
std::shared_ptr<CustomRegistry> registry = std::make_shared<CustomRegistry>();
|
|
EXPECT_STATUS_OK(registry->RegisterCustomKernel(def, CreateFooKernel));
|
|
|
|
InferenceSession session_object{so, GetEnvironment()};
|
|
EXPECT_STATUS_OK(session_object.RegisterCustomRegistry(registry));
|
|
EXPECT_STATUS_OK(session_object.Load(MUL_MODEL_URI));
|
|
EXPECT_STATUS_OK(session_object.Initialize());
|
|
|
|
// prepare inputs
|
|
std::vector<int64_t> dims_x = {3, 2};
|
|
std::vector<float> values_x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
|
|
// prepare expected inputs and outputs
|
|
std::vector<int64_t> expected_dims_y = {3, 2};
|
|
// now the expected value should be Add's result.
|
|
std::vector<float> expected_values_y = {2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f};
|
|
|
|
// Now run
|
|
RunSession(session_object, dims_x, values_x, expected_dims_y, expected_values_y);
|
|
}
|
|
|
|
// Test registering a custom kernel with custom schema
|
|
TEST(CustomKernelTests, CustomKernelWithCustomSchema) {
|
|
SessionOptions so;
|
|
|
|
so.session_logid = "CustomKernelWithCustomSchema";
|
|
|
|
// register foo schema
|
|
std::shared_ptr<CustomRegistry> registry = std::make_shared<CustomRegistry>();
|
|
std::vector<OpSchema> schemas = {GetFooSchema()};
|
|
auto def = FooKernelDef();
|
|
|
|
EXPECT_STATUS_OK(registry->RegisterOpSet(schemas, "test", 1, 1000));
|
|
EXPECT_STATUS_OK(registry->RegisterCustomKernel(def, CreateFooKernel));
|
|
|
|
InferenceSession session_object{so, GetEnvironment()};
|
|
EXPECT_STATUS_OK(session_object.RegisterCustomRegistry(registry));
|
|
EXPECT_STATUS_OK(session_object.Load(FOO_MODEL_URI));
|
|
EXPECT_STATUS_OK(session_object.Initialize());
|
|
|
|
// prepare inputs
|
|
std::vector<int64_t> dims_x = {3, 2};
|
|
std::vector<float> values_x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
|
|
// prepare expected inputs and outputs
|
|
std::vector<int64_t> expected_dims_y = {3, 2};
|
|
// now the expected value should be Add's result.
|
|
std::vector<float> expected_values_y = {2.0f, 4.0f, 6.0f, 8.0f, 10.0f, 12.0f};
|
|
|
|
// Now run
|
|
RunSession(session_object, dims_x, values_x, expected_dims_y, expected_values_y);
|
|
}
|
|
|
|
TEST(CustomKernelTests, CustomKernelWithOptionalOutput) {
|
|
SessionOptions so;
|
|
so.session_logid = "CustomKernelWithOptionalOutput";
|
|
|
|
// register optional schema
|
|
std::shared_ptr<CustomRegistry> registry = std::make_shared<CustomRegistry>();
|
|
std::vector<OpSchema> schemas = {GetOptionalOpSchema()};
|
|
auto def = OptionalKernelDef();
|
|
|
|
EXPECT_STATUS_OK(registry->RegisterOpSet(schemas, "test", 1, 1000));
|
|
EXPECT_STATUS_OK(registry->RegisterCustomKernel(def, CreateOptionalOpKernel));
|
|
|
|
InferenceSession session_object{so, GetEnvironment()};
|
|
EXPECT_STATUS_OK(session_object.RegisterCustomRegistry(registry));
|
|
EXPECT_STATUS_OK(session_object.Load(OPTIONAL_MODEL1_URI));
|
|
EXPECT_STATUS_OK(session_object.Initialize());
|
|
|
|
// prepare inputs
|
|
std::vector<int64_t> dims_x = {3, 2};
|
|
std::vector<float> values_x = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
|
|
// prepare expected inputs and outputs
|
|
std::vector<int64_t> expected_dims_y = {3, 2};
|
|
// now the expected value should be equal result.
|
|
std::vector<float> expected_values_y = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f};
|
|
|
|
// Now run
|
|
RunSession(session_object, dims_x, values_x, expected_dims_y, expected_values_y);
|
|
}
|
|
|
|
// Regression test for OnnxRuntimeOpSchemaRegistry::GetSchemaAndHistory needing to reset `version` before
|
|
// falling through to the ONNX schema lookup.
|
|
//
|
|
// If there is a custom registry that matches the ONNX domain but not the current op, we fall though but need to
|
|
// use the original opset version and ignore any version values found in the custom registry.
|
|
//
|
|
// If we regress we will match Clip(1) which only had one input. The model uses Clip(11) and has two inputs. The ONNX
|
|
// checker will fail if this happens.
|
|
TEST(CustomKernelTests, CustomOnnxKernelSchemaLookup) {
|
|
SessionOptions so;
|
|
so.session_logid = "CustomOnnxKernelSchemaLookup";
|
|
|
|
auto schema = GetFooSchema();
|
|
auto def = FooKernelDef();
|
|
schema.SetDomain(onnxruntime::kOnnxDomain);
|
|
def.SetDomain(onnxruntime::kOnnxDomain);
|
|
|
|
std::vector<OpSchema> schemas = {schema};
|
|
std::shared_ptr<CustomRegistry> registry = std::make_shared<CustomRegistry>();
|
|
EXPECT_STATUS_OK(registry->RegisterOpSet(schemas, onnxruntime::kOnnxDomain, 1, 1000));
|
|
EXPECT_STATUS_OK(registry->RegisterCustomKernel(def, CreateFooKernel));
|
|
|
|
InferenceSession session_object{so, GetEnvironment()};
|
|
EXPECT_STATUS_OK(session_object.RegisterCustomRegistry(registry));
|
|
EXPECT_STATUS_OK(session_object.Load(FOO_CLIP_MODEL_URI));
|
|
EXPECT_STATUS_OK(session_object.Initialize());
|
|
}
|
|
} // namespace test
|
|
} // namespace onnxruntime
|