onnxruntime/onnxruntime/test/framework/execution_frame_test.cc

264 lines
12 KiB
C++
Raw Normal View History

2018-11-20 00:48:22 +00:00
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "core/framework/execution_frame.h"
#include "core/framework/op_kernel.h"
#include "core/framework/session_state.h"
#include "core/graph/model.h"
#include "core/providers/cpu/cpu_execution_provider.h"
#include "test_utils.h"
#include "gtest/gtest.h"
using namespace ONNX_NAMESPACE;
using namespace std;
namespace onnxruntime {
namespace test {
// List of non-owning NodeArg pointers, used below as the input/output argument
// lists passed to Graph::AddNode.
using ArgMap = std::vector<onnxruntime::NodeArg*>;
std::shared_ptr<onnxruntime::Model> DummyGraphWithClip() {
auto model = std::make_shared<onnxruntime::Model>("test");
onnxruntime::Graph& graph = model->MainGraph();
TypeProto tensor_float;
tensor_float.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT);
onnxruntime::NodeArg input_def("X", &tensor_float), output_def("Y", &tensor_float);
graph.AddNode("node1", "Clip", "clip operator", ArgMap{&input_def}, ArgMap{&output_def});
return model;
}
std::unique_ptr<IExecutionProvider> CreateCPUExecutionProvider() {
CPUExecutionProviderInfo info;
return std::make_unique<CPUExecutionProvider>(info);
}
// Verifies tensor allocation through ExecutionFrame for a one-node (Clip) graph:
//  1. a tensor allocated with AllocateMLValueTensorSelfOwnBuffer ends up in the
//     frame slot with the requested shape and element type;
//  2. a second tensor allocated with AllocateMLValueTensorPreAllocateBuffer in the
//     next slot exposes the same data pointer as the first tensor.
//
// Preconditions that later statements dereference or build upon use ASSERT_*
// so that a failure stops the test instead of crashing on a null pointer or
// continuing with a half-initialized session state.
TEST(ExecutionFrameTest, TensorAllocationTest) {
  onnxruntime::Model model("test");
  onnxruntime::Graph& graph = model.MainGraph();
  TypeProto tensor_float;
  tensor_float.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT);
  onnxruntime::NodeArg input_def("X", &tensor_float), output_def("Y", &tensor_float);

  graph.AddNode("node1", "Clip", "Clip operator", ArgMap{&input_def}, ArgMap{&output_def});
  onnxruntime::Node* node = graph.GetNode(graph.NumberOfNodes() - 1);
  ASSERT_TRUE(node != nullptr);

  Status status = graph.Resolve();
  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();

  auto cpu_xp = CreateCPUExecutionProvider();
  auto xp_typ = cpu_xp->Type();
  ExecutionProviders execution_providers;
  execution_providers.Add(xp_typ, std::move(cpu_xp));

  KernelRegistryManager kernel_registry_manager;
  status = kernel_registry_manager.RegisterKernels(execution_providers);
  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();

  SessionState state{execution_providers, true};
  state.SetGraphViewer(std::make_unique<GraphViewer>(graph));

  OrtValueNameIdxMap& mlvalue_name_idx_map{state.GetOrtValueNameIdxMap()};
  mlvalue_name_idx_map.Add("X");
  mlvalue_name_idx_map.Add("Y");

  node->SetExecutionProviderType(xp_typ);

  std::unique_ptr<SequentialExecutionPlan> p_seq_exec_plan;
  // TODO below line is for testing only. In production use SequentialPlanner::CreatePlan()
  SequentialPlannerContext context(false);
  status = SequentialPlanner::CreatePlan(nullptr, GraphViewer(graph), {}, execution_providers, kernel_registry_manager,
                                         mlvalue_name_idx_map, context, p_seq_exec_plan);
  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();

  state.SetExecutionPlan(std::move(p_seq_exec_plan));
  state.CalculateNodeIndexInfo();

  vector<OrtValue> outputs;
  ExecutionFrame frame({}, {}, {}, outputs, {}, state);

  // The remaining checks address slots both via start_index and via the
  // literals 0 and 1, so the offset must be 0 for them to be meaningful.
  int start_index = frame.GetNodeOffset(node->Index());
  ASSERT_EQ(start_index, 0);

  // Allocate a self-owned 2x3 float tensor in the first slot.
  TensorShape shape(std::vector<int64_t>{2, 3});
  OrtValue* p_mlvalue0 = frame.GetMutableNodeInputOrOutputMLValue(start_index);
  ASSERT_TRUE(p_mlvalue0 != nullptr);
  status = frame.AllocateMLValueTensorSelfOwnBuffer(*p_mlvalue0, start_index, DataTypeImpl::GetType<float>(),
                                                    execution_providers.Get(xp_typ)->GetAllocator(0, OrtMemTypeDefault)->Info(), shape);
  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();

  OrtValue* p_ml_value = frame.GetMutableNodeInputOrOutputMLValue(0);
  Tensor* p_tensor = p_ml_value ? p_ml_value->GetMutable<Tensor>() : nullptr;
  ASSERT_TRUE(p_tensor != nullptr);
  EXPECT_EQ(p_tensor->Shape(), shape);
  EXPECT_EQ(p_tensor->DataType(), DataTypeImpl::GetType<float>());

  // Test sharing memory with the tensor allocated above: the second slot is
  // allocated against the first slot's buffer with a different (3x2) shape.
  TensorShape shape2(std::vector<int64_t>{3, 2});
  OrtValue* p_mlvalue1 = frame.GetMutableNodeInputOrOutputMLValue(start_index + 1);
  ASSERT_TRUE(p_mlvalue1 != nullptr);
  status = frame.AllocateMLValueTensorPreAllocateBuffer(*p_mlvalue1,
                                                        start_index,
                                                        DataTypeImpl::GetType<float>(),
                                                        p_tensor->Location(),
                                                        shape2);
  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();

  const OrtValue* p_ml_value_const = frame.GetNodeInputOrOutputMLValue(1);
  auto tensor2 = p_ml_value_const ? &(p_ml_value_const->Get<Tensor>()) : nullptr;
  ASSERT_TRUE(tensor2 != nullptr);
  EXPECT_EQ(tensor2->Shape(), shape2);
  // Same data pointer => the two tensors share one buffer.
  EXPECT_EQ(tensor2->Data<float>(), p_tensor->Data<float>());
}
// Verifies that an OrtValue passed to ExecutionFrame as a feed for "X" is
// visible in frame slot 0 with the same shape, element type and data pointer
// as the value that was fed in.
//
// Setup steps whose failure would invalidate the rest of the test use ASSERT_*,
// and the Resolve() status is checked rather than discarded.
TEST(ExecutionFrameTest, FeedInDataTest) {
  onnxruntime::Model model("test");
  onnxruntime::Graph& graph = model.MainGraph();
  TypeProto tensor_float;
  tensor_float.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT);
  onnxruntime::NodeArg input_def("X", &tensor_float), output_def("Y", &tensor_float);

  graph.AddNode("node1", "Clip", "Clip operator", ArgMap{&input_def}, ArgMap{&output_def});
  Status status = graph.Resolve();
  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();

  auto cpu_allocator = TestCPUExecutionProvider()->GetAllocator(0, OrtMemTypeDefault);
  auto element_type = DataTypeImpl::GetType<float>();
  TensorShape shape({3, 2});

  // Create a fake ml value with an owned buffer. Ownership of the tensor is
  // handed to `value` together with the Tensor type's delete function.
  std::unique_ptr<Tensor> p_tensor = std::make_unique<Tensor>(element_type,
                                                              shape,
                                                              cpu_allocator);
  OrtValue value;
  value.Init(p_tensor.release(),
             DataTypeImpl::GetType<Tensor>(),
             DataTypeImpl::GetType<Tensor>()->GetDeleteFunc());

  auto cpu_xp = CreateCPUExecutionProvider();
  auto xp_typ = cpu_xp->Type();

  KernelRegistryManager kernel_registry_manager;
  ExecutionProviders execution_providers;
  execution_providers.Add(xp_typ, std::move(cpu_xp));
  status = kernel_registry_manager.RegisterKernels(execution_providers);
  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();

  SessionState state{execution_providers, true};
  state.SetGraphViewer(std::make_unique<GraphViewer>(graph));

  OrtValueNameIdxMap& mlvalue_name_idx_map{state.GetOrtValueNameIdxMap()};
  auto x_idx = mlvalue_name_idx_map.Add("X");
  auto y_idx = mlvalue_name_idx_map.Add("Y");

  state.CalculateNodeIndexInfo();

  vector<OrtValue> outputs;
  ExecutionFrame frame({x_idx}, {value}, {y_idx}, outputs, {}, state);

  OrtValue* p_ml_value = frame.GetMutableNodeInputOrOutputMLValue(0);
  Tensor* p_tensor_arg_0 = p_ml_value ? p_ml_value->GetMutable<Tensor>() : nullptr;
  // Fatal on purpose: every check below dereferences this pointer.
  ASSERT_TRUE(p_tensor_arg_0 != nullptr);
  EXPECT_EQ(p_tensor_arg_0->Shape(), shape);
  EXPECT_EQ(p_tensor_arg_0->DataType(), DataTypeImpl::GetType<float>());
  // Same data pointer => the frame refers to the fed buffer rather than a copy.
  EXPECT_EQ(p_tensor_arg_0->MutableData<float>(), value.GetMutable<Tensor>()->MutableData<float>());
}
// Verifies memory-pattern generation for a three-node graph
// (MatMul -> MatMul -> Clip) on the CPU execution provider:
// after allocating the three intermediate/output tensors (slots 3, 4, 5)
// through the frame, GeneratePatterns must yield exactly one pattern whose
// peak size and block offsets match the expectations asserted at the end.
//
// Setup statuses (including RegisterKernels) are checked, and pointers returned
// by the frame / pattern lookups are asserted non-null before being
// dereferenced, so a failure is reported by gtest instead of crashing.
TEST(ExecutionFrameTest, MemPatternTest) {
  auto cpu_xp = CreateCPUExecutionProvider();
  auto xp_type = cpu_xp->Type();

  std::unordered_map<std::string, int> domain_to_version;
  domain_to_version[onnxruntime::kOnnxDomain] = 7;
  onnxruntime::Model model("test", true, ModelMetaData(), IOnnxRuntimeOpSchemaRegistryList(), domain_to_version);
  onnxruntime::Graph& graph = model.MainGraph();

  TypeProto tensor_float;
  tensor_float.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT);
  onnxruntime::NodeArg input_def1("X1", &tensor_float),
      input_def2("X2", &tensor_float),
      input_def3("X3", &tensor_float),
      gemm1_out_def("T1", &tensor_float),
      gemm2_out_def("T2", &tensor_float),
      clip_out_def("T3", &tensor_float);

  graph.AddNode("node1", "MatMul", "gemm1", ArgMap{&input_def1, &input_def2}, ArgMap{&gemm1_out_def})
      .SetExecutionProviderType(xp_type);
  graph.AddNode("node2", "MatMul", "gemm2", ArgMap{&gemm1_out_def, &input_def3}, ArgMap{&gemm2_out_def})
      .SetExecutionProviderType(xp_type);
  graph.AddNode("node3", "Clip", "clip1", ArgMap{&gemm2_out_def}, ArgMap{&clip_out_def})
      .SetExecutionProviderType(xp_type);

  auto status = graph.Resolve();
  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();

  KernelRegistryManager kernel_registry_manager;
  ExecutionProviders execution_providers;
  execution_providers.Add(xp_type, std::move(cpu_xp));
  status = kernel_registry_manager.RegisterKernels(execution_providers);
  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();

  // 1. prepare input
  SessionState state{execution_providers, true};
  state.SetGraphViewer(std::make_unique<GraphViewer>(graph));

  OrtValueNameIdxMap& mlvalue_name_idx_map{state.GetOrtValueNameIdxMap()};
  auto x1_idx = mlvalue_name_idx_map.Add("X1");
  auto x2_idx = mlvalue_name_idx_map.Add("X2");
  auto x3_idx = mlvalue_name_idx_map.Add("X3");
  mlvalue_name_idx_map.Add("T1");
  mlvalue_name_idx_map.Add("T2");
  auto t3_idx = mlvalue_name_idx_map.Add("T3");

  auto cpu_allocator = execution_providers.Get(xp_type)->GetAllocator(0, OrtMemTypeDefault);

  OrtValue v1, v2, v3;
  CreateMLValue<float>(cpu_allocator,
                       std::vector<int64_t>{1, 2},
                       std::vector<float>{1.0f, 1.0f}, &v1);
  CreateMLValue<float>(cpu_allocator,
                       std::vector<int64_t>{2, 2},
                       std::vector<float>(4, 1.0f), &v2);
  CreateMLValue<float>(cpu_allocator,
                       std::vector<int64_t>{2, 3},
                       std::vector<float>(6, 1.0f), &v3);

  std::unique_ptr<SequentialExecutionPlan> p_seq_exec_plan = std::make_unique<SequentialExecutionPlan>();
  SequentialPlannerContext context(false);
  status = SequentialPlanner::CreatePlan(nullptr, GraphViewer(graph), {}, execution_providers, kernel_registry_manager,
                                         mlvalue_name_idx_map, context, p_seq_exec_plan);
  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();

  state.SetExecutionPlan(std::move(p_seq_exec_plan));
  state.CalculateNodeIndexInfo();

  vector<OrtValue> outputs;
  ExecutionFrame frame({x1_idx, x2_idx, x3_idx}, {v1, v2, v3}, {t3_idx}, outputs, {}, state);

  // Slots 3, 4 and 5 are the ones allocated through the frame below; with the
  // registration order above they follow X1, X2, X3 (T1, T2, T3).
  OrtValue* p_mlvalue3 = frame.GetMutableNodeInputOrOutputMLValue(3);
  OrtValue* p_mlvalue4 = frame.GetMutableNodeInputOrOutputMLValue(4);
  OrtValue* p_mlvalue5 = frame.GetMutableNodeInputOrOutputMLValue(5);
  ASSERT_TRUE(p_mlvalue3 != nullptr);
  ASSERT_TRUE(p_mlvalue4 != nullptr);
  ASSERT_TRUE(p_mlvalue5 != nullptr);

  status = frame.AllocateMLValueTensorSelfOwnBuffer(*p_mlvalue3, 3,
                                                    DataTypeImpl::GetType<float>(),
                                                    cpu_allocator->Info(),
                                                    TensorShape(std::vector<int64_t>{2, 2}));
  EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();

  status = frame.AllocateMLValueTensorSelfOwnBuffer(*p_mlvalue4, 4,
                                                    DataTypeImpl::GetType<float>(),
                                                    cpu_allocator->Info(),
                                                    TensorShape(std::vector<int64_t>{2, 3}));
  EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();

  status = frame.AllocateMLValueTensorSelfOwnBuffer(*p_mlvalue5, 5,
                                                    DataTypeImpl::GetType<float>(),
                                                    cpu_allocator->Info(),
                                                    TensorShape(std::vector<int64_t>{2, 3}));
  EXPECT_TRUE(status.IsOK()) << status.ErrorMessage();

  MemoryPatternGroup pattern;
  status = frame.GeneratePatterns(&pattern);
  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();

  EXPECT_EQ(pattern.patterns.size(), pattern.locations.size());
  EXPECT_EQ(pattern.patterns.size(), 1u);

  auto p = pattern.GetPatterns(cpu_allocator->Info());
  ASSERT_TRUE(p != nullptr);
  EXPECT_EQ(p->PeakSize(), 2u * 64u);  // each allocation is 64-byte aligned

  auto block3 = p->GetBlock(3);
  auto block4 = p->GetBlock(4);
  ASSERT_TRUE(block3 != nullptr);
  ASSERT_TRUE(block4 != nullptr);
  EXPECT_EQ(block3->offset_, 0);
  EXPECT_EQ(block4->offset_, 64);
}
} // namespace test
} // namespace onnxruntime