mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-18 21:21:17 +00:00
Create OptimizerExecutionFrame for graph optimization (#526)
* Create OptimizerExecutionFrame for the optimizer. With this change, the optimizer can easily invoke CPU kernels for graph optimization.
This commit is contained in:
parent
290c472839
commit
8a59287c46
7 changed files with 326 additions and 6 deletions
|
|
@ -89,6 +89,11 @@ file(GLOB onnxruntime_test_ir_src
|
|||
"${TEST_SRC_DIR}/ir/*.h"
|
||||
)
|
||||
|
||||
file(GLOB onnxruntime_test_optimizer_src
|
||||
"${TEST_SRC_DIR}/optimizer/*.cc"
|
||||
"${TEST_SRC_DIR}/optimizer/*.h"
|
||||
)
|
||||
|
||||
set(onnxruntime_test_framework_src_patterns
|
||||
"${TEST_SRC_DIR}/framework/*.cc"
|
||||
"${TEST_SRC_DIR}/framework/*.h"
|
||||
|
|
@ -134,9 +139,16 @@ set(onnxruntime_test_ir_libs
|
|||
onnxruntime_common
|
||||
)
|
||||
|
||||
set(onnxruntime_test_optimizer_libs
|
||||
onnxruntime_test_utils
|
||||
onnxruntime_framework
|
||||
onnxruntime_util
|
||||
onnxruntime_graph
|
||||
onnxruntime_common
|
||||
)
|
||||
|
||||
set(onnxruntime_test_framework_libs
|
||||
onnxruntime_test_utils_for_framework
|
||||
onnxruntime_optimizer
|
||||
onnxruntime_framework
|
||||
onnxruntime_util
|
||||
onnxruntime_graph
|
||||
|
|
@ -221,7 +233,7 @@ target_include_directories(onnxruntime_test_utils PUBLIC "${TEST_SRC_DIR}/util/i
|
|||
|
||||
|
||||
if (SingleUnitTestProject)
|
||||
set(all_tests ${onnxruntime_test_common_src} ${onnxruntime_test_ir_src} ${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src})
|
||||
set(all_tests ${onnxruntime_test_common_src} ${onnxruntime_test_ir_src} ${onnxruntime_test_optimizer_src} ${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src})
|
||||
set(all_dependencies ${onnxruntime_test_providers_dependencies} )
|
||||
|
||||
if (onnxruntime_USE_TVM)
|
||||
|
|
@ -267,6 +279,13 @@ else()
|
|||
DEPENDS ${onnxruntime_EXTERNAL_DEPENDENCIES}
|
||||
)
|
||||
|
||||
AddTest(
|
||||
TARGET onnxruntime_test_optimizer
|
||||
SOURCES ${onnxruntime_test_optimizer_src}
|
||||
LIBS ${onnxruntime_test_optimizer_libs}
|
||||
DEPENDS ${onnxruntime_EXTERNAL_DEPENDENCIES}
|
||||
)
|
||||
|
||||
AddTest(
|
||||
TARGET onnxruntime_test_framework
|
||||
SOURCES ${onnxruntime_test_framework_src}
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ IExecutionFrame::IExecutionFrame(const std::vector<int>& feed_mlvalue_idxs,
|
|||
const std::vector<MLValue>& feeds,
|
||||
const std::unordered_map<int, MLValue>& initializers,
|
||||
const std::vector<int>& fetch_mlvalue_idxs,
|
||||
std::vector<MLValue>& fetches,
|
||||
const std::vector<MLValue>& fetches,
|
||||
const MLValueNameIdxMap& mlvalue_idx_map,
|
||||
const NodeIndexInfo& node_index_info)
|
||||
: node_index_info_{node_index_info}, fetch_mlvalue_idxs_{fetch_mlvalue_idxs} {
|
||||
|
|
@ -163,7 +163,7 @@ bool IExecutionFrame::IsOutput(int mlvalue_idx) const {
|
|||
ExecutionFrame::ExecutionFrame(const std::vector<int>& feed_mlvalue_idxs,
|
||||
const std::vector<MLValue>& feeds,
|
||||
const std::vector<int>& fetch_mlvalue_idxs,
|
||||
std::vector<MLValue>& fetches,
|
||||
const std::vector<MLValue>& fetches,
|
||||
const std::unordered_map<size_t, IExecutor::CustomAllocator>& fetch_allocators,
|
||||
const SessionState& session_state)
|
||||
: IExecutionFrame(feed_mlvalue_idxs, feeds, session_state.GetInitializedTensors(), fetch_mlvalue_idxs, fetches,
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ class IExecutionFrame {
|
|||
const std::vector<MLValue>& feeds,
|
||||
const std::unordered_map<int, MLValue>& initializers,
|
||||
const std::vector<int>& fetch_mlvalue_idxs,
|
||||
std::vector<MLValue>& fetches,
|
||||
const std::vector<MLValue>& fetches,
|
||||
const MLValueNameIdxMap& mlvalue_idx_map,
|
||||
const NodeIndexInfo& node_index_info);
|
||||
|
||||
|
|
@ -103,7 +103,7 @@ class ExecutionFrame final : public IExecutionFrame {
|
|||
ExecutionFrame(const std::vector<int>& feed_mlvalue_idxs,
|
||||
const std::vector<MLValue>& feeds,
|
||||
const std::vector<int>& fetch_mlvalue_idxs,
|
||||
std::vector<MLValue>& fetches,
|
||||
const std::vector<MLValue>& fetches,
|
||||
// optional custom allocators. key is index in fetches
|
||||
const std::unordered_map<size_t, IExecutor::CustomAllocator>& fetch_allocators,
|
||||
const SessionState& session_state);
|
||||
|
|
|
|||
134
onnxruntime/core/optimizer/optimizer_execution_frame.cc
Normal file
134
onnxruntime/core/optimizer/optimizer_execution_frame.cc
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
|
||||
#include "core/common/common.h"
|
||||
#include "core/common/status.h"
|
||||
#include "core/common/logging/logging.h"
|
||||
#include "core/common/logging/macros.h"
|
||||
#include "core/framework/tensorprotoutils.h"
|
||||
#include "core/framework/data_types.h"
|
||||
#include "core/framework/mldata_type_utils.h"
|
||||
#include "core/framework/kernel_registry.h"
|
||||
#include "core/framework/fuse_nodes_funcs.h"
|
||||
#include "core/optimizer/optimizer_execution_frame.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
// Prepares the state required to run CPU kernels for `nodes` during graph optimization:
//  - a CPU execution provider and the allocator obtained from it,
//  - the name -> index map and index -> NodeArg map for every input/output def of `nodes`,
//  - an MLValue for each entry of `initialized_tensor_set` that is referenced by one of `nodes`,
//  - the NodeIndexInfo built from `nodes`, and
//  - one OpKernel per node for which the CPU kernel registry can create a kernel.
//
// Fails via ORT_ENFORCE when the allocator is unavailable or when an initializer
// cannot be converted to an MLValue. Nodes whose kernel cannot be created are
// skipped, so GetKernel() returns nullptr for them.
OptimizerExecutionFrame::Info::Info(const std::vector<const Node*>& nodes,
                                    const InitializedTensorSet& initialized_tensor_set) {
  // Create CPU execution provider.
  // For now, a CPU execution provider is created every time Info is initialized.
  // Later, it will be changed to be passed in through the Info ctor.
  cpu_execution_provider_ = std::make_unique<CPUExecutionProvider>(CPUExecutionProviderInfo());
  allocator_ptr_ = cpu_execution_provider_->GetAllocator(device_id_, mem_type_);
  ORT_ENFORCE(allocator_ptr_ != nullptr, "Failed to get allocator for optimizer");

  // Create MLValues related maps
  auto initialize_maps = [this, &initialized_tensor_set](const NodeArg& arg, size_t /*index*/) -> Status {
    const int idx = mlvalue_name_idx_map_.Add(arg.Name());
    mlvalue_idx_nodearg_map_[idx] = &arg;

    // Only create MLValue instances for initializers used by an array of nodes.
    const auto it = initialized_tensor_set.find(arg.Name());
    if (it != initialized_tensor_set.cend()) {
      MLValue mlvalue;
      const Status convert_status = utils::TensorProtoToMLValue(*(it->second), allocator_ptr_, nullptr, 0, mlvalue);
      if (!convert_status.IsOK()) {
        // Do not register a half-built MLValue as an initializer; report the failure instead.
        return convert_status;
      }
      initializers_[idx] = mlvalue;
    }

    return Status::OK();
  };

  // TODO: node->ImplicitInputDefs() need to be added here for control flow nodes.
  for (auto* node : nodes) {
    const Status input_status = onnxruntime::Node::ForEachWithIndex(node->InputDefs(), initialize_maps);
    ORT_ENFORCE(input_status.IsOK(), input_status.ErrorMessage());

    const Status output_status = onnxruntime::Node::ForEachWithIndex(node->OutputDefs(), initialize_maps);
    ORT_ENFORCE(output_status.IsOK(), output_status.ErrorMessage());
  }

  node_index_info_ = std::make_unique<NodeIndexInfo>(nodes, mlvalue_name_idx_map_);

  // create kernels for these nodes
  // The registry does not depend on the node, so look it up once.
  const std::shared_ptr<KernelRegistry> kernel_registry = cpu_execution_provider_->GetKernelRegistry();
  for (auto* node : nodes) {
    std::unique_ptr<OpKernel> op_kernel;
    const Status create_status = kernel_registry->TryCreateKernel(*node,
                                                                  *cpu_execution_provider_,
                                                                  initializers_,
                                                                  mlvalue_name_idx_map_,
                                                                  FuncManager(),
                                                                  op_kernel);
    if (!create_status.IsOK() || op_kernel == nullptr) {
      // No usable CPU kernel for this node: leave it out of the map so that
      // GetKernel() reports nullptr rather than storing a null entry.
      continue;
    }

    kernels_[node->Index()] = std::move(op_kernel);
  }
}
|
||||
|
||||
// Looks up the kernel stored for `node_id`.
// Returns nullptr when no entry exists for that node index.
const OpKernel* OptimizerExecutionFrame::Info::GetKernel(NodeIndex node_id) const {
  const auto entry = kernels_.find(node_id);
  return entry == kernels_.cend() ? nullptr : entry->second.get();
}
|
||||
|
||||
// For optimizer, probably no need to pass feed_mlvalue_idxs, feeds to initialize IExecutionFrame.
|
||||
// If needed, the parameters of OptimizerExecutionFrame ctor can be changed later.
|
||||
OptimizerExecutionFrame::OptimizerExecutionFrame(const Info& info,
|
||||
const std::vector<int>& fetch_mlvalue_idxs)
|
||||
: IExecutionFrame(std::vector<int>(),
|
||||
std::vector<MLValue>(),
|
||||
info.GetInitializers(),
|
||||
fetch_mlvalue_idxs,
|
||||
std::vector<MLValue>(),
|
||||
info.GetMLValueNameIdxMap(),
|
||||
info.GetNodeIndexInfo()),
|
||||
info_(info) {
|
||||
}
|
||||
|
||||
// Defaulted destructor, defined out of line in this translation unit.
OptimizerExecutionFrame::~OptimizerExecutionFrame() = default;
|
||||
|
||||
// Resolves the allocator for `info` by delegating to the Info object this frame was built from.
AllocatorPtr OptimizerExecutionFrame::GetAllocatorImpl(const OrtAllocatorInfo& info) const {
  AllocatorPtr allocator = info_.GetAllocator(info);
  return allocator;
}
|
||||
|
||||
// This method is not thread safe!
|
||||
// Returns S_OK and nullptr if the index maps to a value that is an unused optional input/output
|
||||
// Creates the output MLValue for `mlvalue_idx` and stores it in `mlvalue`.
//
// The type is taken from the NodeArg registered for `mlvalue_idx` in Info:
//  - non-tensor types are created through the type's own create/delete functions
//    (`shape` is not used on this path);
//  - tensor types get a buffer from the Info allocator sized for `*shape`
//    (no buffer is allocated when the computed size is 0).
//
// Returns INVALID_ARGUMENT when no NodeArg or type information is available for
// the index, when `shape` is null for a tensor, or when the shape has a negative
// size; returns FAIL when the byte size computation overflows.
Status OptimizerExecutionFrame::CreateNodeOutputMLValueImpl(MLValue& mlvalue, int mlvalue_idx, const TensorShape* shape) {
  // Use find() rather than at() so an unknown index is reported as a Status
  // like every other failure here, instead of escaping as std::out_of_range.
  const auto& idx_to_node_arg = info_.GetMLValueIdxNodeArgMap();
  const auto node_arg_it = idx_to_node_arg.find(mlvalue_idx);
  if (node_arg_it == idx_to_node_arg.cend() || node_arg_it->second == nullptr) {
    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
                  "No NodeArg registered for mlvalue index=" + std::to_string(mlvalue_idx));
  }

  const DataTypeImpl* ml_type = utils::GetMLDataType(*(node_arg_it->second));
  if (ml_type == nullptr)
    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
                  "Tried to allocate without valid type information, mlvalue index=" + std::to_string(mlvalue_idx));

  if (!ml_type->IsTensorType()) {
    const NonTensorTypeBase* non_tensor_type = static_cast<const NonTensorTypeBase*>(ml_type);
    auto creator = non_tensor_type->GetCreateFunc();
    mlvalue.Init(creator(),
                 non_tensor_type,
                 non_tensor_type->GetDeleteFunc());
    return Status::OK();
  }

  // tensors
  if (shape == nullptr) {
    // The tensor path below dereferences `shape`; reject a missing shape explicitly.
    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
                  "Tensor shape is required to allocate a tensor, mlvalue index=" + std::to_string(mlvalue_idx));
  }

  auto element_type = static_cast<const TensorTypeBase*>(ml_type)->GetElementType();
  AllocatorPtr allocator_ptr = info_.GetAllocator();
  OrtAllocatorInfo allocator_info = allocator_ptr->Info();

  int64_t len = shape->Size();
  if (len < 0) {
    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Tensor shape cannot contain any negative value. Shape was:", *shape);
  }

  size_t size;
  if (!IAllocator::CalcMemSizeForArrayWithAlignment<64>(len, element_type->Size(), &size)) {
    return Status(common::ONNXRUNTIME, common::FAIL, "size overflow");
  }

  // A zero-sized tensor carries no buffer.
  void* buffer = size == 0 ? nullptr : allocator_ptr->Alloc(size);
  std::unique_ptr<Tensor> p_tensor = std::make_unique<Tensor>(element_type,
                                                              *shape,
                                                              buffer,
                                                              allocator_info,
                                                              allocator_ptr);

  // Hand the tensor over to the MLValue together with the matching delete function.
  mlvalue.Init(p_tensor.release(),
               DataTypeImpl::GetType<Tensor>(),
               DataTypeImpl::GetType<Tensor>()->GetDeleteFunc());

  return Status::OK();
}
|
||||
|
||||
}
|
||||
73
onnxruntime/core/optimizer/optimizer_execution_frame.h
Normal file
73
onnxruntime/core/optimizer/optimizer_execution_frame.h
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "core/graph/graph.h"
|
||||
#include "core/providers/cpu/cpu_execution_provider.h"
|
||||
#include "core/framework/execution_frame.h"
|
||||
#include "core/framework/mlvalue_name_idx_map.h"
|
||||
#include "core/framework/ml_value.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
|
||||
class OptimizerExecutionFrame final : public IExecutionFrame {
|
||||
public:
|
||||
class Info {
|
||||
public:
|
||||
Info(const std::vector<const Node*>& nodes,
|
||||
const InitializedTensorSet& initialized_tensor_set);
|
||||
|
||||
AllocatorPtr GetAllocator(const OrtAllocatorInfo& info) const {
|
||||
return cpu_execution_provider_->GetAllocator(info.id, info.mem_type);
|
||||
}
|
||||
|
||||
AllocatorPtr GetAllocator() const {
|
||||
return allocator_ptr_;
|
||||
}
|
||||
|
||||
const MLValueNameIdxMap& GetMLValueNameIdxMap() const noexcept { return mlvalue_name_idx_map_; }
|
||||
const std::unordered_map<int, const NodeArg*>& GetMLValueIdxNodeArgMap() const noexcept { return mlvalue_idx_nodearg_map_; }
|
||||
const std::unordered_map<int, MLValue>& GetInitializers() const noexcept { return initializers_; }
|
||||
const NodeIndexInfo& GetNodeIndexInfo() const { return *node_index_info_; }
|
||||
int GetMLValueIndex(const std::string& name) const {
|
||||
int index = -1;
|
||||
if (mlvalue_name_idx_map_.GetIdx(name, index) == Status::OK()) {
|
||||
return index;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
const OpKernel* GetKernel(NodeIndex node_id) const;
|
||||
|
||||
private:
|
||||
// The optimizer is running on CPU execution provider by default.
|
||||
std::unique_ptr<CPUExecutionProvider> cpu_execution_provider_;
|
||||
const int device_id_{0};
|
||||
const OrtMemType mem_type_{OrtMemTypeDefault};
|
||||
AllocatorPtr allocator_ptr_;
|
||||
|
||||
// MLValues for optimizer
|
||||
MLValueNameIdxMap mlvalue_name_idx_map_;
|
||||
std::unordered_map<int, const NodeArg*> mlvalue_idx_nodearg_map_;
|
||||
std::unordered_map<int, MLValue> initializers_;
|
||||
std::unique_ptr<NodeIndexInfo> node_index_info_;
|
||||
|
||||
std::unordered_map<onnxruntime::NodeIndex, std::unique_ptr<OpKernel>> kernels_;
|
||||
};
|
||||
|
||||
OptimizerExecutionFrame(const Info& info,
|
||||
const std::vector<int>& fetch_mlvalue_idxs);
|
||||
|
||||
~OptimizerExecutionFrame();
|
||||
|
||||
private:
|
||||
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OptimizerExecutionFrame);
|
||||
|
||||
AllocatorPtr GetAllocatorImpl(const OrtAllocatorInfo& info) const override;
|
||||
Status CreateNodeOutputMLValueImpl(MLValue& mlvalue, int mlvalue_idx, const TensorShape* shape) override;
|
||||
|
||||
const Info& info_;
|
||||
};
|
||||
|
||||
} // namespace onnxruntime
|
||||
94
onnxruntime/test/optimizer/optimizer_test.cc
Normal file
94
onnxruntime/test/optimizer/optimizer_test.cc
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/common/logging/logging.h"
|
||||
#include "core/graph/graph_viewer.h"
|
||||
#include "core/graph/model.h"
|
||||
#include "core/optimizer/optimizer_execution_frame.h"
|
||||
#include "core/optimizer/graph_transformer.h"
|
||||
#include "core/optimizer/graph_transformer_mgr.h"
|
||||
#include "core/framework/data_types.h"
|
||||
#include "core/framework/ml_value.h"
|
||||
#include "core/framework/op_kernel.h"
|
||||
#include "core/util/math.h"
|
||||
#include "core/platform/env.h"
|
||||
#include "test/framework/test_utils.h"
|
||||
#include "test/capturing_sink.h"
|
||||
#include "test/test_environment.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace ONNX_NAMESPACE;
|
||||
|
||||
using namespace onnx;
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
static const std::string MODEL_FOLDER = "testdata/transform/";
|
||||
|
||||
// Builds a one-node graph (Add of two int32 initializers), runs the node's CPU
// kernel through an OptimizerExecutionFrame and checks the fetched output.
// input_0[j] = j and input_1[j] = 2 * j, so the expected output is 3 * j.
TEST(OptimizerTest, Basic) {
  Model model("OptimizerBasic");
  auto& graph = model.MainGraph();

  const int tensor_dim = 10;
  const int input_num = 2;
  TensorProto initializer_tensor[input_num];
  std::vector<std::unique_ptr<NodeArg>> inputs(input_num);
  std::vector<std::unique_ptr<NodeArg>> outputs(1);
  InitializedTensorSet initialized_tensor_set;

  TypeProto tensor_int32;
  tensor_int32.mutable_tensor_type()->set_elem_type(TensorProto_DataType_INT32);
  tensor_int32.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(tensor_dim);

  for (int i = 0; i < input_num; i++) {
    std::string name("input_" + std::to_string(i));
    inputs[i] = std::make_unique<NodeArg>(name, &tensor_int32);

    initializer_tensor[i].set_name(inputs[i]->Name());
    initializer_tensor[i].add_dims(tensor_dim);
    initializer_tensor[i].set_data_type(onnx::TensorProto_DataType_INT32);
    for (int j = 0; j < tensor_dim; j++) {
      initializer_tensor[i].add_int32_data((i + 1) * j);
    }
    initialized_tensor_set[name] = &initializer_tensor[i];
  }
  outputs[0] = std::make_unique<NodeArg>("out", &tensor_int32);

  std::vector<NodeArg*> tmp_inputs{inputs[0].get(), inputs[1].get()};
  std::vector<NodeArg*> tmp_outputs{outputs[0].get()};
  graph.AddNode("a", "Add", "a", tmp_inputs, tmp_outputs);
  const auto resolve_status = graph.Resolve();
  ASSERT_TRUE(resolve_status.IsOK()) << resolve_status.ErrorMessage();

  std::vector<const Node*> nodes;
  for (auto& node : graph.Nodes()) {
    nodes.push_back(&node);
  }

  OptimizerExecutionFrame::Info info(nodes, initialized_tensor_set);
  const int out_idx = info.GetMLValueIndex("out");
  ASSERT_GE(out_idx, 0) << "output 'out' was not registered in the MLValue index map";
  std::vector<int> fetch_mlvalue_idxs{out_idx};
  OptimizerExecutionFrame frame(info, fetch_mlvalue_idxs);
  const logging::Logger& logger = ::onnxruntime::test::DefaultLoggingManager().DefaultLogger();

  std::vector<int32_t> expected;
  for (int j = 0; j < tensor_dim; j++) {
    expected.push_back(3 * j);
  }

  for (auto& node : graph.Nodes()) {
    auto* kernel = info.GetKernel(node.Index());
    // GetKernel returns nullptr when no kernel is available; fail instead of dereferencing it.
    ASSERT_TRUE(kernel != nullptr) << "no CPU kernel was created for node " << node.Index();

    OpKernelContext op_kernel_context(&frame, kernel, logger);

    const auto compute_status = kernel->Compute(&op_kernel_context);
    ASSERT_TRUE(compute_status.IsOK()) << compute_status.ErrorMessage();

    std::vector<MLValue> fetches;
    frame.GetOutputs(fetches);
    ASSERT_EQ(fetches.size(), fetch_mlvalue_idxs.size());

    auto& tensor = fetches[0].Get<Tensor>();
    const std::vector<int32_t> found(tensor.Data<int32_t>(), tensor.Data<int32_t>() + tensor_dim);
    ASSERT_EQ(expected, found);
  }
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
Loading…
Reference in a new issue