diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 8ac52f5ff3..ff010542c5 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -89,6 +89,11 @@ file(GLOB onnxruntime_test_ir_src "${TEST_SRC_DIR}/ir/*.h" ) +file(GLOB onnxruntime_test_optimizer_src + "${TEST_SRC_DIR}/optimizer/*.cc" + "${TEST_SRC_DIR}/optimizer/*.h" + ) + set(onnxruntime_test_framework_src_patterns "${TEST_SRC_DIR}/framework/*.cc" "${TEST_SRC_DIR}/framework/*.h" @@ -134,9 +139,16 @@ set(onnxruntime_test_ir_libs onnxruntime_common ) +set(onnxruntime_test_optimizer_libs + onnxruntime_test_utils + onnxruntime_framework + onnxruntime_util + onnxruntime_graph + onnxruntime_common +) + set(onnxruntime_test_framework_libs onnxruntime_test_utils_for_framework - onnxruntime_optimizer onnxruntime_framework onnxruntime_util onnxruntime_graph @@ -221,7 +233,7 @@ target_include_directories(onnxruntime_test_utils PUBLIC "${TEST_SRC_DIR}/util/i if (SingleUnitTestProject) - set(all_tests ${onnxruntime_test_common_src} ${onnxruntime_test_ir_src} ${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src}) + set(all_tests ${onnxruntime_test_common_src} ${onnxruntime_test_ir_src} ${onnxruntime_test_optimizer_src} ${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src}) set(all_dependencies ${onnxruntime_test_providers_dependencies} ) if (onnxruntime_USE_TVM) @@ -267,6 +279,13 @@ else() DEPENDS ${onnxruntime_EXTERNAL_DEPENDENCIES} ) + AddTest( + TARGET onnxruntime_test_optimizer + SOURCES ${onnxruntime_test_optimizer_src} + LIBS ${onnxruntime_test_optimizer_libs} + DEPENDS ${onnxruntime_EXTERNAL_DEPENDENCIES} + ) + AddTest( TARGET onnxruntime_test_framework SOURCES ${onnxruntime_test_framework_src} diff --git a/onnxruntime/core/framework/execution_frame.cc b/onnxruntime/core/framework/execution_frame.cc index bb98207aec..8d96a160e3 100644 --- a/onnxruntime/core/framework/execution_frame.cc +++ 
b/onnxruntime/core/framework/execution_frame.cc @@ -20,7 +20,7 @@ IExecutionFrame::IExecutionFrame(const std::vector& feed_mlvalue_idxs, const std::vector& feeds, const std::unordered_map& initializers, const std::vector& fetch_mlvalue_idxs, - std::vector& fetches, + const std::vector& fetches, const MLValueNameIdxMap& mlvalue_idx_map, const NodeIndexInfo& node_index_info) : node_index_info_{node_index_info}, fetch_mlvalue_idxs_{fetch_mlvalue_idxs} { @@ -163,7 +163,7 @@ bool IExecutionFrame::IsOutput(int mlvalue_idx) const { ExecutionFrame::ExecutionFrame(const std::vector& feed_mlvalue_idxs, const std::vector& feeds, const std::vector& fetch_mlvalue_idxs, - std::vector& fetches, + const std::vector& fetches, const std::unordered_map& fetch_allocators, const SessionState& session_state) : IExecutionFrame(feed_mlvalue_idxs, feeds, session_state.GetInitializedTensors(), fetch_mlvalue_idxs, fetches, diff --git a/onnxruntime/core/framework/execution_frame.h b/onnxruntime/core/framework/execution_frame.h index ca0e144260..3182274687 100644 --- a/onnxruntime/core/framework/execution_frame.h +++ b/onnxruntime/core/framework/execution_frame.h @@ -29,7 +29,7 @@ class IExecutionFrame { const std::vector& feeds, const std::unordered_map& initializers, const std::vector& fetch_mlvalue_idxs, - std::vector& fetches, + const std::vector& fetches, const MLValueNameIdxMap& mlvalue_idx_map, const NodeIndexInfo& node_index_info); @@ -103,7 +103,7 @@ class ExecutionFrame final : public IExecutionFrame { ExecutionFrame(const std::vector& feed_mlvalue_idxs, const std::vector& feeds, const std::vector& fetch_mlvalue_idxs, - std::vector& fetches, + const std::vector& fetches, // optional custom allocators. 
key is index in fetches const std::unordered_map& fetch_allocators, const SessionState& session_state); diff --git a/onnxruntime/core/optimizer/optimizer_execution_frame.cc b/onnxruntime/core/optimizer/optimizer_execution_frame.cc new file mode 100644 index 0000000000..f0289f8041 --- /dev/null +++ b/onnxruntime/core/optimizer/optimizer_execution_frame.cc @@ -0,0 +1,134 @@ + +#include "core/common/common.h" +#include "core/common/status.h" +#include "core/common/logging/logging.h" +#include "core/common/logging/macros.h" +#include "core/framework/tensorprotoutils.h" +#include "core/framework/data_types.h" +#include "core/framework/mldata_type_utils.h" +#include "core/framework/kernel_registry.h" +#include "core/framework/fuse_nodes_funcs.h" +#include "core/optimizer/optimizer_execution_frame.h" + +namespace onnxruntime { + +OptimizerExecutionFrame::Info::Info(const std::vector& nodes, + const InitializedTensorSet& initialized_tensor_set) { + // Create CPU execution provider + // For now, a CPU execution provider is created every time Info is initialized. + // Later, this will be changed so the provider is passed in via the Info ctor. + cpu_execution_provider_ = std::make_unique(CPUExecutionProviderInfo()); + allocator_ptr_ = cpu_execution_provider_->GetAllocator(device_id_, mem_type_); + ORT_ENFORCE(allocator_ptr_ != nullptr, "Failed to get allocator for optimizer"); + + // Create MLValues related maps + auto initialize_maps = [this, &initialized_tensor_set](const NodeArg& arg, size_t /*index*/) -> Status { + int idx = mlvalue_name_idx_map_.Add(arg.Name()); + mlvalue_idx_nodearg_map_[idx] = &arg; + + // Only create MLValue instances for initializers used by an array of nodes.
+ InitializedTensorSet::const_iterator it = initialized_tensor_set.find(arg.Name()); + if (it != initialized_tensor_set.cend()) { + MLValue mlvalue; + utils::TensorProtoToMLValue(*(it->second), allocator_ptr_, nullptr, 0, mlvalue); + initializers_[idx] = mlvalue; + } + + return Status::OK(); + }; + + // TODO: node->ImplicitInputDefs() need to be added here for control flow nodes. + for (auto* node : nodes) { + onnxruntime::Node::ForEachWithIndex(node->InputDefs(), initialize_maps); + onnxruntime::Node::ForEachWithIndex(node->OutputDefs(), initialize_maps); + } + + node_index_info_ = std::make_unique(nodes, mlvalue_name_idx_map_); + + // create kernels for these nodes + for (auto* node : nodes) { + std::unique_ptr op_kernel; + std::shared_ptr kernel_registry = cpu_execution_provider_->GetKernelRegistry(); + auto status = kernel_registry->TryCreateKernel(*node, + *cpu_execution_provider_, + initializers_, + mlvalue_name_idx_map_, + FuncManager(), + op_kernel); + kernels_[node->Index()] = std::move(op_kernel); + } +} + +const OpKernel* OptimizerExecutionFrame::Info::GetKernel(NodeIndex node_id) const { + if (kernels_.find(node_id) == kernels_.cend()) { + return nullptr; + } + + return kernels_.at(node_id).get(); +} + +// For optimizer, probably no need to pass feed_mlvalue_idxs, feeds to initialize IExecutionFrame. +// If needed, the parameters of OptimizerExecutionFrame ctor can be changed later. +OptimizerExecutionFrame::OptimizerExecutionFrame(const Info& info, + const std::vector& fetch_mlvalue_idxs) + : IExecutionFrame(std::vector(), + std::vector(), + info.GetInitializers(), + fetch_mlvalue_idxs, + std::vector(), + info.GetMLValueNameIdxMap(), + info.GetNodeIndexInfo()), + info_(info) { +} + +OptimizerExecutionFrame::~OptimizerExecutionFrame() = default; + +AllocatorPtr OptimizerExecutionFrame::GetAllocatorImpl(const OrtAllocatorInfo& info) const { + return info_.GetAllocator(info); +} + +// This method is not thread safe! 
+// Return S_OK and nullptr if index maps to a value that is an unused optional input/output +Status OptimizerExecutionFrame::CreateNodeOutputMLValueImpl(MLValue& mlvalue, int mlvalue_idx, const TensorShape* shape) { + const DataTypeImpl* ml_type = utils::GetMLDataType(*(info_.GetMLValueIdxNodeArgMap().at(mlvalue_idx))); + if (ml_type == nullptr) + return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, + "Tried to allocate without valid type information, mlvalue index=" + std::to_string(mlvalue_idx)); + if (!ml_type->IsTensorType()) { + const NonTensorTypeBase* non_tensor_type = static_cast(ml_type); + auto creator = non_tensor_type->GetCreateFunc(); + mlvalue.Init(creator(), + non_tensor_type, + non_tensor_type->GetDeleteFunc()); + return Status::OK(); + } + + // tensors + auto element_type = static_cast(ml_type)->GetElementType(); + AllocatorPtr allocator_ptr = info_.GetAllocator(); + OrtAllocatorInfo allocator_info = allocator_ptr->Info(); + + int64_t len = shape->Size(); + if (len < 0) { + return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Tensor shape cannot contain any negative value. Shape was:", *shape); + } + size_t size; + if (!IAllocator::CalcMemSizeForArrayWithAlignment<64>(len, element_type->Size(), &size)) { + return Status(common::ONNXRUNTIME, common::FAIL, "size overflow"); + } + + void* buffer = size == 0 ? nullptr : allocator_ptr->Alloc(size); + std::unique_ptr p_tensor = std::make_unique(element_type, + *shape, + buffer, + allocator_info, + allocator_ptr); + + mlvalue.Init(p_tensor.release(), + DataTypeImpl::GetType(), + DataTypeImpl::GetType()->GetDeleteFunc()); + + return Status::OK(); +} + +} \ No newline at end of file diff --git a/onnxruntime/core/optimizer/optimizer_execution_frame.h b/onnxruntime/core/optimizer/optimizer_execution_frame.h new file mode 100644 index 0000000000..63b5a43e81 --- /dev/null +++ b/onnxruntime/core/optimizer/optimizer_execution_frame.h @@ -0,0 +1,73 @@ +// Copyright (c) Microsoft Corporation.
All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/graph/graph.h" +#include "core/providers/cpu/cpu_execution_provider.h" +#include "core/framework/execution_frame.h" +#include "core/framework/mlvalue_name_idx_map.h" +#include "core/framework/ml_value.h" + +namespace onnxruntime { + +class OptimizerExecutionFrame final : public IExecutionFrame { + public: + class Info { + public: + Info(const std::vector& nodes, + const InitializedTensorSet& initialized_tensor_set); + + AllocatorPtr GetAllocator(const OrtAllocatorInfo& info) const { + return cpu_execution_provider_->GetAllocator(info.id, info.mem_type); + } + + AllocatorPtr GetAllocator() const { + return allocator_ptr_; + } + + const MLValueNameIdxMap& GetMLValueNameIdxMap() const noexcept { return mlvalue_name_idx_map_; } + const std::unordered_map& GetMLValueIdxNodeArgMap() const noexcept { return mlvalue_idx_nodearg_map_; } + const std::unordered_map& GetInitializers() const noexcept { return initializers_; } + const NodeIndexInfo& GetNodeIndexInfo() const { return *node_index_info_; } + int GetMLValueIndex(const std::string& name) const { + int index = -1; + if (mlvalue_name_idx_map_.GetIdx(name, index) == Status::OK()) { + return index; + } + return -1; + } + + const OpKernel* GetKernel(NodeIndex node_id) const; + + private: + // The optimizer is running on CPU execution provider by default. 
+ std::unique_ptr cpu_execution_provider_; + const int device_id_{0}; + const OrtMemType mem_type_{OrtMemTypeDefault}; + AllocatorPtr allocator_ptr_; + + // MLValues for optimizer + MLValueNameIdxMap mlvalue_name_idx_map_; + std::unordered_map mlvalue_idx_nodearg_map_; + std::unordered_map initializers_; + std::unique_ptr node_index_info_; + + std::unordered_map> kernels_; + }; + + OptimizerExecutionFrame(const Info& info, + const std::vector& fetch_mlvalue_idxs); + + ~OptimizerExecutionFrame(); + + private: + ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OptimizerExecutionFrame); + + AllocatorPtr GetAllocatorImpl(const OrtAllocatorInfo& info) const override; + Status CreateNodeOutputMLValueImpl(MLValue& mlvalue, int mlvalue_idx, const TensorShape* shape) override; + + const Info& info_; +}; + +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/test/ir/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc similarity index 100% rename from onnxruntime/test/ir/graph_transform_test.cc rename to onnxruntime/test/optimizer/graph_transform_test.cc diff --git a/onnxruntime/test/optimizer/optimizer_test.cc b/onnxruntime/test/optimizer/optimizer_test.cc new file mode 100644 index 0000000000..268b908bb9 --- /dev/null +++ b/onnxruntime/test/optimizer/optimizer_test.cc @@ -0,0 +1,94 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "core/common/logging/logging.h" +#include "core/graph/graph_viewer.h" +#include "core/graph/model.h" +#include "core/optimizer/optimizer_execution_frame.h" +#include "core/optimizer/graph_transformer.h" +#include "core/optimizer/graph_transformer_mgr.h" +#include "core/framework/data_types.h" +#include "core/framework/ml_value.h" +#include "core/framework/op_kernel.h" +#include "core/util/math.h" +#include "core/platform/env.h" +#include "test/framework/test_utils.h" +#include "test/capturing_sink.h" +#include "test/test_environment.h" +#include "gtest/gtest.h" + +using namespace std; +using namespace ONNX_NAMESPACE; + +using namespace onnx; + +namespace onnxruntime { +namespace test { + +static const std::string MODEL_FOLDER = "testdata/transform/"; + +TEST(OptimizerTest, Basic) { + Model model("OptimizerBasic"); + auto& graph = model.MainGraph(); + + const int tensor_dim = 10; + const int input_num = 2; + TensorProto initializer_tensor[input_num]; + std::vector> inputs(input_num); + std::vector> outputs(1); + InitializedTensorSet initialized_tensor_set; + + TypeProto tensor_int32; + tensor_int32.mutable_tensor_type()->set_elem_type(TensorProto_DataType_INT32); + tensor_int32.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(tensor_dim); + + for (int i = 0; i < input_num; i++) { + string name("input_" + std::to_string(i)); + inputs[i] = std::make_unique(name, &tensor_int32); + + initializer_tensor[i].set_name(inputs[i]->Name()); + initializer_tensor[i].add_dims(tensor_dim); + initializer_tensor[i].set_data_type(onnx::TensorProto_DataType_INT32); + for (int j = 0; j < tensor_dim; j++) { + initializer_tensor[i].add_int32_data((i + 1) * j); + } + initialized_tensor_set[name] = &initializer_tensor[i]; + } + outputs[0] = std::make_unique("out", &tensor_int32); + + std::vector tmp_inputs{inputs[0].get(), inputs[1].get()}; + std::vector tmp_outputs{outputs[0].get()}; + graph.AddNode("a", "Add", "a", tmp_inputs, tmp_outputs); + graph.Resolve(); 
+ + std::vector nodes; + for (auto& node : graph.Nodes()) { + nodes.push_back(&node); + } + + OptimizerExecutionFrame::Info info(nodes, initialized_tensor_set); + std::vector fetch_mlvalue_idxs{info.GetMLValueIndex("out")}; + OptimizerExecutionFrame frame(info, fetch_mlvalue_idxs); + const logging::Logger& logger = ::onnxruntime::test::DefaultLoggingManager().DefaultLogger(); + + for (auto& node : graph.Nodes()) { + auto* kernel = info.GetKernel(node.Index()); + + OpKernelContext op_kernel_context(&frame, kernel, logger); + + kernel->Compute(&op_kernel_context); + + std::vector fetches; + frame.GetOutputs(fetches); + auto& tensor = fetches[0].Get(); + const std::vector found(tensor.template Data(), tensor.template Data() + tensor_dim); + std::vector expected; + for (int j = 0; j < tensor_dim; j++) { + expected.push_back(3 * j); + } + ASSERT_EQ(expected, found); + } +} + +} // namespace test +} // namespace onnxruntime