Create OptimizerExecutionFrame for graph optimization (#526)

* Create OptimizerExecutionFrame for the optimizer. With this change, the optimizer can easily invoke CPU kernels for graph optimization.
2026-07-03 03:58:54 +00:00 · 2019-03-04 10:56:41 -08:00 · 2019-03-04 10:56:41 -08:00 · 8a59287c46
commit 8a59287c46
parent 290c472839
7 changed files with 326 additions and 6 deletions
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@ -89,6 +89,11 @@ file(GLOB onnxruntime_test_ir_src
  "${TEST_SRC_DIR}/ir/*.h"
  )

+file(GLOB onnxruntime_test_optimizer_src
+  "${TEST_SRC_DIR}/optimizer/*.cc"
+  "${TEST_SRC_DIR}/optimizer/*.h"  
+  )
+
 set(onnxruntime_test_framework_src_patterns
  "${TEST_SRC_DIR}/framework/*.cc"
  "${TEST_SRC_DIR}/framework/*.h"
@ -134,9 +139,16 @@ set(onnxruntime_test_ir_libs
  onnxruntime_common
 )

+set(onnxruntime_test_optimizer_libs
+  onnxruntime_test_utils
+  onnxruntime_framework
+  onnxruntime_util
+  onnxruntime_graph
+  onnxruntime_common
+)
+
 set(onnxruntime_test_framework_libs
  onnxruntime_test_utils_for_framework
-  onnxruntime_optimizer
  onnxruntime_framework
  onnxruntime_util
  onnxruntime_graph
@ -221,7 +233,7 @@ target_include_directories(onnxruntime_test_utils PUBLIC "${TEST_SRC_DIR}/util/i


 if (SingleUnitTestProject)
-  set(all_tests ${onnxruntime_test_common_src} ${onnxruntime_test_ir_src} ${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src})
+  set(all_tests ${onnxruntime_test_common_src} ${onnxruntime_test_ir_src} ${onnxruntime_test_optimizer_src} ${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src})
  set(all_dependencies ${onnxruntime_test_providers_dependencies} )

  if (onnxruntime_USE_TVM)
@ -267,6 +279,13 @@ else()
    DEPENDS ${onnxruntime_EXTERNAL_DEPENDENCIES}
  )

+  AddTest(
+    TARGET onnxruntime_test_optimizer
+    SOURCES ${onnxruntime_test_optimizer_src}
+    LIBS ${onnxruntime_test_optimizer_libs}
+    DEPENDS ${onnxruntime_EXTERNAL_DEPENDENCIES}
+  )
+
  AddTest(
    TARGET onnxruntime_test_framework
    SOURCES ${onnxruntime_test_framework_src}
--- a/onnxruntime/core/framework/execution_frame.cc
+++ b/onnxruntime/core/framework/execution_frame.cc
@ -20,7 +20,7 @@ IExecutionFrame::IExecutionFrame(const std::vector<int>& feed_mlvalue_idxs,
                                 const std::vector<MLValue>& feeds,
                                 const std::unordered_map<int, MLValue>& initializers,
                                 const std::vector<int>& fetch_mlvalue_idxs,
-                                 std::vector<MLValue>& fetches,
+                                 const std::vector<MLValue>& fetches,
                                 const MLValueNameIdxMap& mlvalue_idx_map,
                                 const NodeIndexInfo& node_index_info)
    : node_index_info_{node_index_info}, fetch_mlvalue_idxs_{fetch_mlvalue_idxs} {
@ -163,7 +163,7 @@ bool IExecutionFrame::IsOutput(int mlvalue_idx) const {
 ExecutionFrame::ExecutionFrame(const std::vector<int>& feed_mlvalue_idxs,
                               const std::vector<MLValue>& feeds,
                               const std::vector<int>& fetch_mlvalue_idxs,
-                               std::vector<MLValue>& fetches,
+                               const std::vector<MLValue>& fetches,
                               const std::unordered_map<size_t, IExecutor::CustomAllocator>& fetch_allocators,
                               const SessionState& session_state)
    : IExecutionFrame(feed_mlvalue_idxs, feeds, session_state.GetInitializedTensors(), fetch_mlvalue_idxs, fetches,
--- a/onnxruntime/core/framework/execution_frame.h
+++ b/onnxruntime/core/framework/execution_frame.h
@ -29,7 +29,7 @@ class IExecutionFrame {
                  const std::vector<MLValue>& feeds,
                  const std::unordered_map<int, MLValue>& initializers,
                  const std::vector<int>& fetch_mlvalue_idxs,
-                  std::vector<MLValue>& fetches,
+                  const std::vector<MLValue>& fetches,
                  const MLValueNameIdxMap& mlvalue_idx_map,
                  const NodeIndexInfo& node_index_info);

@ -103,7 +103,7 @@ class ExecutionFrame final : public IExecutionFrame {
  ExecutionFrame(const std::vector<int>& feed_mlvalue_idxs,
                 const std::vector<MLValue>& feeds,
                 const std::vector<int>& fetch_mlvalue_idxs,
-                 std::vector<MLValue>& fetches,
+                 const std::vector<MLValue>& fetches,
                 // optional custom allocators. key is index in fetches
                 const std::unordered_map<size_t, IExecutor::CustomAllocator>& fetch_allocators,
                 const SessionState& session_state);
--- a/onnxruntime/core/optimizer/optimizer_execution_frame.cc
+++ b/onnxruntime/core/optimizer/optimizer_execution_frame.cc
@ -0,0 +1,134 @@
+
+#include "core/common/common.h"
+#include "core/common/status.h"
+#include "core/common/logging/logging.h"
+#include "core/common/logging/macros.h"
+#include "core/framework/tensorprotoutils.h"
+#include "core/framework/data_types.h"
+#include "core/framework/mldata_type_utils.h"
+#include "core/framework/kernel_registry.h"
+#include "core/framework/fuse_nodes_funcs.h"
+#include "core/optimizer/optimizer_execution_frame.h"
+
+namespace onnxruntime {
+
+// Builds everything an OptimizerExecutionFrame needs to run `nodes` on the CPU:
+//  - a CPU execution provider and its allocator,
+//  - the NodeArg name <-> MLValue index maps for every input/output of `nodes`,
+//  - MLValues for the entries of `initialized_tensor_set` that those nodes reference,
+//  - one CPU kernel per node, keyed by node index.
+// Fails via ORT_ENFORCE if the allocator cannot be obtained, an initializer cannot be
+// converted to an MLValue, or a kernel cannot be created for one of the nodes.
+OptimizerExecutionFrame::Info::Info(const std::vector<const Node*>& nodes,
+                                    const InitializedTensorSet& initialized_tensor_set) {
+  // Create CPU execution provider.
+  // For now, a CPU execution provider is created every time an Info is initialized.
+  // Later, it will be passed in through the Info ctor instead.
+  cpu_execution_provider_ = std::make_unique<CPUExecutionProvider>(CPUExecutionProviderInfo());
+  allocator_ptr_ = cpu_execution_provider_->GetAllocator(device_id_, mem_type_);
+  ORT_ENFORCE(allocator_ptr_ != nullptr, "Failed to get allocator for optimizer");
+
+  // Create MLValues related maps
+  auto initialize_maps = [this, &initialized_tensor_set](const NodeArg& arg, size_t /*index*/) -> Status {
+    int idx = mlvalue_name_idx_map_.Add(arg.Name());
+    mlvalue_idx_nodearg_map_[idx] = &arg;
+
+    // Only create MLValue instances for initializers used by an array of nodes.
+    InitializedTensorSet::const_iterator it = initialized_tensor_set.find(arg.Name());
+    if (it != initialized_tensor_set.cend()) {
+      MLValue mlvalue;
+      // Propagate conversion failures instead of registering an empty MLValue.
+      Status convert_status = utils::TensorProtoToMLValue(*(it->second), allocator_ptr_, nullptr, 0, mlvalue);
+      if (!convert_status.IsOK()) {
+        return convert_status;
+      }
+      initializers_[idx] = mlvalue;
+    }
+
+    return Status::OK();
+  };
+
+  // TODO: node->ImplicitInputDefs() need to be added here for control flow nodes.
+  for (auto* node : nodes) {
+    Status map_status = onnxruntime::Node::ForEachWithIndex(node->InputDefs(), initialize_maps);
+    ORT_ENFORCE(map_status.IsOK(), map_status.ErrorMessage());
+    map_status = onnxruntime::Node::ForEachWithIndex(node->OutputDefs(), initialize_maps);
+    ORT_ENFORCE(map_status.IsOK(), map_status.ErrorMessage());
+  }
+
+  node_index_info_ = std::make_unique<NodeIndexInfo>(nodes, mlvalue_name_idx_map_);
+
+  // create kernels for these nodes
+  // The registry does not depend on the node, so fetch it once.
+  std::shared_ptr<KernelRegistry> kernel_registry = cpu_execution_provider_->GetKernelRegistry();
+  for (auto* node : nodes) {
+    std::unique_ptr<OpKernel> op_kernel;
+    auto status = kernel_registry->TryCreateKernel(*node,
+                                                   *cpu_execution_provider_,
+                                                   initializers_,
+                                                   mlvalue_name_idx_map_,
+                                                   FuncManager(),
+                                                   op_kernel);
+    // Do not silently store a null kernel; surface why creation failed.
+    ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
+    kernels_[node->Index()] = std::move(op_kernel);
+  }
+}
+
+// Returns the kernel registered for `node_id`, or nullptr when there is no entry for it.
+const OpKernel* OptimizerExecutionFrame::Info::GetKernel(NodeIndex node_id) const {
+  const auto entry = kernels_.find(node_id);
+  return entry != kernels_.cend() ? entry->second.get() : nullptr;
+}
+
+// Constructs a frame over the maps, initializers and node index info held by `info`.
+// `fetch_mlvalue_idxs` lists the MLValue indexes to be returned as outputs.
+// Empty feed indexes, feeds and fetches are handed to the IExecutionFrame base.
+// `info` is stored by reference (see info_), so it must outlive this frame.
+//
+// For optimizer, probably no need to pass feed_mlvalue_idxs, feeds to initialize IExecutionFrame.
+// If needed, the parameters of OptimizerExecutionFrame ctor can be changed later.
+OptimizerExecutionFrame::OptimizerExecutionFrame(const Info& info,
+                                                 const std::vector<int>& fetch_mlvalue_idxs)
+    : IExecutionFrame(std::vector<int>(),
+                      std::vector<MLValue>(),
+                      info.GetInitializers(),
+                      fetch_mlvalue_idxs,
+                      std::vector<MLValue>(),
+                      info.GetMLValueNameIdxMap(),
+                      info.GetNodeIndexInfo()),
+                      info_(info) {
+}
+
+// Compiler-generated destructor, defined out of line in this translation unit.
+OptimizerExecutionFrame::~OptimizerExecutionFrame() = default;
+
+// Resolves the allocator for `info` by forwarding to Info::GetAllocator(const OrtAllocatorInfo&).
+AllocatorPtr OptimizerExecutionFrame::GetAllocatorImpl(const OrtAllocatorInfo& info) const {
+  return info_.GetAllocator(info);
+}
+
+// Initializes `mlvalue` as the storage for the value at `mlvalue_idx`.
+// This method is not thread safe!
+//  - Non-tensor types: the value is created with the type's own create/delete functions;
+//    `shape` is not used.
+//  - Tensor types: a buffer for `shape` is allocated from the Info's allocator and wrapped
+//    in a Tensor; `shape` must not be null.
+// Returns INVALID_ARGUMENT if no NodeArg is registered for `mlvalue_idx`, if the NodeArg has no
+// type information, if `shape` is null for a tensor, or if `shape` has a negative size;
+// returns FAIL if the byte size computation overflows.
+Status OptimizerExecutionFrame::CreateNodeOutputMLValueImpl(MLValue& mlvalue, int mlvalue_idx, const TensorShape* shape) {
+  // Look the NodeArg up explicitly so an unknown index is reported as a Status
+  // rather than escaping as std::out_of_range from unordered_map::at().
+  const auto& idx_to_nodearg = info_.GetMLValueIdxNodeArgMap();
+  const auto nodearg_it = idx_to_nodearg.find(mlvalue_idx);
+  if (nodearg_it == idx_to_nodearg.cend())
+    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
+                  "No NodeArg is registered for mlvalue index=" + std::to_string(mlvalue_idx));
+
+  const DataTypeImpl* ml_type = utils::GetMLDataType(*(nodearg_it->second));
+  if (ml_type == nullptr)
+    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
+                  "Tried to allocate without valid type information, mlvalue index=" + std::to_string(mlvalue_idx));
+  if (!ml_type->IsTensorType()) {
+    const NonTensorTypeBase* non_tensor_type = static_cast<const NonTensorTypeBase*>(ml_type);
+    auto creator = non_tensor_type->GetCreateFunc();
+    mlvalue.Init(creator(),
+                 non_tensor_type,
+                 non_tensor_type->GetDeleteFunc());
+    return Status::OK();
+  }
+
+  // tensors
+  if (shape == nullptr)
+    return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT,
+                  "Tried to allocate a tensor without a shape, mlvalue index=" + std::to_string(mlvalue_idx));
+
+  auto element_type = static_cast<const TensorTypeBase*>(ml_type)->GetElementType();
+  AllocatorPtr allocator_ptr = info_.GetAllocator();
+  OrtAllocatorInfo allocator_info = allocator_ptr->Info();
+
+  int64_t len = shape->Size();
+  if (len < 0) {
+    return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Tensor shape cannot contain any negative value. Shape was:", *shape);
+  }
+  size_t size;
+  if (!IAllocator::CalcMemSizeForArrayWithAlignment<64>(len, element_type->Size(), &size)) {
+    return Status(common::ONNXRUNTIME, common::FAIL, "size overflow");
+  }
+
+  // Zero-sized tensors get no backing buffer.
+  void* buffer = size == 0 ? nullptr : allocator_ptr->Alloc(size);
+  std::unique_ptr<Tensor> p_tensor = std::make_unique<Tensor>(element_type,
+                                                              *shape,
+                                                              buffer,
+                                                              allocator_info,
+                                                              allocator_ptr);
+
+  // Ownership of the Tensor moves to the MLValue, which destroys it with the Tensor type's delete function.
+  mlvalue.Init(p_tensor.release(),
+               DataTypeImpl::GetType<Tensor>(),
+               DataTypeImpl::GetType<Tensor>()->GetDeleteFunc());
+
+  return Status::OK();
+}
+
+}
--- a/onnxruntime/core/optimizer/optimizer_execution_frame.h
+++ b/onnxruntime/core/optimizer/optimizer_execution_frame.h
@ -0,0 +1,73 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#pragma once
+
+#include "core/graph/graph.h"
+#include "core/providers/cpu/cpu_execution_provider.h"
+#include "core/framework/execution_frame.h"
+#include "core/framework/mlvalue_name_idx_map.h"
+#include "core/framework/ml_value.h"
+
+namespace onnxruntime {
+
+// Execution frame used by graph optimizers to run kernels for a set of nodes.
+// All state that is shared across frames (provider, allocator, index maps, initializers,
+// kernels) lives in the nested Info class; the frame itself only references an Info.
+class OptimizerExecutionFrame final : public IExecutionFrame {
+ public:
+  // Per-node-set state: built once from a list of nodes plus the initializers they may use.
+  class Info {
+   public:
+    // `nodes`: the nodes whose input/output NodeArgs are indexed and for which kernels are created.
+    // `initialized_tensor_set`: initializers by name; only those referenced by `nodes` are converted.
+    Info(const std::vector<const Node*>& nodes,
+         const InitializedTensorSet& initialized_tensor_set);
+
+    // Asks the CPU execution provider for the allocator matching `info.id` / `info.mem_type`.
+    AllocatorPtr GetAllocator(const OrtAllocatorInfo& info) const {
+      return cpu_execution_provider_->GetAllocator(info.id, info.mem_type);
+    }
+
+    // Returns the allocator cached in allocator_ptr_.
+    AllocatorPtr GetAllocator() const {
+      return allocator_ptr_;
+    }
+
+    const MLValueNameIdxMap& GetMLValueNameIdxMap() const noexcept { return mlvalue_name_idx_map_; }
+    const std::unordered_map<int, const NodeArg*>& GetMLValueIdxNodeArgMap() const noexcept { return mlvalue_idx_nodearg_map_; }
+    const std::unordered_map<int, MLValue>& GetInitializers() const noexcept { return initializers_; }
+    // Dereferences node_index_info_ without a null check.
+    const NodeIndexInfo& GetNodeIndexInfo() const { return *node_index_info_; }
+    // Returns the MLValue index registered for `name`, or -1 if the lookup does not succeed.
+    int GetMLValueIndex(const std::string& name) const {
+      int index = -1;
+      if (mlvalue_name_idx_map_.GetIdx(name, index) == Status::OK()) {
+        return index;
+      }
+      return -1;
+    }
+
+    // Returns the kernel stored for `node_id`, or nullptr if there is none.
+    const OpKernel* GetKernel(NodeIndex node_id) const;
+
+   private:
+    // The optimizer is running on CPU execution provider by default.
+    std::unique_ptr<CPUExecutionProvider> cpu_execution_provider_;
+    // Device id and memory type used to pick the cached allocator below.
+    const int device_id_{0};
+    const OrtMemType mem_type_{OrtMemTypeDefault};
+    AllocatorPtr allocator_ptr_;
+
+    // MLValues for optimizer
+    // NodeArg name -> MLValue index.
+    MLValueNameIdxMap mlvalue_name_idx_map_;
+    // MLValue index -> NodeArg (non-owning pointers into the graph's NodeArgs).
+    std::unordered_map<int, const NodeArg*> mlvalue_idx_nodearg_map_;
+    // MLValue index -> value, for the initializers referenced by the nodes.
+    std::unordered_map<int, MLValue> initializers_;
+    std::unique_ptr<NodeIndexInfo> node_index_info_;
+
+    // Node index -> kernel created for that node.
+    std::unordered_map<onnxruntime::NodeIndex, std::unique_ptr<OpKernel>> kernels_;
+  };
+
+  // `info` is kept by reference and must outlive the frame.
+  // `fetch_mlvalue_idxs` are the MLValue indexes to expose as outputs.
+  OptimizerExecutionFrame(const Info& info,
+                          const std::vector<int>& fetch_mlvalue_idxs);
+
+  ~OptimizerExecutionFrame();
+
+ private:
+  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(OptimizerExecutionFrame);
+
+  AllocatorPtr GetAllocatorImpl(const OrtAllocatorInfo& info) const override;
+  Status CreateNodeOutputMLValueImpl(MLValue& mlvalue, int mlvalue_idx, const TensorShape* shape) override;
+
+  // Non-owning; see the constructor's lifetime requirement.
+  const Info& info_;
+};
+
+}  // namespace onnxruntime
--- a/onnxruntime/test/optimizer/graph_transform_test.cc
+++ b/onnxruntime/test/optimizer/graph_transform_test.cc
--- a/onnxruntime/test/optimizer/optimizer_test.cc
+++ b/onnxruntime/test/optimizer/optimizer_test.cc
@ -0,0 +1,94 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/common/logging/logging.h"
+#include "core/graph/graph_viewer.h"
+#include "core/graph/model.h"
+#include "core/optimizer/optimizer_execution_frame.h"
+#include "core/optimizer/graph_transformer.h"
+#include "core/optimizer/graph_transformer_mgr.h"
+#include "core/framework/data_types.h"
+#include "core/framework/ml_value.h"
+#include "core/framework/op_kernel.h"
+#include "core/util/math.h"
+#include "core/platform/env.h"
+#include "test/framework/test_utils.h"
+#include "test/capturing_sink.h"
+#include "test/test_environment.h"
+#include "gtest/gtest.h"
+
+using namespace std;
+using namespace ONNX_NAMESPACE;
+
+using namespace onnx;
+
+namespace onnxruntime {
+namespace test {
+
+static const std::string MODEL_FOLDER = "testdata/transform/";
+
+// Builds a one-node graph (Add of two int32 initializers of length 10), runs the node's CPU
+// kernel through an OptimizerExecutionFrame, and checks that out[j] == 3 * j.
+TEST(OptimizerTest, Basic) {
+  Model model("OptimizerBasic");
+  auto& graph = model.MainGraph();
+
+  const int tensor_dim = 10;
+  const int input_num = 2;
+  TensorProto initializer_tensor[input_num];
+  std::vector<std::unique_ptr<NodeArg>> inputs(input_num);
+  std::vector<std::unique_ptr<NodeArg>> outputs(1);
+  InitializedTensorSet initialized_tensor_set;
+
+  TypeProto tensor_int32;
+  tensor_int32.mutable_tensor_type()->set_elem_type(TensorProto_DataType_INT32);
+  tensor_int32.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(tensor_dim);
+
+  // input_i[j] = (i + 1) * j, so input_0[j] + input_1[j] == 3 * j.
+  for (int i = 0; i < input_num; i++) {
+    string name("input_" + std::to_string(i));
+    inputs[i] = std::make_unique<NodeArg>(name, &tensor_int32);
+
+    initializer_tensor[i].set_name(inputs[i]->Name());
+    initializer_tensor[i].add_dims(tensor_dim);
+    initializer_tensor[i].set_data_type(onnx::TensorProto_DataType_INT32);
+    for (int j = 0; j < tensor_dim; j++) {
+      initializer_tensor[i].add_int32_data((i + 1) * j);
+    }
+    initialized_tensor_set[name] = &initializer_tensor[i];
+  }
+  outputs[0] = std::make_unique<NodeArg>("out", &tensor_int32);
+
+  std::vector<NodeArg*> tmp_inputs{inputs[0].get(), inputs[1].get()};
+  std::vector<NodeArg*> tmp_outputs{outputs[0].get()};
+  graph.AddNode("a", "Add", "a", tmp_inputs, tmp_outputs);
+  // A graph that fails to resolve would make every later check meaningless.
+  ASSERT_TRUE(graph.Resolve().IsOK());
+
+  std::vector<const Node*> nodes;
+  for (auto& node : graph.Nodes()) {
+    nodes.push_back(&node);
+  }
+
+  OptimizerExecutionFrame::Info info(nodes, initialized_tensor_set);
+  // GetMLValueIndex returns -1 when the name is unknown.
+  const int out_idx = info.GetMLValueIndex("out");
+  ASSERT_GE(out_idx, 0);
+  std::vector<int> fetch_mlvalue_idxs{out_idx};
+  OptimizerExecutionFrame frame(info, fetch_mlvalue_idxs);
+  const logging::Logger& logger = ::onnxruntime::test::DefaultLoggingManager().DefaultLogger();
+
+  // The expected output does not depend on the node, so build it once.
+  std::vector<int32_t> expected;
+  for (int j = 0; j < tensor_dim; j++) {
+    expected.push_back(3 * j);
+  }
+
+  for (auto& node : graph.Nodes()) {
+    auto* kernel = info.GetKernel(node.Index());
+    ASSERT_TRUE(kernel != nullptr);
+
+    OpKernelContext op_kernel_context(&frame, kernel, logger);
+
+    ASSERT_TRUE(kernel->Compute(&op_kernel_context).IsOK());
+
+    std::vector<MLValue> fetches;
+    frame.GetOutputs(fetches);
+    ASSERT_FALSE(fetches.empty());
+    auto& tensor = fetches[0].Get<Tensor>();
+    const std::vector<int32_t> found(tensor.Data<int32_t>(), tensor.Data<int32_t>() + tensor_dim);
+    ASSERT_EQ(expected, found);
+  }
+}
+
+}  // namespace test
+}  // namespace onnxruntime