Fix reuse logic in allocation planner. (#2393)

* Fix reuse logic in allocation planner. * PR comments * Add helpful comments * Don't allow reuse across string tensors.
2026-06-28 03:20:58 +00:00 · 2019-11-13 22:51:12 -08:00 · 2019-11-13 22:51:12 -08:00 · 7e164eaa6a
commit 7e164eaa6a
parent b90d55b7ea
3 changed files with 133 additions and 14 deletions
--- a/onnxruntime/core/framework/allocation_planner.cc
+++ b/onnxruntime/core/framework/allocation_planner.cc
@ -258,7 +258,7 @@ class PlannerImpl {
      const auto& val1 = shape1.dim(i);
      const auto& val2 = shape2.dim(i);
      if (utils::HasDimValue(val1) && utils::HasDimValue(val2) &&
-         (val1.dim_value() == val2.dim_value()))
+          (val1.dim_value() == val2.dim_value()))
        continue;  // same known dimension
      if (utils::HasDimParam(val1) && utils::HasDimParam(val2)) {
        const auto& val1_param = val1.dim_param();
@ -281,9 +281,20 @@ class PlannerImpl {
    return elt_type->Size();
  }

-  static bool SameSize(const TensorShapeProto& shape1, const DataType& ptype1, const TensorShapeProto& shape2,
-                       const DataType& ptype2) {
-    return (GetElementSize(ptype1) == GetElementSize(ptype2)) && SameShape(shape1, shape2);
+  static bool SameSize(const TensorShapeProto& shape1, const onnxruntime::NodeArg& arg1, const TensorShapeProto& shape2, const onnxruntime::NodeArg& arg2) {
+    const auto& ptype1 = arg1.Type();
+    const auto& ptype2 = arg2.Type();
+    auto type1_size = GetElementSize(ptype1);
+    auto type2_size = GetElementSize(ptype2);
+    bool is_type1_string = arg1.TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING;
+    bool is_type2_string = arg2.TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING;
+
+    // sizeof(std::string) = sizeof(double) on gcc 4.8.x on CentOS. This causes the allocation planner to reuse
+    // a tensor of type double. This won't work for string tensors since they need to be placement new'ed.
+    // If either of the tensors is a string, don't treat them the same. Moreover, reusing a string tensor for a string
+    // tensor without releasing the previous memory can cause memory leaks; hence we don't allow reuse across string
+    // tensors as well.
+    return !(is_type1_string || is_type2_string) && (type1_size == type2_size) && SameShape(shape1, shape2);

    /* TODO: we can improve this if the concrete shapes are known for both as below.
       Unclear whether this is worthwhile though.
@ -305,14 +316,13 @@ class PlannerImpl {
    auto p_shape2 = context_.GetShape(arg2);
    // If the shapes are unknown, we conservatively assume they may be of different size.
    if ((nullptr == p_shape1) || (nullptr == p_shape2)) return false;
-    return SameSize(*p_shape1, arg1.Type(), *p_shape2, arg2.Type());
+    return SameSize(*p_shape1, arg1, *p_shape2, arg2);
  }

  // Find if freelist contains a buffer of the same size as output_arg
  bool FindReusableTensor(const onnxruntime::NodeArg& output_arg, OrtValueIndex* reusable_tensor) {
    auto p_required_buffer_shape = context_.GetShape(output_arg);
    if (nullptr == p_required_buffer_shape) return false;
-    auto required_buffer_type = output_arg.Type();
    auto& required_memory_info = AllocPlan(output_arg.Name()).location;

    for (auto it = freelist_.begin(); it != freelist_.end(); ++it) {
@ -322,9 +332,8 @@ class PlannerImpl {
      if (!(available_memory_info == required_memory_info)) continue;
      auto p_available_buffer_shape = context_.GetShape(*p_node_arg);
      if (nullptr != p_available_buffer_shape) {
-        auto available_buffer_type = p_node_arg->Type();
-        if (SameSize(*p_available_buffer_shape, available_buffer_type,
-                     *p_required_buffer_shape, required_buffer_type)) {
+        if (SameSize(*p_available_buffer_shape, *p_node_arg,
+                     *p_required_buffer_shape, output_arg)) {
          *reusable_tensor = it->ml_value;
          freelist_.erase(it);
          return true;
--- a/onnxruntime/test/framework/inference_session_test.cc
+++ b/onnxruntime/test/framework/inference_session_test.cc
@ -172,15 +172,21 @@ static void CreateMatMulModel(std::unique_ptr<onnxruntime::Model>& p_model, Prov
  ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();
 }

+template <typename T = float>
+void VerifyOutputs(const Tensor& tensor, const std::vector<int64_t>& expected_dims,
+                   const std::vector<T>& expected_values) {
+  TensorShape expected_shape(expected_dims);
+  ASSERT_EQ(expected_shape, tensor.Shape());
+  const std::vector<T> found(tensor.template Data<T>(),
+                             tensor.template Data<T>() + expected_values.size());
+  ASSERT_EQ(expected_values, found);
+}
+
 void VerifyOutputs(const std::vector<OrtValue>& fetches, const std::vector<int64_t>& expected_dims,
                   const std::vector<float>& expected_values) {
  ASSERT_EQ(1, fetches.size());
  auto& rtensor = fetches.front().Get<Tensor>();
-  TensorShape expected_shape(expected_dims);
-  ASSERT_EQ(expected_shape, rtensor.Shape());
-  const std::vector<float> found(rtensor.template Data<float>(),
-                                 rtensor.template Data<float>() + expected_values.size());
-  ASSERT_EQ(expected_values, found);
+  VerifyOutputs(rtensor, expected_dims, expected_values);
 }

 void RunModel(InferenceSession& session_object,
@ -1536,5 +1542,60 @@ TEST(InferenceSessionTests, TestParallelExecutionWithCudaProvider) {

 #endif

+// The model being tested here triggers a case where the allocation planner (AP) tries to reuse a tensor of type
+// double for a string tensor. The reuse logic of AP works correctly on Windows and Ubuntu 16.x
+// since there the sizeof(double) != sizeof(std::string). However, on CentOS (gcc 4.8.x), the 2 sizes are equal.
+TEST(InferenceSessionTests, ModelThatTriggersAllocationPlannerToReuseDoubleTensorForStringTensor) {
+  SessionOptions so;
+
+  so.session_logid = "InferenceSessionTests.ModelThatTriggersAllocationPlannerBug";
+
+  InferenceSession session_object{so, &DefaultLoggingManager()};
+  Status st;
+  ASSERT_TRUE((st = session_object.Load("testdata/test_cast_back_to_back_non_const_mixed_types_origin.onnx")).IsOK())
+      << st.ErrorMessage();
+  ASSERT_TRUE((st = session_object.Initialize()).IsOK()) << st.ErrorMessage();
+
+  RunOptions run_options;
+  run_options.run_tag = "one session/one tag";
+
+  // prepare inputs
+  std::vector<int64_t> dims_x = {1, 2, 3};
+  std::vector<float> values_x = {1.6f, -0.6f, -0.5f, -1.0f, 0.8f, -2.3f};
+  OrtValue ml_value;
+  CreateMLValue<float>(TestCPUExecutionProvider()->GetAllocator(0, OrtMemTypeDefault), dims_x, values_x,
+                       &ml_value);
+  NameMLValMap feeds;
+  feeds.insert(std::make_pair("u", ml_value));
+
+  // prepare outputs
+  std::vector<std::string> output_names;
+  output_names.push_back("res");
+  output_names.push_back("res2");
+  output_names.push_back("res3");
+  std::vector<OrtValue> fetches;
+
+  // prepare expected inputs and outputs
+  std::vector<int64_t> expected_dims_res = {1, 2, 3};
+  std::vector<int64_t> expected_values_res = {1, 0, 0, -1, 0, -2};
+
+  std::vector<int64_t> expected_dims_res2 = {1, 2, 3};
+  std::vector<int64_t> expected_values_res2 = {1, 0, 0, -1, 0, -2};
+
+  std::vector<int64_t> expected_dims_res3 = {1, 2, 3};
+  std::vector<int8_t> expected_values_res3 = {1, 0, 0, 1, 0, 1};
+
+  // Now run
+  st = session_object.Run(run_options, feeds, output_names, &fetches);
+  if (!st.IsOK()) {
+    std::cout << "Run returned status: " << st.ErrorMessage() << std::endl;
+  }
+  ASSERT_TRUE(st.IsOK());
+  ASSERT_EQ(3, fetches.size());
+  VerifyOutputs(fetches[0].Get<Tensor>(), expected_dims_res, expected_values_res);
+  VerifyOutputs(fetches[1].Get<Tensor>(), expected_dims_res2, expected_values_res2);
+  VerifyOutputs(fetches[2].Get<Tensor>(), expected_dims_res3, expected_values_res3);
+}
+
 }  // namespace test
 }  // namespace onnxruntime
--- a/onnxruntime/test/testdata/test_cast_back_to_back_non_const_mixed_types_origin.onnx
+++ b/onnxruntime/test/testdata/test_cast_back_to_back_non_const_mixed_types_origin.onnx
@ -0,0 +1,49 @@
+
+onnx-tests:Å
+
+uvcast_0"Cast*	
+to 
+
+vwcast_1"Cast*	
+to 
+
+wxcast_2"Cast*	
+to 
+!
+xrescast_3"Cast*	
+to 
+ 
+ww2cast_4"Cast*	
+to 
+#
+w2res2cast_5"Cast*	
+to 
+ 
+xx2cast_6"Cast*	
+to	 
+!
+x2x3cast_7"Cast*	
+to 
+#
+x3res3cast_8"Cast*	
+to  test-cast-back-to-back-non-constZ
+u
+
+
+
+b
+res
+
+
+
+b
+res2
+
+
+
+b
+res3
+
+
+
+B