diff --git a/onnxruntime/core/framework/allocation_planner.cc b/onnxruntime/core/framework/allocation_planner.cc index 169c2fc876..3c77ada710 100644 --- a/onnxruntime/core/framework/allocation_planner.cc +++ b/onnxruntime/core/framework/allocation_planner.cc @@ -258,7 +258,7 @@ class PlannerImpl { const auto& val1 = shape1.dim(i); const auto& val2 = shape2.dim(i); if (utils::HasDimValue(val1) && utils::HasDimValue(val2) && - (val1.dim_value() == val2.dim_value())) + (val1.dim_value() == val2.dim_value())) continue; // same known dimension if (utils::HasDimParam(val1) && utils::HasDimParam(val2)) { const auto& val1_param = val1.dim_param(); @@ -281,9 +281,20 @@ class PlannerImpl { return elt_type->Size(); } - static bool SameSize(const TensorShapeProto& shape1, const DataType& ptype1, const TensorShapeProto& shape2, - const DataType& ptype2) { - return (GetElementSize(ptype1) == GetElementSize(ptype2)) && SameShape(shape1, shape2); + static bool SameSize(const TensorShapeProto& shape1, const onnxruntime::NodeArg& arg1, const TensorShapeProto& shape2, const onnxruntime::NodeArg& arg2) { + const auto& ptype1 = arg1.Type(); + const auto& ptype2 = arg2.Type(); + auto type1_size = GetElementSize(ptype1); + auto type2_size = GetElementSize(ptype2); + bool is_type1_string = arg1.TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING; + bool is_type2_string = arg2.TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING; + + // sizeof(std::string) = sizeof(double) on gcc 4.8.x on CentOS. This causes the allocation planner to reuse + // a tensor of type double. This won't work for string tensors since they need to be placement new'ed. + // If either of the tensors is a string, don't treat them the same. Moreover, reusing a string tensor for a string + // tensor without releasing the previous memory can cause memory leaks; hence we don't allow reuse across string + // tensors as well. + return !(is_type1_string || is_type2_string) && (type1_size == type2_size) && SameShape(shape1, shape2); /* TODO: we can improve this if the concrete shapes are known for both as below. Unclear whether this is worthwhile though. @@ -305,14 +316,13 @@ class PlannerImpl { auto p_shape2 = context_.GetShape(arg2); // If the shapes are unknown, we conservatively assume they may be of different size. if ((nullptr == p_shape1) || (nullptr == p_shape2)) return false; - return SameSize(*p_shape1, arg1.Type(), *p_shape2, arg2.Type()); + return SameSize(*p_shape1, arg1, *p_shape2, arg2); } // Find if freelist contains a buffer of the same size as output_arg bool FindReusableTensor(const onnxruntime::NodeArg& output_arg, OrtValueIndex* reusable_tensor) { auto p_required_buffer_shape = context_.GetShape(output_arg); if (nullptr == p_required_buffer_shape) return false; - auto required_buffer_type = output_arg.Type(); auto& required_memory_info = AllocPlan(output_arg.Name()).location; for (auto it = freelist_.begin(); it != freelist_.end(); ++it) { @@ -322,9 +332,8 @@ class PlannerImpl { if (!(available_memory_info == required_memory_info)) continue; auto p_available_buffer_shape = context_.GetShape(*p_node_arg); if (nullptr != p_available_buffer_shape) { - auto available_buffer_type = p_node_arg->Type(); - if (SameSize(*p_available_buffer_shape, available_buffer_type, - *p_required_buffer_shape, required_buffer_type)) { + if (SameSize(*p_available_buffer_shape, *p_node_arg, + *p_required_buffer_shape, output_arg)) { *reusable_tensor = it->ml_value; freelist_.erase(it); return true; diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index 2ecfc15e9b..22ab613b0c 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -172,15 +172,21 @@ static void CreateMatMulModel(std::unique_ptr& p_model, Prov ASSERT_TRUE(status.IsOK()) << status.ErrorMessage(); } +template +void VerifyOutputs(const Tensor& tensor, const std::vector& expected_dims, + const std::vector& expected_values) { + TensorShape expected_shape(expected_dims); + ASSERT_EQ(expected_shape, tensor.Shape()); + const std::vector found(tensor.template Data(), + tensor.template Data() + expected_values.size()); + ASSERT_EQ(expected_values, found); +} + void VerifyOutputs(const std::vector& fetches, const std::vector& expected_dims, const std::vector& expected_values) { ASSERT_EQ(1, fetches.size()); auto& rtensor = fetches.front().Get(); - TensorShape expected_shape(expected_dims); - ASSERT_EQ(expected_shape, rtensor.Shape()); - const std::vector found(rtensor.template Data(), - rtensor.template Data() + expected_values.size()); - ASSERT_EQ(expected_values, found); + VerifyOutputs(rtensor, expected_dims, expected_values); } void RunModel(InferenceSession& session_object, @@ -1536,5 +1542,60 @@ TEST(InferenceSessionTests, TestParallelExecutionWithCudaProvider) { #endif +// The model being tested here triggers a case where the allocation planner (AP) tries to reuse a tensor of type +// double for a string tensor. The reuse logic of AP works correctly on Windows and Ubuntu 16.x +// since there the sizeof(double) != sizeof(std::string). However, on CentOS (gcc 4.8.x), the 2 sizes are equal. +TEST(InferenceSessionTests, ModelThatTriggersAllocationPlannerToReuseDoubleTensorForStringTensor) { + SessionOptions so; + + so.session_logid = "InferenceSessionTests.ModelThatTriggersAllocationPlannerBug"; + + InferenceSession session_object{so, &DefaultLoggingManager()}; + Status st; + ASSERT_TRUE((st = session_object.Load("testdata/test_cast_back_to_back_non_const_mixed_types_origin.onnx")).IsOK()) + << st.ErrorMessage(); + ASSERT_TRUE((st = session_object.Initialize()).IsOK()) << st.ErrorMessage(); + + RunOptions run_options; + run_options.run_tag = "one session/one tag"; + + // prepare inputs + std::vector dims_x = {1, 2, 3}; + std::vector values_x = {1.6f, -0.6f, -0.5f, -1.0f, 0.8f, -2.3f}; + OrtValue ml_value; + CreateMLValue(TestCPUExecutionProvider()->GetAllocator(0, OrtMemTypeDefault), dims_x, values_x, + &ml_value); + NameMLValMap feeds; + feeds.insert(std::make_pair("u", ml_value)); + + // prepare outputs + std::vector output_names; + output_names.push_back("res"); + output_names.push_back("res2"); + output_names.push_back("res3"); + std::vector fetches; + + // prepare expected inputs and outputs + std::vector expected_dims_res = {1, 2, 3}; + std::vector expected_values_res = {1, 0, 0, -1, 0, -2}; + + std::vector expected_dims_res2 = {1, 2, 3}; + std::vector expected_values_res2 = {1, 0, 0, -1, 0, -2}; + + std::vector expected_dims_res3 = {1, 2, 3}; + std::vector expected_values_res3 = {1, 0, 0, 1, 0, 1}; + + // Now run + st = session_object.Run(run_options, feeds, output_names, &fetches); + if (!st.IsOK()) { + std::cout << "Run returned status: " << st.ErrorMessage() << std::endl; + } + ASSERT_TRUE(st.IsOK()); + ASSERT_EQ(3, fetches.size()); + VerifyOutputs(fetches[0].Get(), expected_dims_res, expected_values_res); + VerifyOutputs(fetches[1].Get(), expected_dims_res2, expected_values_res2); + VerifyOutputs(fetches[2].Get(), expected_dims_res3, expected_values_res3); +} + } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/testdata/test_cast_back_to_back_non_const_mixed_types_origin.onnx b/onnxruntime/test/testdata/test_cast_back_to_back_non_const_mixed_types_origin.onnx new file mode 100644 index 0000000000..6ee055f274 --- /dev/null +++ b/onnxruntime/test/testdata/test_cast_back_to_back_non_const_mixed_types_origin.onnx @@ -0,0 +1,49 @@ + +onnx-tests:Å + +uvcast_0"Cast* +to   + +vwcast_1"Cast* +to  + +wxcast_2"Cast* +to  +! +xrescast_3"Cast* +to  + +ww2cast_4"Cast* +to  +# +w2res2cast_5"Cast* +to  + +xx2cast_6"Cast* +to   +! +x2x3cast_7"Cast* +to  +# +x3res3cast_8"Cast* +to  test-cast-back-to-back-non-constZ +u + + + +b +res + + + +b +res2 + + + +b +res3 + + + +B \ No newline at end of file