mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-28 03:20:58 +00:00
Fix reuse logic in allocation planner. (#2393)
* Fix reuse logic in allocation planner. * PR comments * Add helpful comments * Don't allow reuse across string tensors.
This commit is contained in:
parent
b90d55b7ea
commit
7e164eaa6a
3 changed files with 133 additions and 14 deletions
|
|
@ -258,7 +258,7 @@ class PlannerImpl {
|
|||
const auto& val1 = shape1.dim(i);
|
||||
const auto& val2 = shape2.dim(i);
|
||||
if (utils::HasDimValue(val1) && utils::HasDimValue(val2) &&
|
||||
(val1.dim_value() == val2.dim_value()))
|
||||
(val1.dim_value() == val2.dim_value()))
|
||||
continue; // same known dimension
|
||||
if (utils::HasDimParam(val1) && utils::HasDimParam(val2)) {
|
||||
const auto& val1_param = val1.dim_param();
|
||||
|
|
@ -281,9 +281,20 @@ class PlannerImpl {
|
|||
return elt_type->Size();
|
||||
}
|
||||
|
||||
static bool SameSize(const TensorShapeProto& shape1, const DataType& ptype1, const TensorShapeProto& shape2,
|
||||
const DataType& ptype2) {
|
||||
return (GetElementSize(ptype1) == GetElementSize(ptype2)) && SameShape(shape1, shape2);
|
||||
static bool SameSize(const TensorShapeProto& shape1, const onnxruntime::NodeArg& arg1, const TensorShapeProto& shape2, const onnxruntime::NodeArg& arg2) {
|
||||
const auto& ptype1 = arg1.Type();
|
||||
const auto& ptype2 = arg2.Type();
|
||||
auto type1_size = GetElementSize(ptype1);
|
||||
auto type2_size = GetElementSize(ptype2);
|
||||
bool is_type1_string = arg1.TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING;
|
||||
bool is_type2_string = arg2.TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING;
|
||||
|
||||
// sizeof(std::string) = sizeof(double) on gcc 4.8.x on CentOS. This causes the allocation planner to reuse
|
||||
// a tensor of type double. This won't work for string tensors since they need to be placement new'ed.
|
||||
// If either of the tensors is a string, don't treat them the same. Moreover, reusing a string tensor for a string
|
||||
// tensor without releasing the previous memory can cause memory leaks; hence we don't allow reuse across string
|
||||
// tensors as well.
|
||||
return !(is_type1_string || is_type2_string) && (type1_size == type2_size) && SameShape(shape1, shape2);
|
||||
|
||||
/* TODO: we can improve this if the concrete shapes are known for both as below.
|
||||
Unclear whether this is worthwhile though.
|
||||
|
|
@ -305,14 +316,13 @@ class PlannerImpl {
|
|||
auto p_shape2 = context_.GetShape(arg2);
|
||||
// If the shapes are unknown, we conservatively assume they may be of different size.
|
||||
if ((nullptr == p_shape1) || (nullptr == p_shape2)) return false;
|
||||
return SameSize(*p_shape1, arg1.Type(), *p_shape2, arg2.Type());
|
||||
return SameSize(*p_shape1, arg1, *p_shape2, arg2);
|
||||
}
|
||||
|
||||
// Find if freelist contains a buffer of the same size as output_arg
|
||||
bool FindReusableTensor(const onnxruntime::NodeArg& output_arg, OrtValueIndex* reusable_tensor) {
|
||||
auto p_required_buffer_shape = context_.GetShape(output_arg);
|
||||
if (nullptr == p_required_buffer_shape) return false;
|
||||
auto required_buffer_type = output_arg.Type();
|
||||
auto& required_memory_info = AllocPlan(output_arg.Name()).location;
|
||||
|
||||
for (auto it = freelist_.begin(); it != freelist_.end(); ++it) {
|
||||
|
|
@ -322,9 +332,8 @@ class PlannerImpl {
|
|||
if (!(available_memory_info == required_memory_info)) continue;
|
||||
auto p_available_buffer_shape = context_.GetShape(*p_node_arg);
|
||||
if (nullptr != p_available_buffer_shape) {
|
||||
auto available_buffer_type = p_node_arg->Type();
|
||||
if (SameSize(*p_available_buffer_shape, available_buffer_type,
|
||||
*p_required_buffer_shape, required_buffer_type)) {
|
||||
if (SameSize(*p_available_buffer_shape, *p_node_arg,
|
||||
*p_required_buffer_shape, output_arg)) {
|
||||
*reusable_tensor = it->ml_value;
|
||||
freelist_.erase(it);
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -172,15 +172,21 @@ static void CreateMatMulModel(std::unique_ptr<onnxruntime::Model>& p_model, Prov
|
|||
ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();
|
||||
}
|
||||
|
||||
template <typename T = float>
|
||||
void VerifyOutputs(const Tensor& tensor, const std::vector<int64_t>& expected_dims,
|
||||
const std::vector<T>& expected_values) {
|
||||
TensorShape expected_shape(expected_dims);
|
||||
ASSERT_EQ(expected_shape, tensor.Shape());
|
||||
const std::vector<T> found(tensor.template Data<T>(),
|
||||
tensor.template Data<T>() + expected_values.size());
|
||||
ASSERT_EQ(expected_values, found);
|
||||
}
|
||||
|
||||
void VerifyOutputs(const std::vector<OrtValue>& fetches, const std::vector<int64_t>& expected_dims,
|
||||
const std::vector<float>& expected_values) {
|
||||
ASSERT_EQ(1, fetches.size());
|
||||
auto& rtensor = fetches.front().Get<Tensor>();
|
||||
TensorShape expected_shape(expected_dims);
|
||||
ASSERT_EQ(expected_shape, rtensor.Shape());
|
||||
const std::vector<float> found(rtensor.template Data<float>(),
|
||||
rtensor.template Data<float>() + expected_values.size());
|
||||
ASSERT_EQ(expected_values, found);
|
||||
VerifyOutputs(rtensor, expected_dims, expected_values);
|
||||
}
|
||||
|
||||
void RunModel(InferenceSession& session_object,
|
||||
|
|
@ -1536,5 +1542,60 @@ TEST(InferenceSessionTests, TestParallelExecutionWithCudaProvider) {
|
|||
|
||||
#endif
|
||||
|
||||
// The model being tested here triggers a case where the allocation planner (AP) tries to reuse a tensor of type
|
||||
// double for a string tensor. The reuse logic of AP works correctly on Windows and Ubuntu 16.x
|
||||
// since there the sizeof(double) != sizeof(std::string). However, on CentOS (gcc 4.8.x), the 2 sizes are equal.
|
||||
TEST(InferenceSessionTests, ModelThatTriggersAllocationPlannerToReuseDoubleTensorForStringTensor) {
|
||||
SessionOptions so;
|
||||
|
||||
so.session_logid = "InferenceSessionTests.ModelThatTriggersAllocationPlannerBug";
|
||||
|
||||
InferenceSession session_object{so, &DefaultLoggingManager()};
|
||||
Status st;
|
||||
ASSERT_TRUE((st = session_object.Load("testdata/test_cast_back_to_back_non_const_mixed_types_origin.onnx")).IsOK())
|
||||
<< st.ErrorMessage();
|
||||
ASSERT_TRUE((st = session_object.Initialize()).IsOK()) << st.ErrorMessage();
|
||||
|
||||
RunOptions run_options;
|
||||
run_options.run_tag = "one session/one tag";
|
||||
|
||||
// prepare inputs
|
||||
std::vector<int64_t> dims_x = {1, 2, 3};
|
||||
std::vector<float> values_x = {1.6f, -0.6f, -0.5f, -1.0f, 0.8f, -2.3f};
|
||||
OrtValue ml_value;
|
||||
CreateMLValue<float>(TestCPUExecutionProvider()->GetAllocator(0, OrtMemTypeDefault), dims_x, values_x,
|
||||
&ml_value);
|
||||
NameMLValMap feeds;
|
||||
feeds.insert(std::make_pair("u", ml_value));
|
||||
|
||||
// prepare outputs
|
||||
std::vector<std::string> output_names;
|
||||
output_names.push_back("res");
|
||||
output_names.push_back("res2");
|
||||
output_names.push_back("res3");
|
||||
std::vector<OrtValue> fetches;
|
||||
|
||||
// prepare expected inputs and outputs
|
||||
std::vector<int64_t> expected_dims_res = {1, 2, 3};
|
||||
std::vector<int64_t> expected_values_res = {1, 0, 0, -1, 0, -2};
|
||||
|
||||
std::vector<int64_t> expected_dims_res2 = {1, 2, 3};
|
||||
std::vector<int64_t> expected_values_res2 = {1, 0, 0, -1, 0, -2};
|
||||
|
||||
std::vector<int64_t> expected_dims_res3 = {1, 2, 3};
|
||||
std::vector<int8_t> expected_values_res3 = {1, 0, 0, 1, 0, 1};
|
||||
|
||||
// Now run
|
||||
st = session_object.Run(run_options, feeds, output_names, &fetches);
|
||||
if (!st.IsOK()) {
|
||||
std::cout << "Run returned status: " << st.ErrorMessage() << std::endl;
|
||||
}
|
||||
ASSERT_TRUE(st.IsOK());
|
||||
ASSERT_EQ(3, fetches.size());
|
||||
VerifyOutputs(fetches[0].Get<Tensor>(), expected_dims_res, expected_values_res);
|
||||
VerifyOutputs(fetches[1].Get<Tensor>(), expected_dims_res2, expected_values_res2);
|
||||
VerifyOutputs(fetches[2].Get<Tensor>(), expected_dims_res3, expected_values_res3);
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
49
onnxruntime/test/testdata/test_cast_back_to_back_non_const_mixed_types_origin.onnx
vendored
Normal file
49
onnxruntime/test/testdata/test_cast_back_to_back_non_const_mixed_types_origin.onnx
vendored
Normal file
|
|
@ -0,0 +1,49 @@
|
|||
|
||||
onnx-tests:Å
|
||||
|
||||
uvcast_0"Cast*
|
||||
to
|
||||
|
||||
vwcast_1"Cast*
|
||||
to
|
||||
|
||||
wxcast_2"Cast*
|
||||
to
|
||||
!
|
||||
xrescast_3"Cast*
|
||||
to
|
||||
|
||||
ww2cast_4"Cast*
|
||||
to
|
||||
#
|
||||
w2res2cast_5"Cast*
|
||||
to
|
||||
|
||||
xx2cast_6"Cast*
|
||||
to
|
||||
!
|
||||
x2x3cast_7"Cast*
|
||||
to
|
||||
#
|
||||
x3res3cast_8"Cast*
|
||||
to test-cast-back-to-back-non-constZ
|
||||
u
|
||||
|
||||
|
||||
|
||||
b
|
||||
res
|
||||
|
||||
|
||||
|
||||
b
|
||||
res2
|
||||
|
||||
|
||||
|
||||
b
|
||||
res3
|
||||
|
||||
|
||||
|
||||
B
|
||||
Loading…
Reference in a new issue