Fix reuse logic in allocation planner. (#2393)

* Fix reuse logic in allocation planner.

* PR comments

* Add helpful comments

* Don't allow reuse across string tensors.
This commit is contained in:
Pranav Sharma 2019-11-13 22:51:12 -08:00 committed by GitHub
parent b90d55b7ea
commit 7e164eaa6a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 133 additions and 14 deletions

View file

@ -258,7 +258,7 @@ class PlannerImpl {
const auto& val1 = shape1.dim(i);
const auto& val2 = shape2.dim(i);
if (utils::HasDimValue(val1) && utils::HasDimValue(val2) &&
(val1.dim_value() == val2.dim_value()))
(val1.dim_value() == val2.dim_value()))
continue; // same known dimension
if (utils::HasDimParam(val1) && utils::HasDimParam(val2)) {
const auto& val1_param = val1.dim_param();
@ -281,9 +281,20 @@ class PlannerImpl {
return elt_type->Size();
}
static bool SameSize(const TensorShapeProto& shape1, const DataType& ptype1, const TensorShapeProto& shape2,
const DataType& ptype2) {
return (GetElementSize(ptype1) == GetElementSize(ptype2)) && SameShape(shape1, shape2);
static bool SameSize(const TensorShapeProto& shape1, const onnxruntime::NodeArg& arg1, const TensorShapeProto& shape2, const onnxruntime::NodeArg& arg2) {
const auto& ptype1 = arg1.Type();
const auto& ptype2 = arg2.Type();
auto type1_size = GetElementSize(ptype1);
auto type2_size = GetElementSize(ptype2);
bool is_type1_string = arg1.TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING;
bool is_type2_string = arg2.TypeAsProto()->tensor_type().elem_type() == ONNX_NAMESPACE::TensorProto_DataType_STRING;
// sizeof(std::string) = sizeof(double) on gcc 4.8.x on CentOS. This causes the allocation planner to reuse
// a tensor of type double. This won't work for string tensors since they need to be placement new'ed.
// If either of the tensors is a string, don't treat them the same. Moreover, reusing a string tensor for a string
// tensor without releasing the previous memory can cause memory leaks; hence we don't allow reuse across string
// tensors as well.
return !(is_type1_string || is_type2_string) && (type1_size == type2_size) && SameShape(shape1, shape2);
/* TODO: we can improve this if the concrete shapes are known for both as below.
Unclear whether this is worthwhile though.
@ -305,14 +316,13 @@ class PlannerImpl {
auto p_shape2 = context_.GetShape(arg2);
// If the shapes are unknown, we conservatively assume they may be of different size.
if ((nullptr == p_shape1) || (nullptr == p_shape2)) return false;
return SameSize(*p_shape1, arg1.Type(), *p_shape2, arg2.Type());
return SameSize(*p_shape1, arg1, *p_shape2, arg2);
}
// Find if freelist contains a buffer of the same size as output_arg
bool FindReusableTensor(const onnxruntime::NodeArg& output_arg, OrtValueIndex* reusable_tensor) {
auto p_required_buffer_shape = context_.GetShape(output_arg);
if (nullptr == p_required_buffer_shape) return false;
auto required_buffer_type = output_arg.Type();
auto& required_memory_info = AllocPlan(output_arg.Name()).location;
for (auto it = freelist_.begin(); it != freelist_.end(); ++it) {
@ -322,9 +332,8 @@ class PlannerImpl {
if (!(available_memory_info == required_memory_info)) continue;
auto p_available_buffer_shape = context_.GetShape(*p_node_arg);
if (nullptr != p_available_buffer_shape) {
auto available_buffer_type = p_node_arg->Type();
if (SameSize(*p_available_buffer_shape, available_buffer_type,
*p_required_buffer_shape, required_buffer_type)) {
if (SameSize(*p_available_buffer_shape, *p_node_arg,
*p_required_buffer_shape, output_arg)) {
*reusable_tensor = it->ml_value;
freelist_.erase(it);
return true;

View file

@ -172,15 +172,21 @@ static void CreateMatMulModel(std::unique_ptr<onnxruntime::Model>& p_model, Prov
ASSERT_TRUE(status.IsOK()) << status.ErrorMessage();
}
template <typename T = float>
void VerifyOutputs(const Tensor& tensor, const std::vector<int64_t>& expected_dims,
const std::vector<T>& expected_values) {
TensorShape expected_shape(expected_dims);
ASSERT_EQ(expected_shape, tensor.Shape());
const std::vector<T> found(tensor.template Data<T>(),
tensor.template Data<T>() + expected_values.size());
ASSERT_EQ(expected_values, found);
}
void VerifyOutputs(const std::vector<OrtValue>& fetches, const std::vector<int64_t>& expected_dims,
const std::vector<float>& expected_values) {
ASSERT_EQ(1, fetches.size());
auto& rtensor = fetches.front().Get<Tensor>();
TensorShape expected_shape(expected_dims);
ASSERT_EQ(expected_shape, rtensor.Shape());
const std::vector<float> found(rtensor.template Data<float>(),
rtensor.template Data<float>() + expected_values.size());
ASSERT_EQ(expected_values, found);
VerifyOutputs(rtensor, expected_dims, expected_values);
}
void RunModel(InferenceSession& session_object,
@ -1536,5 +1542,60 @@ TEST(InferenceSessionTests, TestParallelExecutionWithCudaProvider) {
#endif
// The model being tested here triggers a case where the allocation planner (AP) tries to reuse a tensor of type
// double for a string tensor. The reuse logic of AP works correctly on Windows and Ubuntu 16.x
// since there the sizeof(double) != sizeof(std::string). However, on CentOS (gcc 4.8.x), the 2 sizes are equal.
TEST(InferenceSessionTests, ModelThatTriggersAllocationPlannerToReuseDoubleTensorForStringTensor) {
SessionOptions so;
so.session_logid = "InferenceSessionTests.ModelThatTriggersAllocationPlannerBug";
InferenceSession session_object{so, &DefaultLoggingManager()};
Status st;
ASSERT_TRUE((st = session_object.Load("testdata/test_cast_back_to_back_non_const_mixed_types_origin.onnx")).IsOK())
<< st.ErrorMessage();
ASSERT_TRUE((st = session_object.Initialize()).IsOK()) << st.ErrorMessage();
RunOptions run_options;
run_options.run_tag = "one session/one tag";
// prepare inputs
std::vector<int64_t> dims_x = {1, 2, 3};
std::vector<float> values_x = {1.6f, -0.6f, -0.5f, -1.0f, 0.8f, -2.3f};
OrtValue ml_value;
CreateMLValue<float>(TestCPUExecutionProvider()->GetAllocator(0, OrtMemTypeDefault), dims_x, values_x,
&ml_value);
NameMLValMap feeds;
feeds.insert(std::make_pair("u", ml_value));
// prepare outputs
std::vector<std::string> output_names;
output_names.push_back("res");
output_names.push_back("res2");
output_names.push_back("res3");
std::vector<OrtValue> fetches;
// prepare expected inputs and outputs
std::vector<int64_t> expected_dims_res = {1, 2, 3};
std::vector<int64_t> expected_values_res = {1, 0, 0, -1, 0, -2};
std::vector<int64_t> expected_dims_res2 = {1, 2, 3};
std::vector<int64_t> expected_values_res2 = {1, 0, 0, -1, 0, -2};
std::vector<int64_t> expected_dims_res3 = {1, 2, 3};
std::vector<int8_t> expected_values_res3 = {1, 0, 0, 1, 0, 1};
// Now run
st = session_object.Run(run_options, feeds, output_names, &fetches);
if (!st.IsOK()) {
std::cout << "Run returned status: " << st.ErrorMessage() << std::endl;
}
ASSERT_TRUE(st.IsOK());
ASSERT_EQ(3, fetches.size());
VerifyOutputs(fetches[0].Get<Tensor>(), expected_dims_res, expected_values_res);
VerifyOutputs(fetches[1].Get<Tensor>(), expected_dims_res2, expected_values_res2);
VerifyOutputs(fetches[2].Get<Tensor>(), expected_dims_res3, expected_values_res3);
}
} // namespace test
} // namespace onnxruntime

View file

@ -0,0 +1,49 @@

onnx-tests:Å

uvcast_0"Cast*
to  

vwcast_1"Cast*
to 

wxcast_2"Cast*
to 
!
xrescast_3"Cast*
to 
ww2cast_4"Cast*
to 
#
w2res2cast_5"Cast*
to 
xx2cast_6"Cast*
to  
!
x2x3cast_7"Cast*
to 
#
x3res3cast_8"Cast*
to  test-cast-back-to-back-non-constZ
u



b
res



b
res2



b
res3



B