Handle the Scan subgraph producing outputs with a symbolic dimension.

If the output has a symbolic dimension
  * Infer the shape if it is a loop state variable as we have the input value, and the shape from the subgraph output must match
  * Use a temporary MLValue for the first subgraph execution if it is a subgraph output with a symbolic dimension.
    * After the first execution make the overall output shape concrete and allocate the full output buffer.
    * Use slices of the full output buffer for all other subgraph executions to avoid copies.

Add unit test to validate.
This commit is contained in:
Scott McKay 2018-11-24 17:51:35 +10:00
parent c7513e676f
commit 5d3992f999
5 changed files with 325 additions and 66 deletions

View file

@ -118,6 +118,59 @@ class LoopStateVariable {
MLValue b_;
};
/*
Class that co-ordinates writes to slices of the overall Scan output.
It will directly update the data returned by OpKernelContextInternal.Output(i).
*/
class OutputIterator {
public:
static Status Create(OpKernelContextInternal& context,
int output_index,
bool is_loop_state_var,
TensorShape final_shape,
std::unique_ptr<OutputIterator>& iterator) {
iterator.reset(new OutputIterator(context, output_index, is_loop_state_var, final_shape));
return iterator->Initialize();
}
MLValue& operator*();
OutputIterator& operator++();
void ZeroOutCurrent() {
auto* tensor = (**this).GetMutable<Tensor>();
memset(tensor->MutableDataRaw(), 0, tensor->Size());
}
private:
OutputIterator(OpKernelContextInternal& context,
int output_index,
bool is_loop_state_var,
TensorShape final_shape);
Status Initialize();
Status AllocateFinalBuffer();
Status MakeConcrete();
OpKernelContextInternal& context_;
const int output_index_;
std::vector<int64_t> dims_;
TensorShapeProto per_iteration_shape_;
TensorShape final_shape_;
bool is_loop_state_var_;
int64_t num_iterations_;
int64_t cur_iteration_;
bool is_concrete_shape_;
std::vector<MLValueTensorSlicer<MLValue>::Iterator> slicer_iterators_;
std::vector<MLValueTensorSlicer<MLValue>::Iterator>::iterator cur_slicer_iterator_;
// if shape is not concrete we need the first output to know the missing dimension before
// we can allocate final_output_mlvalue_ and use the slicers.
MLValue first_output_;
MLValue* final_output_mlvalue_;
};
class ScanImpl {
public:
ScanImpl(OpKernelContextInternal& context,
@ -135,10 +188,10 @@ class ScanImpl {
private:
// validate inputs and setup batch size and max sequence length.
Status ValidateInput();
Status ValidateSubgraphInput(int start_input, int end_input, bool has_seq_len_dim,
Status ValidateSubgraphInput(int start_input, int end_input, bool is_loop_state_var,
const std::vector<const NodeArg*>& graph_inputs);
Status AllocateOutput(int index, bool has_sequence_len);
Status AllocateOutput(int index, bool is_loop_state_var);
Status AllocateOutputTensors();
Status CreateLoopStateVariables(std::vector<std::vector<LoopStateVariable>>& loop_state_variables);
@ -147,7 +200,6 @@ class ScanImpl {
Status IterateSequence(std::vector<LoopStateVariable>& loop_state_variables,
ConstTensorSlicerIterators& scan_input_stream_iterators,
MutableTensorSlicerIterators& scan_output_stream_iterators,
int64_t seq_length);
OpKernelContextInternal& context_;
@ -166,6 +218,7 @@ class ScanImpl {
std::vector<int64_t> sequence_lens_;
std::vector<std::string> subgraph_output_names_;
std::vector<std::unique_ptr<OutputIterator>> output_iterators_;
std::unordered_map<std::string, const MLValue*> implicit_inputs_;
};
@ -249,6 +302,149 @@ void LoopStateVariable::Next() {
++iteration_num_;
}
static Status MakeShapeConcrete(const TensorShape& per_iteration_shape, TensorShape& final_shape) {
auto num_dims_per_iteration = per_iteration_shape.NumDimensions();
auto final_shape_offset = final_shape.NumDimensions() - num_dims_per_iteration;
for (size_t i = 0; i < num_dims_per_iteration; ++i) {
auto existing_value = final_shape[i + final_shape_offset];
if (existing_value == -1) {
final_shape[i + final_shape_offset] = per_iteration_shape[i];
} else {
if (existing_value != per_iteration_shape[i]) {
return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL,
"Mismatch between expected shape and shape from first output",
final_shape, " is not compatible with ", per_iteration_shape);
}
}
}
return Status::OK();
}
OutputIterator::OutputIterator(OpKernelContextInternal& context,
int output_index,
bool is_loop_state_var,
TensorShape final_shape)
: context_{context},
output_index_{output_index},
is_loop_state_var_{is_loop_state_var},
final_shape_{final_shape},
cur_iteration_{0} {
is_concrete_shape_ = final_shape_.Size() >= 0;
// there are one or two dimensions being iterated depending on whether it's a loop state variable or scan input.
auto num_iteration_dims = is_loop_state_var_ ? 1 : 2;
num_iterations_ = final_shape_.Slice(0, num_iteration_dims).Size();
}
Status OutputIterator::Initialize() {
Status status = Status::OK();
if (is_loop_state_var_ && !is_concrete_shape_) {
// copy the shape from the input initial value which will have a concrete shape.
auto* input = context_.Input<Tensor>(output_index_ + 1); // +1 to skip the sequence_len input
status = MakeShapeConcrete(input->Shape(), final_shape_);
ONNXRUNTIME_RETURN_IF_ERROR(status);
is_concrete_shape_ = true;
}
if (is_concrete_shape_) {
status = AllocateFinalBuffer();
ONNXRUNTIME_RETURN_IF_ERROR(status);
} else {
// use first_output_
}
return Status::OK();
}
Status OutputIterator::AllocateFinalBuffer() {
// make sure a single buffer for the full output is created upfront.
// we slice this into per-iteration pieces in Execute using MLValueTensorSlicer.
auto* tensor = context_.Output(output_index_, final_shape_);
if (!tensor)
return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create output tensor for output #", output_index_);
// get the output tensor we just created as an MLValue
final_output_mlvalue_ = context_.GetOutputMLValue(output_index_);
if (is_loop_state_var_) {
// only one entry is required as we slice on a single dimension
slicer_iterators_.push_back(MLValueTensorSlicer<MLValue>::Create(*final_output_mlvalue_).begin());
} else {
auto batch_size = final_shape_[0];
for (int i = 0; i < batch_size; ++i) {
// the slicer handles the sequence dimension (dim 1) so create an entry for each batch
slicer_iterators_.push_back(MLValueTensorSlicer<MLValue>::Create(*final_output_mlvalue_, 1, i).begin());
}
}
cur_slicer_iterator_ = slicer_iterators_.begin();
return Status::OK();
}
Status OutputIterator::MakeConcrete() {
ONNXRUNTIME_ENFORCE(first_output_.IsAllocated(), "First usage of OutputIterator did not result in any output.");
Status status = Status::OK();
auto& tensor = first_output_.Get<Tensor>();
auto& tensor_shape = tensor.Shape();
// update the final shape
status = MakeShapeConcrete(tensor_shape, final_shape_);
ONNXRUNTIME_RETURN_IF_ERROR(status);
is_concrete_shape_ = true;
status = AllocateFinalBuffer();
ONNXRUNTIME_RETURN_IF_ERROR(status);
// copy first output to final buffer
auto input_span = gsl::make_span<const gsl::byte>(static_cast<const gsl::byte*>(tensor.DataRaw()), tensor.Size());
auto output = (**this).GetMutable<Tensor>();
auto output_span = gsl::make_span<gsl::byte>(static_cast<gsl::byte*>(output->MutableDataRaw()), output->Size());
gsl::copy(input_span, output_span);
// release the MLValue we used for the first output
first_output_ = {};
return status;
}
MLValue& OutputIterator::operator*() {
ONNXRUNTIME_ENFORCE(cur_iteration_ < num_iterations_);
if (is_concrete_shape_)
return **cur_slicer_iterator_;
else
return first_output_;
}
OutputIterator& OutputIterator::operator++() {
if (cur_iteration_ < num_iterations_) {
if (!is_concrete_shape_) {
// we should have an output now, so convert to using the overall output buffer and slicers
auto status = MakeConcrete();
ONNXRUNTIME_ENFORCE(status.IsOK(), status.ErrorMessage());
}
++cur_iteration_;
// if not a loop state var, see if we just finished the current sequence (dim 1)
if (!is_loop_state_var_ && cur_iteration_ % final_shape_[1] == 0) {
++cur_slicer_iterator_;
} else {
++(*cur_slicer_iterator_);
}
}
return *this;
}
ScanImpl::ScanImpl(OpKernelContextInternal& context,
const SessionState& session_state,
int64_t num_scan_inputs,
@ -258,7 +454,7 @@ ScanImpl::ScanImpl(OpKernelContextInternal& context,
subgraph_{*session_state.GetGraphViewer()},
directions_{directions},
implicit_inputs_{context_.GetImplicitInputs()} {
//optional first input so may be nullptr
// optional first input so may be nullptr
sequence_lens_tensor_ = context.Input<Tensor>(0);
num_variadic_inputs_ = context_.NumVariadicInputs(1);
@ -271,12 +467,12 @@ Status ScanImpl::Initialize() {
auto status = ValidateInput();
ONNXRUNTIME_RETURN_IF_ERROR(status);
auto& graph_outputs = subgraph_.GetOutputs();
subgraph_output_names_.reserve(graph_outputs.size());
auto& subgraph_outputs = subgraph_.GetOutputs();
subgraph_output_names_.reserve(subgraph_outputs.size());
// save list of subgraph output names in their provided order to use when fetching the results
// from each subgraph execution. the Scan outputs will match this order.
for (auto& output : graph_outputs) {
for (auto& output : subgraph_outputs) {
subgraph_output_names_.push_back(output->Name());
}
@ -301,9 +497,10 @@ static const MLValue& GetSubgraphInputMLValue(const OpKernelContextInternal& con
}
// Validate that the subgraph input has valid shapes
Status ScanImpl::ValidateSubgraphInput(int start_input, int end_input, bool has_seq_len_dim,
Status ScanImpl::ValidateSubgraphInput(int start_input, int end_input, bool is_loop_state_var,
const std::vector<const NodeArg*>& graph_inputs) {
// first dim is batch size. optional sequence dim. dim/s for the data
bool has_seq_len_dim = !is_loop_state_var;
auto min_dims_required = has_seq_len_dim ? 3 : 2;
for (int i = start_input; i < end_input; ++i) {
@ -355,11 +552,11 @@ Status ScanImpl::ValidateInput() {
}
// process any loop state variables, which will set the batch size
auto status = ValidateSubgraphInput(0, num_loop_state_variables_, false, graph_inputs);
auto status = ValidateSubgraphInput(0, num_loop_state_variables_, true, graph_inputs);
ONNXRUNTIME_RETURN_IF_ERROR(status);
// process the scan inputs. sets/validates batch size and sequence length
status = ValidateSubgraphInput(num_loop_state_variables_, num_variadic_inputs_, true, graph_inputs);
status = ValidateSubgraphInput(num_loop_state_variables_, num_variadic_inputs_, false, graph_inputs);
ONNXRUNTIME_RETURN_IF_ERROR(status);
if (sequence_lens_tensor_ != nullptr) {
@ -386,11 +583,12 @@ Status ScanImpl::ValidateInput() {
return Status::OK();
}
Status ScanImpl::AllocateOutput(int index, bool has_sequence_len_dimension) {
Status ScanImpl::AllocateOutput(int index, bool is_loop_state_var) {
// use the shape from the subgraph output. we require this to be specified in the model or inferable.
auto& graph_outputs = subgraph_.GetOutputs();
auto* graph_output = graph_outputs.at(index);
auto* graph_output_shape = graph_output->Shape();
if (!graph_output_shape) {
return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL, "Subgraph must have the shape set for all outputs but ",
graph_output->Name(), " did not.");
@ -404,24 +602,16 @@ Status ScanImpl::AllocateOutput(int index, bool has_sequence_len_dimension) {
scan_output_dims.push_back(batch_size_);
if (has_sequence_len_dimension) {
if (!is_loop_state_var) {
scan_output_dims.push_back(max_sequence_len_);
}
scan_output_dims.insert(scan_output_dims.cend(), graph_output_dims.cbegin(), graph_output_dims.cend());
// make sure a single buffer for the full output is created upfront.
// we slice this into per-iteration pieces in Execute using MLValueTensorSlicer.
auto* tensor = context_.Output(index, TensorShape(scan_output_dims));
std::unique_ptr<OutputIterator> output_iter;
OutputIterator::Create(context_, index, is_loop_state_var, TensorShape(scan_output_dims), output_iter);
if (!tensor)
return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create output tensor for ", graph_output->Name());
// zero out the output so that any short sequences have deterministic values in unused slots.
// strictly speaking this isn't required, and alternatively we could fill with zeros when we
// encounter a short sequence and are creating output, but one memset is easy, involves
// less code complexity, and should be relatively cheap.
memset(tensor->MutableDataRaw(), 0, tensor->Size());
output_iterators_.push_back(std::move(output_iter));
return Status::OK();
}
@ -435,17 +625,13 @@ Status ScanImpl::AllocateOutputTensors() {
" outputs but Scan expects ", num_variadic_outputs_);
}
// TODO: Need to handle shape/type inference for subgraphs.
// For now copy shape from subgraph output and expand based on batch size and sequence length
for (int i = 0; i < num_loop_state_variables_; ++i) {
const bool has_sequence_len_dimension = false; // loop state variables don't have a sequence_len dimension;
status = AllocateOutput(i, has_sequence_len_dimension);
status = AllocateOutput(i, true);
ONNXRUNTIME_RETURN_IF_ERROR(status);
}
for (int i = num_loop_state_variables_, end = num_variadic_outputs_; i < end; ++i) {
status = AllocateOutput(i, true);
status = AllocateOutput(i, false);
ONNXRUNTIME_RETURN_IF_ERROR(status);
}
@ -461,9 +647,7 @@ Status ScanImpl::CreateLoopStateVariables(std::vector<std::vector<LoopStateVaria
// each iteration of the subgraph. This minimizes copying of data during each iteration.
std::vector<MLValueTensorSlicer<const MLValue>::Iterator> loop_state_input_iterators;
std::vector<MLValueTensorSlicer<MLValue>::Iterator> loop_state_output_iterators;
loop_state_input_iterators.reserve(num_loop_state_variables_);
loop_state_output_iterators.reserve(num_loop_state_variables_);
// create the input and output slice iterator for each loop state variable.
for (int i = 0; i < num_loop_state_variables_; ++i) {
@ -473,7 +657,6 @@ Status ScanImpl::CreateLoopStateVariables(std::vector<std::vector<LoopStateVaria
ONNXRUNTIME_ENFORCE(p_mlvalue, "Output MLValue has not been created for loop state variable output ", i);
loop_state_input_iterators.push_back(MLValueTensorSlicer<const MLValue>::Create(mlvalue).begin());
loop_state_output_iterators.push_back(MLValueTensorSlicer<MLValue>::Create(*p_mlvalue).begin());
}
batch_loop_state_variables.clear();
@ -490,7 +673,7 @@ Status ScanImpl::CreateLoopStateVariables(std::vector<std::vector<LoopStateVaria
for (int i = 0; i < num_loop_state_variables_; ++i) {
auto& input_iter = loop_state_input_iterators[i];
auto& output_iter = loop_state_output_iterators[i];
auto& output_iter = *output_iterators_[i];
variables.push_back(LoopStateVariable(*input_iter, *output_iter, sequence_lens_[b], alloc));
@ -533,21 +716,9 @@ Status ScanImpl::Execute() {
}
}
// Setup output MLValue streams
std::vector<MLValueTensorSlicer<MLValue>::Iterator> scan_output_stream_iterators;
scan_output_stream_iterators.reserve(num_variadic_outputs_);
for (int i = num_loop_state_variables_, end = num_variadic_outputs_; i < end; ++i) {
MLValue* p_mlvalue = context_.GetOutputMLValue(i);
ONNXRUNTIME_ENFORCE(p_mlvalue, "Output MLValue has not been created for output ", i);
scan_output_stream_iterators.push_back(MLValueTensorSlicer<MLValue>::Create(*p_mlvalue, 1, b).begin());
}
// Call the subgraph for each item in the sequence
status = IterateSequence(batch_loop_state_variables[b],
scan_input_stream_iterators,
scan_output_stream_iterators,
sequence_lens_[b]);
ONNXRUNTIME_RETURN_IF_ERROR(status);
@ -558,7 +729,6 @@ Status ScanImpl::Execute() {
Status ScanImpl::IterateSequence(std::vector<LoopStateVariable>& loop_state_variables,
ConstTensorSlicerIterators& scan_input_stream_iterators,
MutableTensorSlicerIterators& scan_output_stream_iterators,
int64_t seq_length) {
Status status = Status::OK();
auto& graph_inputs = subgraph_.GetInputs();
@ -575,9 +745,8 @@ Status ScanImpl::IterateSequence(std::vector<LoopStateVariable>& loop_state_vari
feeds[entry.first] = *entry.second;
}
// as we fill all the outputs with 0 initially, just iterate seq_length not max_seq_length_
// as we don't need to pad the output for a short sequence here.
for (int64_t seq_no = 0; seq_no < seq_length; ++seq_no) {
int64_t seq_no = 0;
for (; seq_no < seq_length; ++seq_no) {
for (int input = 0; input < num_variadic_inputs_; ++input) {
// the ordering of the Scan inputs should match the ordering of the subgraph inputs
auto name = graph_inputs[input]->Name();
@ -596,15 +765,24 @@ Status ScanImpl::IterateSequence(std::vector<LoopStateVariable>& loop_state_vari
fetches.clear();
bool copy_fetch_to_iter = false;
for (int output = 0, end = num_variadic_outputs_; output < end; ++output) {
if (output < num_loop_state_variables_) {
// add loop state variable output
fetches.push_back(loop_state_variables[output].Output());
} else {
// add sliced output
auto& iterator = scan_output_stream_iterators[output - num_loop_state_variables_];
fetches.push_back(*iterator);
++iterator;
// add MLValue from sliced output
auto& iterator = *output_iterators_[output];
auto& mlvalue = *iterator;
fetches.push_back(mlvalue);
// If there is a dynamic shape in an output we need to copy it back to the OutputIterator
// so it can setup the overall output and avoid copies for all other output values.
// The mlvalue in the iterator will point to data once we have the overall output initialized.
// Check current value as we don't want to unset copy_fetch_to_iter if it is true.
if (!copy_fetch_to_iter)
copy_fetch_to_iter = (seq_no == 0) && (mlvalue.IsAllocated() == false);
}
}
@ -620,6 +798,27 @@ Status ScanImpl::IterateSequence(std::vector<LoopStateVariable>& loop_state_vari
// cycle the LoopStateVariable input/output in preparation for the next iteration
std::for_each(loop_state_variables.begin(), loop_state_variables.end(), [](LoopStateVariable& v) { v.Next(); });
// and move the output iterators.
for (int output = num_loop_state_variables_; output < num_variadic_outputs_; ++output) {
auto& iterator = *output_iterators_[output];
// copy the data from fetches to the iterator so it can setup the overall output
if (copy_fetch_to_iter && (*iterator).IsAllocated() == false) {
*iterator = fetches[output];
}
++iterator;
}
}
// zero out any remaining values in the sequence
for (; seq_length < max_sequence_len_; ++seq_length) {
for (int output = num_loop_state_variables_; output < num_variadic_outputs_; ++output) {
auto& iterator = *output_iterators_[output];
iterator.ZeroOutCurrent();
++iterator;
}
}
return status;

View file

@ -16,7 +16,7 @@ namespace test {
struct RunOptions {
bool include_dim_values_in_main_graph = false;
bool symbolic_dim_values_in_main_graph = false;
int symbolic_dim_value_in_main_graph = -1;
bool include_dim_values_in_subgraph = true;
};
@ -181,7 +181,7 @@ void RunTest(bool condition_value,
IfOpTester test{options};
test.AddShapeToTensorData(options.include_dim_values_in_main_graph,
options.symbolic_dim_values_in_main_graph);
options.symbolic_dim_value_in_main_graph);
// add the main graph inputs and outputs.
// we will handle the 'If' inputs in the AddNodes override, and as 'If' is the last node

View file

@ -261,8 +261,6 @@ void RunTest(const std::string test_name, int64_t batch_size, int64_t max_sequen
ScanOpTester test;
test.AddShapeToTensorData(options.include_dim_values_in_main_graph);
test.AddAttribute("body", proto);
test.AddAttribute<int64_t>("num_scan_inputs", 2);
@ -277,6 +275,8 @@ void RunTest(const std::string test_name, int64_t batch_size, int64_t max_sequen
test.AddInput<int64_t>("sequence_lens", sequence_lens_dims, *sequence_lens);
}
test.AddShapeToTensorData(options.include_dim_values_in_main_graph);
test.AddInput<float>("scan_loop_state_in_0", {batch_size, 1}, loop_state_in_0);
std::vector<int64_t> input_shape{batch_size, max_sequence_len, input_size};
@ -665,5 +665,58 @@ TEST(Scan, MixedTypeInputs) {
test.Run();
}
TEST(Scan, UnknownDimInSubgraphOutput) {
Model model("ScanBody");
auto& graph = model.MainGraph();
TypeProto float_tensor;
float_tensor.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT);
float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param("param");
TypeProto int_tensor;
int_tensor.mutable_tensor_type()->set_elem_type(TensorProto_DataType_INT64);
int_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param("param");
auto& state_in_1 = graph.GetOrCreateNodeArg("state_in_1", &float_tensor);
auto& scan_in_1 = graph.GetOrCreateNodeArg("scan_in_1", &float_tensor);
auto& state_out_1 = graph.GetOrCreateNodeArg("state_out_1", &float_tensor);
auto& scan_out_1 = graph.GetOrCreateNodeArg("scan_out_1", &float_tensor);
graph.AddNode("node1", "Identity", "Copy state_in_1 to scan_out_1", {&state_in_1}, {&scan_out_1});
graph.AddNode("node2", "Identity", "Copy scan_in_1 to state_out_1", {&scan_in_1}, {&state_out_1});
graph.SetInputOrder({&state_in_1, &scan_in_1});
graph.SetOutputOrder({&state_out_1, &scan_out_1});
auto status = graph.Resolve();
EXPECT_EQ(status, Status::OK());
auto& scan_body = graph.ToGraphProto();
// Construct and run scan test
ScanOpTester test;
int64_t batch_size = 1, sequence_len = 3, input_size = 1;
std::vector<int64_t> seq_shape{batch_size, sequence_len, input_size};
std::vector<int64_t> state_shape{batch_size, input_size};
test.AddAttribute("body", scan_body);
test.AddAttribute<int64_t>("num_scan_inputs", 1);
// we add a symbolic dimension to bot the initial state and the scan input so we test the path that handles loop
// state variables (prior to execution) and the path that handles subgraph outputs (post first execution).
// Note that we cross the values over in the subgraph, so the symbolic dimension in
// initial_state_1 affects scan_out_1, and the symbolic dimension in scan_input_1 affects state_out_1.
test.AddMissingOptionalInput<int64_t>();
test.AddShapeToTensorData(true, 1); // add shape and symbolic dim in dim 1 for initial_state_1
test.AddInput<float>("initial_state_1", state_shape, {0.0});
test.AddShapeToTensorData(true, 2); // add shape and symbolic dim in dim 2 for scan_input_1
test.AddInput<float>("scan_input_1", seq_shape, {1.0, 2.0, 3.0});
test.AddOutput<float>("final_state_1", state_shape, {3.0});
test.AddOutput<float>("scan_output_1", seq_shape, {0.0, 1.0, 2.0});
test.Run();
}
} // namespace test
} // namespace onnxruntime

View file

@ -407,7 +407,9 @@ void OpTester::Run(ExpectResult expect_result,
const auto& expected_shape = expected_data.data_.Get<Tensor>().Shape();
EXPECT_TRUE(inferred_dims.size() == expected_shape.NumDimensions());
for (int d = 0; d < inferred_dims.size(); ++d) {
EXPECT_EQ(expected_shape[d], inferred_dims[d]);
// check equal unless the input involved a symbolic dimension
if (inferred_dims[d] != -1)
EXPECT_EQ(expected_shape[d], inferred_dims[d]) << "Output idx = " << idx << " dim = " << d;
}
}
Check(expected_data, mlvalue.Get<Tensor>(), provider_type);

View file

@ -91,7 +91,11 @@ struct TTypeProto : ONNX_NAMESPACE::TypeProto {
if (shape) {
auto mutable_shape = mutable_tensor_type()->mutable_shape();
for (auto i : *shape) {
mutable_shape->add_dim()->set_dim_value(i);
auto* mutable_dim = mutable_shape->add_dim();
if (i != -1)
mutable_dim->set_dim_value(i);
else
mutable_dim->set_dim_param("symbolic");
}
}
}
@ -145,10 +149,11 @@ class OpTester {
// Set whether the NodeArg created by AddInput/AddOutput should include shape information
// for Tensor types. If not added, shape inferencing should resolve. If added, shape inferencing
// should validate. Default is to not add.
OpTester& AddShapeToTensorData(bool add_shape = true, bool add_symbolic_dim = false) {
// should validate. Default is to not add.
// Additionally a symbolic dimension will be added if symbolic_dim matches a dimension in the input.
OpTester& AddShapeToTensorData(bool add_shape = true, int symbolic_dim = -1) {
add_shape_to_tensor_data_ = add_shape;
add_symbolic_dim_to_tensor_data_ = add_symbolic_dim;
add_symbolic_dim_to_tensor_data_ = symbolic_dim;
return *this;
}
@ -268,7 +273,7 @@ class OpTester {
ONNXRUNTIME_ENFORCE(shape.Size() == values_count, values_count,
" input values doesn't match tensor size of ", shape.Size());
auto allocator = ::onnxruntime::test::AllocatorManager::Instance().GetAllocator(CPU);
auto allocator = test::AllocatorManager::Instance().GetAllocator(CPU);
auto size_in_bytes = values_count * sizeof(T);
void* buffer = allocator->Alloc(size_in_bytes);
auto p_tensor = std::make_unique<Tensor>(DataTypeImpl::GetType<T>(),
@ -283,8 +288,8 @@ class OpTester {
}
std::vector<int64_t> dims_for_proto{dims};
if (add_symbolic_dim_to_tensor_data_ && !dims.empty()) {
dims_for_proto[0] = -1;
if (add_symbolic_dim_to_tensor_data_ >= 0 && dims.size() > add_symbolic_dim_to_tensor_data_) {
dims_for_proto[add_symbolic_dim_to_tensor_data_] = -1;
}
TTypeProto<T> type_proto(add_shape_to_tensor_data_ ? &dims_for_proto : nullptr);
@ -302,7 +307,7 @@ class OpTester {
const char* domain_;
int opset_version_;
bool add_shape_to_tensor_data_ = true;
bool add_symbolic_dim_to_tensor_data_ = false;
int add_symbolic_dim_to_tensor_data_ = -1;
std::vector<Data> input_data_;
std::vector<Data> output_data_;
std::vector<size_t> initializer_index_;