mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-03 03:58:54 +00:00
Handle the Scan subgraph producing outputs with a symbolic dimension.
If the output has a symbolic dimension
* If it is a loop state variable, infer the shape from the input value, as the shape of the subgraph output must match it.
* Use a temporary MLValue for the first subgraph execution if it is a subgraph output with a symbolic dimension.
* After the first execution make the overall output shape concrete and allocate the full output buffer.
* Use slices of the full output buffer for all other subgraph executions to avoid copies.
Add unit test to validate.
This commit is contained in:
parent
c7513e676f
commit
5d3992f999
5 changed files with 325 additions and 66 deletions
|
|
@ -118,6 +118,59 @@ class LoopStateVariable {
|
|||
MLValue b_;
|
||||
};
|
||||
|
||||
/*
|
||||
Class that co-ordinates writes to slices of the overall Scan output.
|
||||
It will directly update the data returned by OpKernelContextInternal.Output(i).
|
||||
*/
|
||||
class OutputIterator {
|
||||
public:
|
||||
static Status Create(OpKernelContextInternal& context,
|
||||
int output_index,
|
||||
bool is_loop_state_var,
|
||||
TensorShape final_shape,
|
||||
std::unique_ptr<OutputIterator>& iterator) {
|
||||
iterator.reset(new OutputIterator(context, output_index, is_loop_state_var, final_shape));
|
||||
return iterator->Initialize();
|
||||
}
|
||||
|
||||
MLValue& operator*();
|
||||
OutputIterator& operator++();
|
||||
|
||||
void ZeroOutCurrent() {
|
||||
auto* tensor = (**this).GetMutable<Tensor>();
|
||||
memset(tensor->MutableDataRaw(), 0, tensor->Size());
|
||||
}
|
||||
|
||||
private:
|
||||
OutputIterator(OpKernelContextInternal& context,
|
||||
int output_index,
|
||||
bool is_loop_state_var,
|
||||
TensorShape final_shape);
|
||||
|
||||
Status Initialize();
|
||||
Status AllocateFinalBuffer();
|
||||
Status MakeConcrete();
|
||||
|
||||
OpKernelContextInternal& context_;
|
||||
const int output_index_;
|
||||
std::vector<int64_t> dims_;
|
||||
TensorShapeProto per_iteration_shape_;
|
||||
TensorShape final_shape_;
|
||||
bool is_loop_state_var_;
|
||||
int64_t num_iterations_;
|
||||
int64_t cur_iteration_;
|
||||
|
||||
bool is_concrete_shape_;
|
||||
std::vector<MLValueTensorSlicer<MLValue>::Iterator> slicer_iterators_;
|
||||
std::vector<MLValueTensorSlicer<MLValue>::Iterator>::iterator cur_slicer_iterator_;
|
||||
|
||||
// if shape is not concrete we need the first output to know the missing dimension before
|
||||
// we can allocate final_output_mlvalue_ and use the slicers.
|
||||
MLValue first_output_;
|
||||
|
||||
MLValue* final_output_mlvalue_;
|
||||
};
|
||||
|
||||
class ScanImpl {
|
||||
public:
|
||||
ScanImpl(OpKernelContextInternal& context,
|
||||
|
|
@ -135,10 +188,10 @@ class ScanImpl {
|
|||
private:
|
||||
// validate inputs and setup batch size and max sequence length.
|
||||
Status ValidateInput();
|
||||
Status ValidateSubgraphInput(int start_input, int end_input, bool has_seq_len_dim,
|
||||
Status ValidateSubgraphInput(int start_input, int end_input, bool is_loop_state_var,
|
||||
const std::vector<const NodeArg*>& graph_inputs);
|
||||
|
||||
Status AllocateOutput(int index, bool has_sequence_len);
|
||||
Status AllocateOutput(int index, bool is_loop_state_var);
|
||||
Status AllocateOutputTensors();
|
||||
Status CreateLoopStateVariables(std::vector<std::vector<LoopStateVariable>>& loop_state_variables);
|
||||
|
||||
|
|
@ -147,7 +200,6 @@ class ScanImpl {
|
|||
|
||||
Status IterateSequence(std::vector<LoopStateVariable>& loop_state_variables,
|
||||
ConstTensorSlicerIterators& scan_input_stream_iterators,
|
||||
MutableTensorSlicerIterators& scan_output_stream_iterators,
|
||||
int64_t seq_length);
|
||||
|
||||
OpKernelContextInternal& context_;
|
||||
|
|
@ -166,6 +218,7 @@ class ScanImpl {
|
|||
std::vector<int64_t> sequence_lens_;
|
||||
|
||||
std::vector<std::string> subgraph_output_names_;
|
||||
std::vector<std::unique_ptr<OutputIterator>> output_iterators_;
|
||||
|
||||
std::unordered_map<std::string, const MLValue*> implicit_inputs_;
|
||||
};
|
||||
|
|
@ -249,6 +302,149 @@ void LoopStateVariable::Next() {
|
|||
++iteration_num_;
|
||||
}
|
||||
|
||||
// Replace any symbolic (-1) dimensions in the trailing dimensions of final_shape with the
// matching values from per_iteration_shape.
// per_iteration_shape is aligned with the end of final_shape, so any leading dimensions of
// final_shape that per_iteration_shape does not cover are left untouched.
// Returns a FAIL status if per_iteration_shape has more dimensions than final_shape, or if a
// dimension that is already concrete in final_shape differs from the per-iteration value.
// Note that final_shape may have been partially updated when a failure is returned.
static Status MakeShapeConcrete(const TensorShape& per_iteration_shape, TensorShape& final_shape) {
  const auto num_dims_per_iteration = per_iteration_shape.NumDimensions();
  const auto num_dims_final = final_shape.NumDimensions();

  // the offset below is unsigned, so guard against it wrapping around and indexing out of bounds
  if (num_dims_per_iteration > num_dims_final) {
    return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL,
                                   "Mismatch between expected shape and shape from first output. ",
                                   final_shape, " is not compatible with ", per_iteration_shape);
  }

  const auto final_shape_offset = num_dims_final - num_dims_per_iteration;

  for (size_t i = 0; i < num_dims_per_iteration; ++i) {
    const auto existing_value = final_shape[i + final_shape_offset];

    if (existing_value == -1) {
      // symbolic dimension. take the value from the per-iteration shape
      final_shape[i + final_shape_offset] = per_iteration_shape[i];
    } else if (existing_value != per_iteration_shape[i]) {
      return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL,
                                     "Mismatch between expected shape and shape from first output. ",
                                     final_shape, " is not compatible with ", per_iteration_shape);
    }
  }

  return Status::OK();
}
|
||||
|
||||
OutputIterator::OutputIterator(OpKernelContextInternal& context,
|
||||
int output_index,
|
||||
bool is_loop_state_var,
|
||||
TensorShape final_shape)
|
||||
: context_{context},
|
||||
output_index_{output_index},
|
||||
is_loop_state_var_{is_loop_state_var},
|
||||
final_shape_{final_shape},
|
||||
cur_iteration_{0} {
|
||||
is_concrete_shape_ = final_shape_.Size() >= 0;
|
||||
|
||||
// there are one or two dimensions being iterated depending on whether it's a loop state variable or scan input.
|
||||
auto num_iteration_dims = is_loop_state_var_ ? 1 : 2;
|
||||
num_iterations_ = final_shape_.Slice(0, num_iteration_dims).Size();
|
||||
}
|
||||
|
||||
// Allocate the overall output buffer if the final shape is known, or can be inferred up front.
// Otherwise allocation is deferred until the first output value is available.
Status OutputIterator::Initialize() {
  if (!is_concrete_shape_ && is_loop_state_var_) {
    // a loop state variable output must match the shape of its initial value, and that input
    // has a concrete shape, so copy the missing dimensions from it.
    auto* initial_value = context_.Input<Tensor>(output_index_ + 1);  // +1 to skip the sequence_len input
    ONNXRUNTIME_RETURN_IF_ERROR(MakeShapeConcrete(initial_value->Shape(), final_shape_));

    is_concrete_shape_ = true;
  }

  if (!is_concrete_shape_) {
    // nothing to allocate yet. first_output_ is used until the shape is made concrete.
    return Status::OK();
  }

  return AllocateFinalBuffer();
}
|
||||
|
||||
// Allocate the overall output using final_shape_ and create the slicer iterator/s used to
// write each iteration's value directly into it.
// Returns a FAIL status if the output tensor or its MLValue could not be obtained.
Status OutputIterator::AllocateFinalBuffer() {
  // make sure a single buffer for the full output is created upfront.
  // we slice this into per-iteration pieces in Execute using MLValueTensorSlicer.
  auto* tensor = context_.Output(output_index_, final_shape_);

  if (!tensor)
    return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create output tensor for output #", output_index_);

  // get the output tensor we just created as an MLValue
  final_output_mlvalue_ = context_.GetOutputMLValue(output_index_);

  // it is dereferenced below, so fail cleanly rather than dereferencing a null pointer
  if (!final_output_mlvalue_)
    return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL, "Output MLValue has not been created for output #",
                                   output_index_);

  if (is_loop_state_var_) {
    // only one entry is required as we slice on a single dimension
    slicer_iterators_.push_back(MLValueTensorSlicer<MLValue>::Create(*final_output_mlvalue_).begin());
  } else {
    const auto batch_size = final_shape_[0];

    if (batch_size > 0)
      slicer_iterators_.reserve(static_cast<size_t>(batch_size));

    for (int i = 0; i < batch_size; ++i) {
      // the slicer handles the sequence dimension (dim 1) so create an entry for each batch
      slicer_iterators_.push_back(MLValueTensorSlicer<MLValue>::Create(*final_output_mlvalue_, 1, i).begin());
    }
  }

  // set only after all entries are added so the iterator isn't invalidated by vector growth
  cur_slicer_iterator_ = slicer_iterators_.begin();

  return Status::OK();
}
|
||||
|
||||
// Switch from the temporary first_output_ to the overall output buffer.
// Uses the shape of the first output to fill in the symbolic dimensions of final_shape_,
// allocates the overall output, and copies the first output into the current slice.
Status OutputIterator::MakeConcrete() {
  ONNXRUNTIME_ENFORCE(first_output_.IsAllocated(), "First usage of OutputIterator did not result in any output.");

  auto& first_tensor = first_output_.Get<Tensor>();

  // update the final shape using the shape of the value the subgraph produced
  ONNXRUNTIME_RETURN_IF_ERROR(MakeShapeConcrete(first_tensor.Shape(), final_shape_));

  // from here on operator* returns slices of the overall output
  is_concrete_shape_ = true;
  ONNXRUNTIME_RETURN_IF_ERROR(AllocateFinalBuffer());

  // copy first output to final buffer
  auto* source_bytes = static_cast<const gsl::byte*>(first_tensor.DataRaw());
  auto source = gsl::make_span<const gsl::byte>(source_bytes, first_tensor.Size());

  auto* destination_tensor = (**this).GetMutable<Tensor>();
  auto* destination_bytes = static_cast<gsl::byte*>(destination_tensor->MutableDataRaw());
  auto destination = gsl::make_span<gsl::byte>(destination_bytes, destination_tensor->Size());

  gsl::copy(source, destination);

  // release the MLValue we used for the first output
  first_output_ = {};

  return Status::OK();
}
|
||||
|
||||
// Returns the MLValue for the current iteration.
// Until the shape is concrete this is the temporary first_output_. After that it is the
// current slice of the overall output.
MLValue& OutputIterator::operator*() {
  ONNXRUNTIME_ENFORCE(cur_iteration_ < num_iterations_);

  if (!is_concrete_shape_) {
    return first_output_;
  }

  return **cur_slicer_iterator_;
}
|
||||
|
||||
// Move to the next iteration's output. Does nothing once all iterations have been processed.
// If the shape is not yet concrete, the first output is used to set up the overall output first.
OutputIterator& OutputIterator::operator++() {
  if (cur_iteration_ >= num_iterations_) {
    return *this;
  }

  if (!is_concrete_shape_) {
    // we should have an output now, so convert to using the overall output buffer and slicers
    auto status = MakeConcrete();
    ONNXRUNTIME_ENFORCE(status.IsOK(), status.ErrorMessage());
  }

  ++cur_iteration_;

  // if not a loop state var, see if we just finished the current sequence (dim 1)
  const bool finished_sequence = !is_loop_state_var_ && (cur_iteration_ % final_shape_[1] == 0);

  if (finished_sequence) {
    // move to the slicer for the next batch entry
    ++cur_slicer_iterator_;
  } else {
    // move to the next slice within the current slicer
    ++(*cur_slicer_iterator_);
  }

  return *this;
}
|
||||
|
||||
ScanImpl::ScanImpl(OpKernelContextInternal& context,
|
||||
const SessionState& session_state,
|
||||
int64_t num_scan_inputs,
|
||||
|
|
@ -258,7 +454,7 @@ ScanImpl::ScanImpl(OpKernelContextInternal& context,
|
|||
subgraph_{*session_state.GetGraphViewer()},
|
||||
directions_{directions},
|
||||
implicit_inputs_{context_.GetImplicitInputs()} {
|
||||
//optional first input so may be nullptr
|
||||
// optional first input so may be nullptr
|
||||
sequence_lens_tensor_ = context.Input<Tensor>(0);
|
||||
|
||||
num_variadic_inputs_ = context_.NumVariadicInputs(1);
|
||||
|
|
@ -271,12 +467,12 @@ Status ScanImpl::Initialize() {
|
|||
auto status = ValidateInput();
|
||||
ONNXRUNTIME_RETURN_IF_ERROR(status);
|
||||
|
||||
auto& graph_outputs = subgraph_.GetOutputs();
|
||||
subgraph_output_names_.reserve(graph_outputs.size());
|
||||
auto& subgraph_outputs = subgraph_.GetOutputs();
|
||||
subgraph_output_names_.reserve(subgraph_outputs.size());
|
||||
|
||||
// save list of subgraph output names in their provided order to use when fetching the results
|
||||
// from each subgraph execution. the Scan outputs will match this order.
|
||||
for (auto& output : graph_outputs) {
|
||||
for (auto& output : subgraph_outputs) {
|
||||
subgraph_output_names_.push_back(output->Name());
|
||||
}
|
||||
|
||||
|
|
@ -301,9 +497,10 @@ static const MLValue& GetSubgraphInputMLValue(const OpKernelContextInternal& con
|
|||
}
|
||||
|
||||
// Validate that the subgraph input has valid shapes
|
||||
Status ScanImpl::ValidateSubgraphInput(int start_input, int end_input, bool has_seq_len_dim,
|
||||
Status ScanImpl::ValidateSubgraphInput(int start_input, int end_input, bool is_loop_state_var,
|
||||
const std::vector<const NodeArg*>& graph_inputs) {
|
||||
// first dim is batch size. optional sequence dim. dim/s for the data
|
||||
bool has_seq_len_dim = !is_loop_state_var;
|
||||
auto min_dims_required = has_seq_len_dim ? 3 : 2;
|
||||
|
||||
for (int i = start_input; i < end_input; ++i) {
|
||||
|
|
@ -355,11 +552,11 @@ Status ScanImpl::ValidateInput() {
|
|||
}
|
||||
|
||||
// process any loop state variables, which will set the batch size
|
||||
auto status = ValidateSubgraphInput(0, num_loop_state_variables_, false, graph_inputs);
|
||||
auto status = ValidateSubgraphInput(0, num_loop_state_variables_, true, graph_inputs);
|
||||
ONNXRUNTIME_RETURN_IF_ERROR(status);
|
||||
|
||||
// process the scan inputs. sets/validates batch size and sequence length
|
||||
status = ValidateSubgraphInput(num_loop_state_variables_, num_variadic_inputs_, true, graph_inputs);
|
||||
status = ValidateSubgraphInput(num_loop_state_variables_, num_variadic_inputs_, false, graph_inputs);
|
||||
ONNXRUNTIME_RETURN_IF_ERROR(status);
|
||||
|
||||
if (sequence_lens_tensor_ != nullptr) {
|
||||
|
|
@ -386,11 +583,12 @@ Status ScanImpl::ValidateInput() {
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
Status ScanImpl::AllocateOutput(int index, bool has_sequence_len_dimension) {
|
||||
Status ScanImpl::AllocateOutput(int index, bool is_loop_state_var) {
|
||||
// use the shape from the subgraph output. we require this to be specified in the model or inferable.
|
||||
auto& graph_outputs = subgraph_.GetOutputs();
|
||||
auto* graph_output = graph_outputs.at(index);
|
||||
auto* graph_output_shape = graph_output->Shape();
|
||||
|
||||
if (!graph_output_shape) {
|
||||
return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL, "Subgraph must have the shape set for all outputs but ",
|
||||
graph_output->Name(), " did not.");
|
||||
|
|
@ -404,24 +602,16 @@ Status ScanImpl::AllocateOutput(int index, bool has_sequence_len_dimension) {
|
|||
|
||||
scan_output_dims.push_back(batch_size_);
|
||||
|
||||
if (has_sequence_len_dimension) {
|
||||
if (!is_loop_state_var) {
|
||||
scan_output_dims.push_back(max_sequence_len_);
|
||||
}
|
||||
|
||||
scan_output_dims.insert(scan_output_dims.cend(), graph_output_dims.cbegin(), graph_output_dims.cend());
|
||||
|
||||
// make sure a single buffer for the full output is created upfront.
|
||||
// we slice this into per-iteration pieces in Execute using MLValueTensorSlicer.
|
||||
auto* tensor = context_.Output(index, TensorShape(scan_output_dims));
|
||||
std::unique_ptr<OutputIterator> output_iter;
|
||||
OutputIterator::Create(context_, index, is_loop_state_var, TensorShape(scan_output_dims), output_iter);
|
||||
|
||||
if (!tensor)
|
||||
return ONNXRUNTIME_MAKE_STATUS(ONNXRUNTIME, FAIL, "Failed to create output tensor for ", graph_output->Name());
|
||||
|
||||
// zero out the output so that any short sequences have deterministic values in unused slots.
|
||||
// strictly speaking this isn't required, and alternatively we could fill with zeros when we
|
||||
// encounter a short sequence and are creating output, but one memset is easy, involves
|
||||
// less code complexity, and should be relatively cheap.
|
||||
memset(tensor->MutableDataRaw(), 0, tensor->Size());
|
||||
output_iterators_.push_back(std::move(output_iter));
|
||||
|
||||
return Status::OK();
|
||||
}
|
||||
|
|
@ -435,17 +625,13 @@ Status ScanImpl::AllocateOutputTensors() {
|
|||
" outputs but Scan expects ", num_variadic_outputs_);
|
||||
}
|
||||
|
||||
// TODO: Need to handle shape/type inference for subgraphs.
|
||||
// For now copy shape from subgraph output and expand based on batch size and sequence length
|
||||
|
||||
for (int i = 0; i < num_loop_state_variables_; ++i) {
|
||||
const bool has_sequence_len_dimension = false; // loop state variables don't have a sequence_len dimension;
|
||||
status = AllocateOutput(i, has_sequence_len_dimension);
|
||||
status = AllocateOutput(i, true);
|
||||
ONNXRUNTIME_RETURN_IF_ERROR(status);
|
||||
}
|
||||
|
||||
for (int i = num_loop_state_variables_, end = num_variadic_outputs_; i < end; ++i) {
|
||||
status = AllocateOutput(i, true);
|
||||
status = AllocateOutput(i, false);
|
||||
ONNXRUNTIME_RETURN_IF_ERROR(status);
|
||||
}
|
||||
|
||||
|
|
@ -461,9 +647,7 @@ Status ScanImpl::CreateLoopStateVariables(std::vector<std::vector<LoopStateVaria
|
|||
// each iteration of the subgraph. This minimizes copying of data during each iteration.
|
||||
|
||||
std::vector<MLValueTensorSlicer<const MLValue>::Iterator> loop_state_input_iterators;
|
||||
std::vector<MLValueTensorSlicer<MLValue>::Iterator> loop_state_output_iterators;
|
||||
loop_state_input_iterators.reserve(num_loop_state_variables_);
|
||||
loop_state_output_iterators.reserve(num_loop_state_variables_);
|
||||
|
||||
// create the input and output slice iterator for each loop state variable.
|
||||
for (int i = 0; i < num_loop_state_variables_; ++i) {
|
||||
|
|
@ -473,7 +657,6 @@ Status ScanImpl::CreateLoopStateVariables(std::vector<std::vector<LoopStateVaria
|
|||
ONNXRUNTIME_ENFORCE(p_mlvalue, "Output MLValue has not been created for loop state variable output ", i);
|
||||
|
||||
loop_state_input_iterators.push_back(MLValueTensorSlicer<const MLValue>::Create(mlvalue).begin());
|
||||
loop_state_output_iterators.push_back(MLValueTensorSlicer<MLValue>::Create(*p_mlvalue).begin());
|
||||
}
|
||||
|
||||
batch_loop_state_variables.clear();
|
||||
|
|
@ -490,7 +673,7 @@ Status ScanImpl::CreateLoopStateVariables(std::vector<std::vector<LoopStateVaria
|
|||
|
||||
for (int i = 0; i < num_loop_state_variables_; ++i) {
|
||||
auto& input_iter = loop_state_input_iterators[i];
|
||||
auto& output_iter = loop_state_output_iterators[i];
|
||||
auto& output_iter = *output_iterators_[i];
|
||||
|
||||
variables.push_back(LoopStateVariable(*input_iter, *output_iter, sequence_lens_[b], alloc));
|
||||
|
||||
|
|
@ -533,21 +716,9 @@ Status ScanImpl::Execute() {
|
|||
}
|
||||
}
|
||||
|
||||
// Setup output MLValue streams
|
||||
std::vector<MLValueTensorSlicer<MLValue>::Iterator> scan_output_stream_iterators;
|
||||
scan_output_stream_iterators.reserve(num_variadic_outputs_);
|
||||
|
||||
for (int i = num_loop_state_variables_, end = num_variadic_outputs_; i < end; ++i) {
|
||||
MLValue* p_mlvalue = context_.GetOutputMLValue(i);
|
||||
ONNXRUNTIME_ENFORCE(p_mlvalue, "Output MLValue has not been created for output ", i);
|
||||
|
||||
scan_output_stream_iterators.push_back(MLValueTensorSlicer<MLValue>::Create(*p_mlvalue, 1, b).begin());
|
||||
}
|
||||
|
||||
// Call the subgraph for each item in the sequence
|
||||
status = IterateSequence(batch_loop_state_variables[b],
|
||||
scan_input_stream_iterators,
|
||||
scan_output_stream_iterators,
|
||||
sequence_lens_[b]);
|
||||
|
||||
ONNXRUNTIME_RETURN_IF_ERROR(status);
|
||||
|
|
@ -558,7 +729,6 @@ Status ScanImpl::Execute() {
|
|||
|
||||
Status ScanImpl::IterateSequence(std::vector<LoopStateVariable>& loop_state_variables,
|
||||
ConstTensorSlicerIterators& scan_input_stream_iterators,
|
||||
MutableTensorSlicerIterators& scan_output_stream_iterators,
|
||||
int64_t seq_length) {
|
||||
Status status = Status::OK();
|
||||
auto& graph_inputs = subgraph_.GetInputs();
|
||||
|
|
@ -575,9 +745,8 @@ Status ScanImpl::IterateSequence(std::vector<LoopStateVariable>& loop_state_vari
|
|||
feeds[entry.first] = *entry.second;
|
||||
}
|
||||
|
||||
// as we fill all the outputs with 0 initially, just iterate seq_length not max_seq_length_
|
||||
// as we don't need to pad the output for a short sequence here.
|
||||
for (int64_t seq_no = 0; seq_no < seq_length; ++seq_no) {
|
||||
int64_t seq_no = 0;
|
||||
for (; seq_no < seq_length; ++seq_no) {
|
||||
for (int input = 0; input < num_variadic_inputs_; ++input) {
|
||||
// the ordering of the Scan inputs should match the ordering of the subgraph inputs
|
||||
auto name = graph_inputs[input]->Name();
|
||||
|
|
@ -596,15 +765,24 @@ Status ScanImpl::IterateSequence(std::vector<LoopStateVariable>& loop_state_vari
|
|||
|
||||
fetches.clear();
|
||||
|
||||
bool copy_fetch_to_iter = false;
|
||||
|
||||
for (int output = 0, end = num_variadic_outputs_; output < end; ++output) {
|
||||
if (output < num_loop_state_variables_) {
|
||||
// add loop state variable output
|
||||
fetches.push_back(loop_state_variables[output].Output());
|
||||
} else {
|
||||
// add sliced output
|
||||
auto& iterator = scan_output_stream_iterators[output - num_loop_state_variables_];
|
||||
fetches.push_back(*iterator);
|
||||
++iterator;
|
||||
// add MLValue from sliced output
|
||||
auto& iterator = *output_iterators_[output];
|
||||
auto& mlvalue = *iterator;
|
||||
fetches.push_back(mlvalue);
|
||||
|
||||
// If there is a dynamic shape in an output we need to copy it back to the OutputIterator
|
||||
// so it can setup the overall output and avoid copies for all other output values.
|
||||
// The mlvalue in the iterator will point to data once we have the overall output initialized.
|
||||
// Check current value as we don't want to unset copy_fetch_to_iter if it is true.
|
||||
if (!copy_fetch_to_iter)
|
||||
copy_fetch_to_iter = (seq_no == 0) && (mlvalue.IsAllocated() == false);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -620,6 +798,27 @@ Status ScanImpl::IterateSequence(std::vector<LoopStateVariable>& loop_state_vari
|
|||
|
||||
// cycle the LoopStateVariable input/output in preparation for the next iteration
|
||||
std::for_each(loop_state_variables.begin(), loop_state_variables.end(), [](LoopStateVariable& v) { v.Next(); });
|
||||
|
||||
// and move the output iterators.
|
||||
for (int output = num_loop_state_variables_; output < num_variadic_outputs_; ++output) {
|
||||
auto& iterator = *output_iterators_[output];
|
||||
|
||||
// copy the data from fetches to the iterator so it can setup the overall output
|
||||
if (copy_fetch_to_iter && (*iterator).IsAllocated() == false) {
|
||||
*iterator = fetches[output];
|
||||
}
|
||||
|
||||
++iterator;
|
||||
}
|
||||
}
|
||||
|
||||
// zero out any remaining values in the sequence
|
||||
for (; seq_length < max_sequence_len_; ++seq_length) {
|
||||
for (int output = num_loop_state_variables_; output < num_variadic_outputs_; ++output) {
|
||||
auto& iterator = *output_iterators_[output];
|
||||
iterator.ZeroOutCurrent();
|
||||
++iterator;
|
||||
}
|
||||
}
|
||||
|
||||
return status;
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ namespace test {
|
|||
|
||||
struct RunOptions {
|
||||
bool include_dim_values_in_main_graph = false;
|
||||
bool symbolic_dim_values_in_main_graph = false;
|
||||
int symbolic_dim_value_in_main_graph = -1;
|
||||
bool include_dim_values_in_subgraph = true;
|
||||
};
|
||||
|
||||
|
|
@ -181,7 +181,7 @@ void RunTest(bool condition_value,
|
|||
IfOpTester test{options};
|
||||
|
||||
test.AddShapeToTensorData(options.include_dim_values_in_main_graph,
|
||||
options.symbolic_dim_values_in_main_graph);
|
||||
options.symbolic_dim_value_in_main_graph);
|
||||
|
||||
// add the main graph inputs and outputs.
|
||||
// we will handle the 'If' inputs in the AddNodes override, and as 'If' is the last node
|
||||
|
|
|
|||
|
|
@ -261,8 +261,6 @@ void RunTest(const std::string test_name, int64_t batch_size, int64_t max_sequen
|
|||
|
||||
ScanOpTester test;
|
||||
|
||||
test.AddShapeToTensorData(options.include_dim_values_in_main_graph);
|
||||
|
||||
test.AddAttribute("body", proto);
|
||||
test.AddAttribute<int64_t>("num_scan_inputs", 2);
|
||||
|
||||
|
|
@ -277,6 +275,8 @@ void RunTest(const std::string test_name, int64_t batch_size, int64_t max_sequen
|
|||
test.AddInput<int64_t>("sequence_lens", sequence_lens_dims, *sequence_lens);
|
||||
}
|
||||
|
||||
test.AddShapeToTensorData(options.include_dim_values_in_main_graph);
|
||||
|
||||
test.AddInput<float>("scan_loop_state_in_0", {batch_size, 1}, loop_state_in_0);
|
||||
|
||||
std::vector<int64_t> input_shape{batch_size, max_sequence_len, input_size};
|
||||
|
|
@ -665,5 +665,58 @@ TEST(Scan, MixedTypeInputs) {
|
|||
test.Run();
|
||||
}
|
||||
|
||||
// Validate that Scan handles subgraph outputs that have a symbolic dimension, for both
// the loop state variable output and the scan output.
TEST(Scan, UnknownDimInSubgraphOutput) {
  Model model("ScanBody");
  auto& graph = model.MainGraph();

  // all subgraph inputs and outputs are float tensors with a single symbolic dimension
  TypeProto float_tensor;
  float_tensor.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT);
  float_tensor.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_param("param");

  auto& state_in_1 = graph.GetOrCreateNodeArg("state_in_1", &float_tensor);
  auto& scan_in_1 = graph.GetOrCreateNodeArg("scan_in_1", &float_tensor);

  auto& state_out_1 = graph.GetOrCreateNodeArg("state_out_1", &float_tensor);
  auto& scan_out_1 = graph.GetOrCreateNodeArg("scan_out_1", &float_tensor);

  graph.AddNode("node1", "Identity", "Copy state_in_1 to scan_out_1", {&state_in_1}, {&scan_out_1});
  graph.AddNode("node2", "Identity", "Copy scan_in_1 to state_out_1", {&scan_in_1}, {&state_out_1});

  graph.SetInputOrder({&state_in_1, &scan_in_1});
  graph.SetOutputOrder({&state_out_1, &scan_out_1});

  auto status = graph.Resolve();
  EXPECT_EQ(status, Status::OK());

  auto& scan_body = graph.ToGraphProto();

  // Construct and run scan test
  ScanOpTester test;

  int64_t batch_size = 1, sequence_len = 3, input_size = 1;
  std::vector<int64_t> seq_shape{batch_size, sequence_len, input_size};
  std::vector<int64_t> state_shape{batch_size, input_size};

  test.AddAttribute("body", scan_body);
  test.AddAttribute<int64_t>("num_scan_inputs", 1);

  // we add a symbolic dimension to both the initial state and the scan input so we test the path that handles loop
  // state variables (prior to execution) and the path that handles subgraph outputs (post first execution).
  // Note that we cross the values over in the subgraph, so the symbolic dimension in
  // initial_state_1 affects scan_out_1, and the symbolic dimension in scan_input_1 affects state_out_1.
  test.AddMissingOptionalInput<int64_t>();
  test.AddShapeToTensorData(true, 1);  // add shape and symbolic dim in dim 1 for initial_state_1
  test.AddInput<float>("initial_state_1", state_shape, {0.0});
  test.AddShapeToTensorData(true, 2);  // add shape and symbolic dim in dim 2 for scan_input_1
  test.AddInput<float>("scan_input_1", seq_shape, {1.0, 2.0, 3.0});

  test.AddOutput<float>("final_state_1", state_shape, {3.0});
  test.AddOutput<float>("scan_output_1", seq_shape, {0.0, 1.0, 2.0});

  test.Run();
}
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -407,7 +407,9 @@ void OpTester::Run(ExpectResult expect_result,
|
|||
const auto& expected_shape = expected_data.data_.Get<Tensor>().Shape();
|
||||
EXPECT_TRUE(inferred_dims.size() == expected_shape.NumDimensions());
|
||||
for (int d = 0; d < inferred_dims.size(); ++d) {
|
||||
EXPECT_EQ(expected_shape[d], inferred_dims[d]);
|
||||
// check equal unless the input involved a symbolic dimension
|
||||
if (inferred_dims[d] != -1)
|
||||
EXPECT_EQ(expected_shape[d], inferred_dims[d]) << "Output idx = " << idx << " dim = " << d;
|
||||
}
|
||||
}
|
||||
Check(expected_data, mlvalue.Get<Tensor>(), provider_type);
|
||||
|
|
|
|||
|
|
@ -91,7 +91,11 @@ struct TTypeProto : ONNX_NAMESPACE::TypeProto {
|
|||
if (shape) {
|
||||
auto mutable_shape = mutable_tensor_type()->mutable_shape();
|
||||
for (auto i : *shape) {
|
||||
mutable_shape->add_dim()->set_dim_value(i);
|
||||
auto* mutable_dim = mutable_shape->add_dim();
|
||||
if (i != -1)
|
||||
mutable_dim->set_dim_value(i);
|
||||
else
|
||||
mutable_dim->set_dim_param("symbolic");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -145,10 +149,11 @@ class OpTester {
|
|||
|
||||
// Set whether the NodeArg created by AddInput/AddOutput should include shape information
|
||||
// for Tensor types. If not added, shape inferencing should resolve. If added, shape inferencing
|
||||
// should validate. Default is to not add.
|
||||
OpTester& AddShapeToTensorData(bool add_shape = true, bool add_symbolic_dim = false) {
|
||||
// should validate. Default is to not add.
|
||||
// Additionally a symbolic dimension will be added if symbolic_dim matches a dimension in the input.
|
||||
OpTester& AddShapeToTensorData(bool add_shape = true, int symbolic_dim = -1) {
|
||||
add_shape_to_tensor_data_ = add_shape;
|
||||
add_symbolic_dim_to_tensor_data_ = add_symbolic_dim;
|
||||
add_symbolic_dim_to_tensor_data_ = symbolic_dim;
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
|
@ -268,7 +273,7 @@ class OpTester {
|
|||
ONNXRUNTIME_ENFORCE(shape.Size() == values_count, values_count,
|
||||
" input values doesn't match tensor size of ", shape.Size());
|
||||
|
||||
auto allocator = ::onnxruntime::test::AllocatorManager::Instance().GetAllocator(CPU);
|
||||
auto allocator = test::AllocatorManager::Instance().GetAllocator(CPU);
|
||||
auto size_in_bytes = values_count * sizeof(T);
|
||||
void* buffer = allocator->Alloc(size_in_bytes);
|
||||
auto p_tensor = std::make_unique<Tensor>(DataTypeImpl::GetType<T>(),
|
||||
|
|
@ -283,8 +288,8 @@ class OpTester {
|
|||
}
|
||||
|
||||
std::vector<int64_t> dims_for_proto{dims};
|
||||
if (add_symbolic_dim_to_tensor_data_ && !dims.empty()) {
|
||||
dims_for_proto[0] = -1;
|
||||
if (add_symbolic_dim_to_tensor_data_ >= 0 && dims.size() > add_symbolic_dim_to_tensor_data_) {
|
||||
dims_for_proto[add_symbolic_dim_to_tensor_data_] = -1;
|
||||
}
|
||||
|
||||
TTypeProto<T> type_proto(add_shape_to_tensor_data_ ? &dims_for_proto : nullptr);
|
||||
|
|
@ -302,7 +307,7 @@ class OpTester {
|
|||
const char* domain_;
|
||||
int opset_version_;
|
||||
bool add_shape_to_tensor_data_ = true;
|
||||
bool add_symbolic_dim_to_tensor_data_ = false;
|
||||
int add_symbolic_dim_to_tensor_data_ = -1;
|
||||
std::vector<Data> input_data_;
|
||||
std::vector<Data> output_data_;
|
||||
std::vector<size_t> initializer_index_;
|
||||
|
|
|
|||
Loading…
Reference in a new issue