mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-07-03 03:58:54 +00:00
Remove skipping of Reshape from NNAPI EP (#19618)
### Description <!-- Describe your changes. --> A number of Qualcomm Snapdragon chipsets do not produce correct output if we skip the Reshape, which ironically was a performance optimization for Snapdragon chips. Perf testing showed that Squeeze also seems to execute on CPU so there's no benefit to using that as an alternative where possible e.g. Global*Pool -> Reshape to 2D -> Gemm could be potentially be replaced with Global*Pool -> Squeeze dims 2 and 3 -> Gemm if that offered better performance. ### Motivation and Context <!-- - Why is this change required? What problem does it solve? - If it fixes an open issue, please link to the issue here. --> #19518
This commit is contained in:
parent
18c8fab1ae
commit
8a71b65765
2 changed files with 23 additions and 10 deletions
|
|
@ -965,6 +965,18 @@ Status AddMinMaxOperator(ModelBuilder& model_builder, const NodeUnit& node_unit,
|
|||
return Status::OK();
|
||||
}
|
||||
|
||||
// NOTE: Skipping Reshape results in invalid output on some SnapDragon chipsets. Whilst the NNAPI spec says the input
|
||||
// to FullyConnnected can be > 2D, those chipsets don't handle this correctly.
|
||||
//
|
||||
// CanSkipReshape could potentially be re-enabled in the future if we no longer want to support those old chipsets.
|
||||
// However, the Reshape of newer chipsets may not run on CPU so there may not be a performance issue to try and avoid,
|
||||
// so CanSkipReshape could be redundant anyway.
|
||||
//
|
||||
// Known bad chipsets: Qualcomm Snapdragon 850, 855, 865, 870.
|
||||
//
|
||||
// See https://github.com/microsoft/onnxruntime/issues/19518
|
||||
|
||||
/*
|
||||
// We can skip the Reshape if all the output edges satisfies both the following conditions
|
||||
// 1. The output of the reshape/flatten is not an output of the graph
|
||||
// 2. The output of the reshape/flatten is the input 0 of one or more GEMM/Matmul operators,
|
||||
|
|
@ -977,7 +989,7 @@ Status AddMinMaxOperator(ModelBuilder& model_builder, const NodeUnit& node_unit,
|
|||
// between NNAPI CPU impl and Hardware Accelerator impl and will speed up the execution
|
||||
// If we are going to skip the reshape, we will still add correct shape and operand type for the output in
|
||||
// onnxruntime::nnapi::Model.
|
||||
bool CanSkipReshape(const ModelBuilder& model_builder, const NodeUnit& node_unit,
|
||||
static bool CanSkipReshape(const ModelBuilder& model_builder, const NodeUnit& node_unit,
|
||||
size_t input_rank, size_t output_rank) {
|
||||
// Since we know this is a Reshape NodeUnit, so we can safely assume there is only 1 output
|
||||
// and the node_unit has only one output node.
|
||||
|
|
@ -1039,33 +1051,37 @@ bool CanSkipReshape(const ModelBuilder& model_builder, const NodeUnit& node_unit
|
|||
<< node_unit.Name() << "] with output, " << output_name;
|
||||
return true;
|
||||
}
|
||||
*/
|
||||
|
||||
Status AddReshapeOperator(ModelBuilder& model_builder,
|
||||
const NodeUnit& node_unit,
|
||||
const std::string& input,
|
||||
const std::vector<int32_t>& shape) {
|
||||
auto& shaper(model_builder.GetShaper());
|
||||
const auto& operand_indices(model_builder.GetOperandIndices());
|
||||
const auto& operand_types(model_builder.GetOperandTypes());
|
||||
const auto& output = node_unit.Outputs()[0].node_arg.Name();
|
||||
|
||||
const auto input_shape = shaper[input];
|
||||
const auto output_shape = shaper[output];
|
||||
const auto input_rank = input_shape.size();
|
||||
const auto output_rank = output_shape.size();
|
||||
|
||||
// For reshape, the output type should be the same as the input type except the shape is different
|
||||
auto output_operand_type = operand_types.at(input);
|
||||
output_operand_type.SetDimensions(output_shape);
|
||||
|
||||
/* See CanSkipReshape definition above for explanation of why this is disabled.
|
||||
// Since Reshape is not running using hardware in NNAPI for some CPU (e.g. Qualcomm SD for now)
|
||||
// We will try to see if we the skip the Reshape to prevent context switching between
|
||||
// NNAPI CPU impl and NNAPI hardware accelerator impl
|
||||
if (CanSkipReshape(model_builder, node_unit, input_rank, output_rank)) {
|
||||
// Since reshape can be skipped, only register the dimension and type, with same index and new name
|
||||
const auto& operand_indices(model_builder.GetOperandIndices());
|
||||
const auto input_rank = input_shape.size();
|
||||
const auto output_rank = output_shape.size();
|
||||
// Since reshape can be skipped, only register the dimension and type, with same index and new name.
|
||||
// This essentially redirects the downstream operator builders to the input of the skipped Reshape node,
|
||||
// but with the output shape of the Reshape node.
|
||||
model_builder.RegisterOperand(output, operand_indices.at(input), output_operand_type);
|
||||
} else {
|
||||
// We still need to perform a reshape here
|
||||
} else */
|
||||
{
|
||||
std::string shape_name = model_builder.GetUniqueName(node_unit.Name() + input + "newshape");
|
||||
ORT_RETURN_IF_ERROR(op_builder_helpers::AddNnapiReshape(model_builder, input, shape_name, shape, output));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -181,9 +181,6 @@ Status AddMinMaxOperator(ModelBuilder& model_builder, const NodeUnit& node_unit,
|
|||
Status AddReshapeOperator(ModelBuilder& model_builder, const NodeUnit& node_unit,
|
||||
const std::string& input, const std::vector<int32_t>& shape);
|
||||
|
||||
bool CanSkipReshape(const ModelBuilder& model_builder, const NodeUnit& node_unit,
|
||||
size_t input_rank, size_t output_rank);
|
||||
|
||||
Status GetAxesForSqueezeAndUnSqueeze(ModelBuilder& model_builder, const NodeUnit& node_unit,
|
||||
std::vector<int32_t>& axes);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue