mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-19 21:32:23 +00:00
[QNN EP] Support Softmax/LogSoftmax with any axis attribute (#17877)
### Description

The QNN HTP backend only supports Softmax/LogSoftmax operators with an axis attribute set to `input_rank - 1` (i.e., the last dimension). This PR adds support for any axis by wrapping the QNN operator in transposes.

### Motivation and Context

Support more models.
This commit is contained in:
parent
63dc5dc1a9
commit
565bead85f
6 changed files with 298 additions and 44 deletions
|
|
@ -47,12 +47,10 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
|
|||
CreateSimpleOpBuilder("Where", *this);
|
||||
CreateSimpleOpBuilder("Sigmoid", *this);
|
||||
CreateSimpleOpBuilder("Sin", *this);
|
||||
CreateSimpleOpBuilder("Softmax", *this);
|
||||
CreateSimpleOpBuilder("Sqrt", *this);
|
||||
CreateSimpleOpBuilder("Sub", *this);
|
||||
CreateSimpleOpBuilder("Tanh", *this);
|
||||
|
||||
CreateSimpleOpBuilder("LogSoftmax", *this);
|
||||
CreateSimpleOpBuilder("MatMul", *this);
|
||||
CreateSimpleOpBuilder("Concat", *this);
|
||||
|
||||
|
|
@ -67,6 +65,11 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
|
|||
CreateSimpleOpBuilder("GridSample", *this);
|
||||
}
|
||||
|
||||
{
|
||||
CreateSoftmaxOpBuilder("Softmax", *this);
|
||||
CreateSoftmaxOpBuilder("LogSoftmax", *this);
|
||||
}
|
||||
|
||||
{
|
||||
CreateCastOpBuilder("Cast", *this);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -50,6 +50,8 @@ const IOpBuilder* GetOpBuilder(const std::string& onnx_op_type);
|
|||
|
||||
void CreateSimpleOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
|
||||
void CreateSoftmaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
|
||||
void CreateCastOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
|
||||
void CreateConvOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ class SimpleOpBuilder : public BaseOpBuilder {
|
|||
bool do_op_validation) const override ORT_MUST_USE_RESULT;
|
||||
|
||||
private:
|
||||
Status ExplicitOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const;
|
||||
Status ExplicitOpCheck(const NodeUnit& node_unit) const;
|
||||
Status ProcessSigmoidOrTanhOutput(QnnModelWrapper& qnn_model_wrapper,
|
||||
const NodeUnit& node_unit,
|
||||
std::vector<std::string>&& input_names,
|
||||
|
|
@ -41,30 +41,9 @@ class SimpleOpBuilder : public BaseOpBuilder {
|
|||
static constexpr std::array<std::string_view, 3> gridsample_supported_padding_modes = {"zeros", "border", "reflection"};
|
||||
};
|
||||
|
||||
// Returns the default value of the `axis` attribute for the given operator type and opset.
// Softmax/LogSoftmax default to 1 before opset 13 and to -1 from opset 13 onwards;
// every other operator type defaults to 0.
static int32_t GetDefaultAxisAttribute(const std::string& op_type, int opset_version) {
  const bool is_softmax_like = (op_type == "Softmax") || (op_type == "LogSoftmax");
  if (!is_softmax_like) {
    return 0;
  }

  // Default axis changed from 1 to -1 in opset 13.
  if (opset_version >= 13) {
    return -1;
  }
  return 1;
}
|
||||
|
||||
Status SimpleOpBuilder::ExplicitOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const {
|
||||
Status SimpleOpBuilder::ExplicitOpCheck(const NodeUnit& node_unit) const {
|
||||
const std::string& op_type = node_unit.OpType();
|
||||
|
||||
// QNN Softmax and LogSoftmax only support an axis value equal to input_rank - 1 (i.e., same as -1).
|
||||
if (op_type == "Softmax" || op_type == "LogSoftmax") {
|
||||
int32_t axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion());
|
||||
Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
|
||||
ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
|
||||
std::vector<uint32_t> input_shape;
|
||||
ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape),
|
||||
"QNN EP: Cannot get shape for Softmax input");
|
||||
ORT_RETURN_IF(axis != static_cast<int32_t>(input_shape.size() - 1),
|
||||
"QNN ", op_type.c_str(), " only supports an `axis` attribute equal to input_rank-1 (or -1)");
|
||||
}
|
||||
|
||||
if (op_type == "GridSample") {
|
||||
NodeAttrHelper node_helper(node_unit);
|
||||
std::string mode = node_helper.Get("mode", "linear");
|
||||
|
|
@ -231,7 +210,7 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
|
|||
const std::string& op_type = node_unit.OpType();
|
||||
|
||||
if (do_op_validation) {
|
||||
ORT_RETURN_IF_ERROR(ExplicitOpCheck(qnn_model_wrapper, node_unit));
|
||||
ORT_RETURN_IF_ERROR(ExplicitOpCheck(node_unit));
|
||||
// Skip the op validation for DepthToSpace & SpaceToDepth if it's not NHWC data layout
|
||||
if (node_unit.Domain() != kMSInternalNHWCDomain && (op_type == "DepthToSpace" || op_type == "SpaceToDepth" || op_type == "GridSample")) {
|
||||
return Status::OK();
|
||||
|
|
@ -251,8 +230,8 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
|
|||
|
||||
std::vector<std::string> param_tensor_names;
|
||||
// Add attribute
|
||||
if (op_type == "LogSoftmax" || op_type == "Softmax" || op_type == "Concat") {
|
||||
int32_t default_axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion());
|
||||
if (op_type == "Concat") {
|
||||
int32_t default_axis = 0;
|
||||
Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
|
||||
ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, default_axis));
|
||||
QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,237 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "core/providers/common.h"
|
||||
#include "core/providers/shared/utils/utils.h"
|
||||
#include "core/framework/tensorprotoutils.h"
|
||||
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
|
||||
#include "core/providers/qnn/builder/op_builder_factory.h"
|
||||
#include "core/common/safeint.h"
|
||||
|
||||
#include "base_op_builder.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace qnn {
|
||||
|
||||
class SoftmaxOpBuilder : public BaseOpBuilder {
|
||||
public:
|
||||
SoftmaxOpBuilder() : BaseOpBuilder("SoftmaxOpBuilder") {}
|
||||
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(SoftmaxOpBuilder);
|
||||
|
||||
Status IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
|
||||
const NodeUnit& node_unit,
|
||||
const logging::Logger& logger) const override final ORT_MUST_USE_RESULT;
|
||||
|
||||
protected:
|
||||
Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
|
||||
const NodeUnit& node_unit,
|
||||
const logging::Logger& logger,
|
||||
std::vector<std::string>& input_names,
|
||||
bool do_op_validation) const override ORT_MUST_USE_RESULT;
|
||||
|
||||
Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
|
||||
const NodeUnit& node_unit,
|
||||
std::vector<std::string>&& input_names,
|
||||
const logging::Logger& logger,
|
||||
bool do_op_validation) const override ORT_MUST_USE_RESULT;
|
||||
};
|
||||
|
||||
// Returns the default value of the Softmax/LogSoftmax `axis` attribute for the given
// opset version: 1 before opset 13, -1 from opset 13 onwards.
//
// Declared `static` so this file-local helper has internal linkage, matching the other
// helper in this file (GetTransposePermToUseLastAxis) and avoiding clashes with
// same-named helpers in other translation units.
static constexpr int32_t GetDefaultAxisAttribute(int opset_version) {
  // Default axis changed from 1 to -1 in opset 13.
  return opset_version < 13 ? 1 : -1;
}
|
||||
|
||||
// Decides whether QNN EP can take this Softmax/LogSoftmax node unit.
//
// For opset >= 13 any axis is accepted here (a non-last axis is handled later by wrapping the
// QNN operator with transposes). For opset < 13 the node is rejected unless the axis refers to
// the last input dimension. The final verdict comes from building the node in validation mode
// via AddToModelBuilder.
//
// Returns Status::OK() when supported, otherwise an error status describing the reason.
Status SoftmaxOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
                                       const NodeUnit& node_unit,
                                       const logging::Logger& logger) const {
  const int opset_version = node_unit.SinceVersion();

  // The QNN HTP backend only supports an `axis` attribute that refers to the last input dimension.
  // QNN EP is able to support arbitrary axis attributes by wrapping the QNN operator with transposes.
  // However, the exception is Softmax/LogSoftmax with opset < 13. For these older ONNX operators, only
  // axis == input_rank - 1 is supported.
  // NOTE(review): presumably because pre-13 Softmax has different (flattening) axis semantics — confirm
  // against the ONNX operator spec.
  if (opset_version < 13) {
    const std::string& op_type = node_unit.OpType();

    // `axis` starts as the opset default and is updated by ProcessAxisAttribute.
    int32_t axis = GetDefaultAxisAttribute(opset_version);
    Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
    ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));

    std::vector<uint32_t> input_shape;
    ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape),
                      "QNN EP: Cannot get shape for Softmax input");

    // Subtract in signed arithmetic so a rank-0 shape does not depend on unsigned wraparound.
    const int32_t last_dim = static_cast<int32_t>(input_shape.size()) - 1;
    ORT_RETURN_IF(axis != last_dim,
                  "QNN ", op_type.c_str(),
                  " only supports an `axis` attribute equal to input_rank-1 (or -1) for ONNX opset < 13");
  }

  return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, true);
}
|
||||
|
||||
// Builds the transpose permutation that swaps dimension `axis` with the last dimension and
// leaves all other dimensions in place. Because it is a single swap, applying the same
// permutation twice restores the original layout, so it can be used both before and after
// the wrapped operator.
//
// Precondition: axis < input_rank (checked with assert in debug builds). If the precondition
// is violated in a build where assert is compiled out, the identity permutation is returned
// instead of writing out of bounds.
static std::vector<uint32_t> GetTransposePermToUseLastAxis(uint32_t input_rank, uint32_t axis) {
  assert(axis < input_rank);

  // Start from the identity permutation [0, 1, ..., input_rank - 1].
  std::vector<uint32_t> transpose_perm(input_rank);
  for (uint32_t dim = 0; dim < input_rank; ++dim) {
    transpose_perm[dim] = dim;
  }

  // Swap axis dim with last dim. The guard also covers input_rank == 0, where
  // `input_rank - 1` would wrap around.
  if (axis < input_rank) {
    const uint32_t last_dim = input_rank - 1;
    transpose_perm[axis] = last_dim;
    transpose_perm[last_dim] = axis;
  }

  return transpose_perm;
}
|
||||
|
||||
// Registers the single input of a Softmax/LogSoftmax node unit with the QNN model.
//
// - Non-NPU backend, or axis refers to the last dimension: the input is processed as-is via
//   ProcessInput.
// - NPU backend and axis is not the last dimension: a Transpose node that swaps `axis` with the
//   last dimension is added in front of the operator, and the transposed tensor
//   ("<input>_ort_qnn_ep_transpose") becomes the operator's input.
//
// On success the QNN input tensor name is appended to `input_names`. On failure an error status
// is returned and `input_names` is left untouched by the transpose path.
Status SoftmaxOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
                                       const NodeUnit& node_unit,
                                       const logging::Logger& logger,
                                       std::vector<std::string>& input_names,
                                       bool do_op_validation) const {
  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
  const auto& inputs = node_unit.Inputs();
  assert(inputs.size() == 1);

  // `axis` starts as the opset default and is updated by ProcessAxisAttribute.
  int32_t axis = GetDefaultAxisAttribute(node_unit.SinceVersion());
  Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
  ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));

  OnnxInputInfo input_info = {};
  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[0], input_info));
  const size_t input_rank = input_info.shape.size();

  // If the axis attribute refers to the last dimension, then process the input as normal.
  if (!is_npu_backend || axis == static_cast<int32_t>(input_rank) - 1) {
    return ProcessInput(qnn_model_wrapper, inputs[0], logger, input_names);
  }

  //
  // The axis does **not** refer to the last input dimension. Must wrap transposes around the operator to be able to use
  // QNN's Softmax operator, which always uses an axis value that refers to the last dimension.
  //

  // Validate before touching `input_names` or adding anything to the model.
  ORT_RETURN_IF(input_info.is_initializer, "QNN EP does not support (Log)Softmax with an initializer input, ",
                "which should be optimized away by the ORT optimizer");

  // `axis` is used below as an unsigned index; reject values that would index out of bounds
  // (the assert inside GetTransposePermToUseLastAxis is not active in release builds).
  ORT_RETURN_IF(axis < 0 || static_cast<size_t>(axis) >= input_rank,
                "QNN EP: (Log)Softmax axis is out of range for the input rank");

  std::vector<uint32_t> transpose_perm = GetTransposePermToUseLastAxis(static_cast<uint32_t>(input_rank),
                                                                       static_cast<uint32_t>(axis));

  // NOTE(review): the transposed tensor name depends only on the input name; confirm this cannot
  // collide when the same tensor feeds several (Log)Softmax nodes.
  const std::string& input_name = inputs[0].node_arg.Name();
  std::string op_input_name = input_name + "_ort_qnn_ep_transpose";
  input_names.push_back(op_input_name);

  // Shape seen by the QNN operator: original shape with `axis` and the last dimension swapped.
  std::vector<uint32_t> op_input_shape = input_info.shape;
  op_input_shape[input_rank - 1] = input_info.shape[axis];
  op_input_shape[axis] = input_info.shape[input_rank - 1];

  // Input is dynamic, so add transpose node before input.
  const bool is_graph_input = qnn_model_wrapper.IsGraphInput(input_name);

  ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddTransposeNode(node_unit.Index(),
                                                         input_name,
                                                         op_input_name,
                                                         input_info.shape,
                                                         transpose_perm,
                                                         op_input_shape,
                                                         input_info.qnn_data_type,
                                                         input_info.quant_param,
                                                         do_op_validation,
                                                         is_graph_input));

  Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, op_input_name);
  QnnTensorWrapper input_tensorwrapper(op_input_name, tensor_type, input_info.qnn_data_type, input_info.quant_param,
                                       std::move(op_input_shape), {});
  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");

  return Status::OK();
}
|
||||
|
||||
// Adds the axis parameter, the QNN Softmax/LogSoftmax node and its output to the QNN model.
//
// - Non-NPU backend, or axis refers to the last dimension: the axis parameter is added as
//   resolved by ProcessAxisAttribute and the outputs are handled by ProcessOutputs.
// - NPU backend and axis is not the last dimension: the QNN operator is created with its axis
//   overridden to the last dimension and writes to an intermediate tensor
//   ("<output>_ort_qnn_ep_transpose"); a Transpose node then swaps the dimensions back and
//   produces the original output tensor.
//
// Returns Status::OK() on success, otherwise an error status.
Status SoftmaxOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                                     const NodeUnit& node_unit,
                                                     std::vector<std::string>&& input_names,
                                                     const logging::Logger& logger,
                                                     bool do_op_validation) const {
  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
  const std::string& op_type = node_unit.OpType();
  const auto& outputs = node_unit.Outputs();
  assert(outputs.size() == 1);

  // `axis` starts as the opset default and is updated by ProcessAxisAttribute.
  int32_t axis = GetDefaultAxisAttribute(node_unit.SinceVersion());
  Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
  ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));

  OnnxInputInfo output_info = {};
  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(outputs[0], output_info));
  const size_t output_rank = output_info.shape.size();

  // Same signed comparison as ProcessInputs, so both functions always agree on whether the
  // operator is wrapped with transposes.
  const bool axis_is_last_dim = axis == static_cast<int32_t>(output_rank) - 1;
  const bool wrap_with_transposes = is_npu_backend && !axis_is_last_dim;

  if (wrap_with_transposes) {
    // `axis` is used below as an unsigned index; reject values that would index out of bounds
    // (the assert inside GetTransposePermToUseLastAxis is not active in release builds).
    ORT_RETURN_IF(axis < 0 || static_cast<size_t>(axis) >= output_rank,
                  "QNN EP: (Log)Softmax axis is out of range for the output rank");

    // The wrapped QNN operator works on the transposed tensor, whose softmax dimension is last.
    axis_qnn_scalar.uint32Value = static_cast<uint32_t>(output_rank - 1);  // NOTE: override axis.
  }

  QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar);
  std::vector<std::string> param_tensor_names;
  param_tensor_names.push_back(axis_param.GetParamTensorName());
  qnn_model_wrapper.AddParamWrapper(std::move(axis_param));

  // If axis refers to the last dimension, process outputs as usual.
  if (!wrap_with_transposes) {
    return ProcessOutputs(qnn_model_wrapper, node_unit,
                          std::move(input_names),
                          std::move(param_tensor_names),
                          logger, do_op_validation, GetQnnOpType(op_type));
  }

  //
  // The axis does **not** refer to the last dimension. Must wrap the operator with Transposes to be able to use
  // QNN's Softmax operator, which only supports an axis that refers to the last dimension.
  //

  const std::string& orig_output_name = outputs[0].node_arg.Name();
  std::string op_output_name = orig_output_name + "_ort_qnn_ep_transpose";

  // Shape produced by the QNN operator: original shape with `axis` and the last dimension swapped.
  std::vector<uint32_t> op_output_shape = output_info.shape;
  op_output_shape[output_rank - 1] = output_info.shape[axis];
  op_output_shape[axis] = output_info.shape[output_rank - 1];

  // The operator's direct output is an intermediate tensor; the shape is copied because
  // `op_output_shape` is still needed for the trailing Transpose.
  QnnTensorWrapper output_tensorwrapper(op_output_name, QNN_TENSOR_TYPE_NATIVE, output_info.qnn_data_type,
                                        output_info.quant_param,
                                        std::vector<uint32_t>(op_output_shape));
  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)), "Failed to add tensor.");
  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit),
                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
                                                    GetQnnOpType(op_type),
                                                    std::move(input_names),
                                                    {op_output_name},
                                                    std::move(param_tensor_names)),
                    "Failed to add node.");

  // Swapping the same two dimensions again restores the original layout.
  const bool is_graph_output = qnn_model_wrapper.IsGraphOutput(orig_output_name);
  std::vector<uint32_t> transpose_perm = GetTransposePermToUseLastAxis(static_cast<uint32_t>(output_rank),
                                                                       static_cast<uint32_t>(axis));

  ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddTransposeNode(node_unit.Index(),
                                                         op_output_name,
                                                         orig_output_name,
                                                         op_output_shape,
                                                         transpose_perm,
                                                         output_info.shape,
                                                         output_info.qnn_data_type,
                                                         output_info.quant_param,
                                                         do_op_validation,
                                                         false,
                                                         is_graph_output));

  return Status::OK();
}
|
||||
|
||||
void CreateSoftmaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
|
||||
op_registrations.AddOpBuilder(op_type, std::make_unique<SoftmaxOpBuilder>());
|
||||
}
|
||||
|
||||
} // namespace qnn
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -238,22 +238,25 @@ QNNExecutionProvider::GetSupportedNodes(const GraphViewer& graph_viewer,
|
|||
initializer_input_lookup,
|
||||
qnn_backend_manager_->GetQnnBackendType());
|
||||
|
||||
for (const auto& node : graph_viewer.Nodes()) {
|
||||
const NodeUnit* node_unit = node_unit_map.at(&node);
|
||||
const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
|
||||
for (size_t i = 0; i < node_indices.size(); i++) {
|
||||
gsl::not_null<const onnxruntime::Node*> node(graph_viewer.GetNode(node_indices[i]));
|
||||
|
||||
const NodeUnit* node_unit = node_unit_map.at(node);
|
||||
const bool supported = IsNodeSupported(qnn_model_wrapper,
|
||||
*node_unit,
|
||||
node_unit_supported_result,
|
||||
logger);
|
||||
LOGS(logger, VERBOSE) << "Node supported: [" << supported
|
||||
<< "] index: [" << node.Index()
|
||||
<< "] name: [" << node.Name()
|
||||
<< "] Operator type: [" << node.OpType()
|
||||
<< "] index: [" << node->Index()
|
||||
<< "] name: [" << node->Name()
|
||||
<< "] Operator type: [" << node->OpType()
|
||||
<< "] as part of the NodeUnit type: [" << node_unit->OpType()
|
||||
<< "] index: [" << node_unit->Index()
|
||||
<< "] name: [" << node_unit->Name()
|
||||
<< "]";
|
||||
if (supported) {
|
||||
supported_nodes.insert(&node);
|
||||
supported_nodes.insert(node);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -447,8 +447,9 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Log_U16) {
|
|||
// Check that QNN compiles DQ -> Softmax -> Q as a single unit.
|
||||
// Test that the default axis (-1) for SoftMax opset 13 works.
|
||||
TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_DefaultAxis) {
|
||||
const std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
|
||||
RunQDQOpTest<uint8_t>("Softmax",
|
||||
{TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
|
||||
{TestInputDef<float>({1, 2, 3}, false, input_data)},
|
||||
{}, // Uses default axis of -1 for opset 13
|
||||
13,
|
||||
ExpectedEPNodeAssignment::All);
|
||||
|
|
@ -466,14 +467,43 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_U16_DefaultAxis) {
|
|||
true); // Use com.microsoft domain for Q/DQ ops
|
||||
}
|
||||
|
||||
// Check that QNN compiles DQ -> Softmax -> Q as a single unit.
|
||||
// Test that an axis != -1 is not supported.
|
||||
TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_UnsupportedAxis) {
|
||||
// Test that 8-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
|
||||
// QNN EP will wrap the operator with transposes.
|
||||
TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_NonLastAxis) {
|
||||
const std::vector<float> input_data = {0.0f, 1.0f, 2.0f, 10.0f, 11.0f, 12.0f, 100.0f, 110.0f, 120.0f,
|
||||
1.0856307f, 0.99734545f, 0.2829785f, 1.5062947f, 0.5786002f, 1.6514366f,
|
||||
2.4266791f, 0.42891264f, 1.2659363f};
|
||||
RunQDQOpTest<uint8_t>("Softmax",
|
||||
{TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
|
||||
{TestInputDef<float>({1, 2, 3, 3}, false, input_data)},
|
||||
{utils::MakeAttribute("axis", static_cast<int64_t>(1))},
|
||||
13,
|
||||
ExpectedEPNodeAssignment::None);
|
||||
ExpectedEPNodeAssignment::All);
|
||||
}
|
||||
|
||||
// Test that 8-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
|
||||
// QNN EP will wrap the operator with transposes.
|
||||
// This is a configuration used in one of our partner's models.
|
||||
TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_NonLastAxis_LargeInput) {
  constexpr int opset_version = 13;
  constexpr int64_t softmax_axis = 1;  // Not the last dimension of the rank-3 input below.

  // 124 values, matching the element count of shape {1, 124, 1}.
  const std::vector<float> input_data = GetFloatDataInRange(-50.0f, 50.0f, 124);

  RunQDQOpTest<uint8_t>("Softmax",
                        {TestInputDef<float>({1, 124, 1}, false, input_data)},
                        {utils::MakeAttribute("axis", softmax_axis)},
                        opset_version,
                        ExpectedEPNodeAssignment::All);
}
|
||||
|
||||
// Test that 16-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
|
||||
// QNN EP will wrap the operator with transposes.
|
||||
// This is a configuration used in one of our partner's models.
|
||||
TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_U16_NonLastAxis_LargeInput) {
  constexpr int opset_version = 13;
  constexpr int64_t softmax_axis = 1;  // Not the last dimension of the rank-3 input below.

  // 124 values, matching the element count of shape {1, 124, 1}.
  const std::vector<float> input_data = GetFloatDataInRange(-50.0f, 50.0f, 124);

  RunQDQOpTest<uint16_t>("Softmax",
                         {TestInputDef<float>({1, 124, 1}, false, input_data)},
                         {utils::MakeAttribute("axis", softmax_axis)},
                         opset_version,
                         ExpectedEPNodeAssignment::All,
                         kOnnxDomain,
                         true);  // Use com.microsoft domain for Q/DQ ops
}
|
||||
|
||||
// Check that QNN compiles DQ -> Softmax -> Q as a single unit.
|
||||
|
|
@ -507,15 +537,15 @@ TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_DefaultAxis) {
|
|||
ExpectedEPNodeAssignment::All);
|
||||
}
|
||||
|
||||
// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.
|
||||
// Test that an axis != -1 is not supported.
|
||||
TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_UnsupportedAxis) {
|
||||
// Test that 8-bit QDQ LogSoftmax (opset 13) with axis != -1 is supported by QNN EP.
|
||||
// QNN EP will wrap the operator with transposes.
|
||||
TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_NonLastAxis) {
|
||||
std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
|
||||
RunQDQOpTest<uint8_t>("LogSoftmax",
|
||||
{TestInputDef<float>({1, 2, 3}, false, input_data)},
|
||||
{utils::MakeAttribute("axis", static_cast<int64_t>(1))},
|
||||
13,
|
||||
ExpectedEPNodeAssignment::None);
|
||||
ExpectedEPNodeAssignment::All);
|
||||
}
|
||||
|
||||
// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.
|
||||
|
|
|
|||
Loading…
Reference in a new issue