[QNN EP] Support Softmax/LogSoftmax with any axis attribute (#17877)

### Description The QNN HTP backend only supports Softmax/LogSoftmax operators with an axis attribute set to `input_rank - 1` (i.e., the last dimension). This PR adds support for any axis by wrapping the QNN operator in transposes. ### Motivation and Context Support more models.
2026-05-19 21:32:23 +00:00 · 2023-10-11 17:43:42 -07:00 · 2023-10-11 17:43:42 -07:00 · 565bead85f
commit 565bead85f
parent 63dc5dc1a9
6 changed files with 298 additions and 44 deletions
--- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
+++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.cc
@ -47,12 +47,10 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
    CreateSimpleOpBuilder("Where", *this);
    CreateSimpleOpBuilder("Sigmoid", *this);
    CreateSimpleOpBuilder("Sin", *this);
-    CreateSimpleOpBuilder("Softmax", *this);
    CreateSimpleOpBuilder("Sqrt", *this);
    CreateSimpleOpBuilder("Sub", *this);
    CreateSimpleOpBuilder("Tanh", *this);

-    CreateSimpleOpBuilder("LogSoftmax", *this);
    CreateSimpleOpBuilder("MatMul", *this);
    CreateSimpleOpBuilder("Concat", *this);

@ -67,6 +65,11 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
    CreateSimpleOpBuilder("GridSample", *this);
  }

+  {
+    CreateSoftmaxOpBuilder("Softmax", *this);
+    CreateSoftmaxOpBuilder("LogSoftmax", *this);
+  }
+
  {
    CreateCastOpBuilder("Cast", *this);
  }
--- a/onnxruntime/core/providers/qnn/builder/op_builder_factory.h
+++ b/onnxruntime/core/providers/qnn/builder/op_builder_factory.h
@ -50,6 +50,8 @@ const IOpBuilder* GetOpBuilder(const std::string& onnx_op_type);

 void CreateSimpleOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);

+void CreateSoftmaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
+
 void CreateCastOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);

 void CreateConvOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
@ -29,7 +29,7 @@ class SimpleOpBuilder : public BaseOpBuilder {
                                     bool do_op_validation) const override ORT_MUST_USE_RESULT;

 private:
-  Status ExplicitOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const;
+  Status ExplicitOpCheck(const NodeUnit& node_unit) const;
  Status ProcessSigmoidOrTanhOutput(QnnModelWrapper& qnn_model_wrapper,
                                    const NodeUnit& node_unit,
                                    std::vector<std::string>&& input_names,
@ -41,30 +41,9 @@ class SimpleOpBuilder : public BaseOpBuilder {
  static constexpr std::array<std::string_view, 3> gridsample_supported_padding_modes = {"zeros", "border", "reflection"};
 };

-static int32_t GetDefaultAxisAttribute(const std::string& op_type, int opset_version) {
-  if (op_type == "Softmax" || op_type == "LogSoftmax") {
-    // Default axis changed from 1 to -1 in opset 13.
-    return opset_version < 13 ? 1 : -1;
-  }
-
-  return 0;
-}
-
-Status SimpleOpBuilder::ExplicitOpCheck(const QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const {
+Status SimpleOpBuilder::ExplicitOpCheck(const NodeUnit& node_unit) const {
  const std::string& op_type = node_unit.OpType();

-  // QNN Softmax and LogSoftmax only support an axis value equal to input_rank - 1 (i.e., same as -1).
-  if (op_type == "Softmax" || op_type == "LogSoftmax") {
-    int32_t axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion());
-    Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
-    ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
-    std::vector<uint32_t> input_shape;
-    ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape),
-                      "QNN EP: Cannot get shape for Softmax input");
-    ORT_RETURN_IF(axis != static_cast<int32_t>(input_shape.size() - 1),
-                  "QNN ", op_type.c_str(), " only supports an `axis` attribute equal to input_rank-1 (or -1)");
-  }
-
  if (op_type == "GridSample") {
    NodeAttrHelper node_helper(node_unit);
    std::string mode = node_helper.Get("mode", "linear");
@ -231,7 +210,7 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
  const std::string& op_type = node_unit.OpType();

  if (do_op_validation) {
-    ORT_RETURN_IF_ERROR(ExplicitOpCheck(qnn_model_wrapper, node_unit));
+    ORT_RETURN_IF_ERROR(ExplicitOpCheck(node_unit));
    // Skip the op validation for DepthToSpace & SpaceToDepth if it's not NHWC data layout
    if (node_unit.Domain() != kMSInternalNHWCDomain && (op_type == "DepthToSpace" || op_type == "SpaceToDepth" || op_type == "GridSample")) {
      return Status::OK();
@ -251,8 +230,8 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w

  std::vector<std::string> param_tensor_names;
  // Add attribute
-  if (op_type == "LogSoftmax" || op_type == "Softmax" || op_type == "Concat") {
-    int32_t default_axis = GetDefaultAxisAttribute(op_type, node_unit.SinceVersion());
+  if (op_type == "Concat") {
+    int32_t default_axis = 0;
    Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
    ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, default_axis));
    QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar);
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/softmax_op_builder.cc
@ -0,0 +1,237 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+#include "core/providers/common.h"
+#include "core/providers/shared/utils/utils.h"
+#include "core/framework/tensorprotoutils.h"
+#include "core/providers/qnn/builder/qnn_model_wrapper.h"
+#include "core/providers/qnn/builder/op_builder_factory.h"
+#include "core/common/safeint.h"
+
+#include "base_op_builder.h"
+
+namespace onnxruntime {
+namespace qnn {
+
+class SoftmaxOpBuilder : public BaseOpBuilder {
+ public:
+  SoftmaxOpBuilder() : BaseOpBuilder("SoftmaxOpBuilder") {}
+  ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(SoftmaxOpBuilder);
+
+  Status IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
+                       const NodeUnit& node_unit,
+                       const logging::Logger& logger) const override final ORT_MUST_USE_RESULT;
+
+ protected:
+  Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                       const NodeUnit& node_unit,
+                       const logging::Logger& logger,
+                       std::vector<std::string>& input_names,
+                       bool do_op_validation) const override ORT_MUST_USE_RESULT;
+
+  Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
+                                     const NodeUnit& node_unit,
+                                     std::vector<std::string>&& input_names,
+                                     const logging::Logger& logger,
+                                     bool do_op_validation) const override ORT_MUST_USE_RESULT;
+};
+
+constexpr int32_t GetDefaultAxisAttribute(int opset_version) {
+  // Default axis changed from 1 to -1 in opset 13.
+  return opset_version < 13 ? 1 : -1;
+}
+
+Status SoftmaxOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
+                                       const NodeUnit& node_unit,
+                                       const logging::Logger& logger) const {
+  ORT_UNUSED_PARAMETER(logger);
+  const int opset_version = node_unit.SinceVersion();
+
+  // The QNN HTP backend only supports an `axis` attribute that refers to the last input dimension.
+  // QNN EP is able to support arbitrary axis attributes by wrapping the QNN operator with transposes.
+  // However, the exception is Softmax/LogSoftmax with opset < 13. For these older ONNX operators, only
+  // axis == input_rank - 1 is supported.
+  if (opset_version < 13) {
+    const std::string& op_type = node_unit.OpType();
+
+    int32_t axis = GetDefaultAxisAttribute(opset_version);
+    Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
+    ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
+    std::vector<uint32_t> input_shape;
+    ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(node_unit.Inputs()[0].node_arg, input_shape),
+                      "QNN EP: Cannot get shape for Softmax input");
+    ORT_RETURN_IF(axis != static_cast<int32_t>(input_shape.size() - 1),
+                  "QNN ", op_type.c_str(),
+                  " only supports an `axis` attribute equal to input_rank-1 (or -1) for ONNX opset < 13");
+  }
+
+  return AddToModelBuilder(qnn_model_wrapper, node_unit, logger, true);
+}
+
+static std::vector<uint32_t> GetTransposePermToUseLastAxis(uint32_t input_rank, uint32_t axis) {
+  assert(axis < input_rank);
+  std::vector<uint32_t> transpose_perm;
+  transpose_perm.reserve(input_rank);
+
+  for (uint32_t dim = 0; dim < input_rank; dim++) {
+    transpose_perm.push_back(dim);
+  }
+
+  // Swap axis dim with last dim.
+  transpose_perm[axis] = input_rank - 1;
+  transpose_perm[input_rank - 1] = axis;
+
+  return transpose_perm;
+}
+
+Status SoftmaxOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
+                                       const NodeUnit& node_unit,
+                                       const logging::Logger& logger,
+                                       std::vector<std::string>& input_names,
+                                       bool do_op_validation) const {
+  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
+  const auto& inputs = node_unit.Inputs();
+  assert(inputs.size() == 1);
+
+  int32_t axis = GetDefaultAxisAttribute(node_unit.SinceVersion());
+  Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
+  ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
+
+  OnnxInputInfo input_info = {};
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(inputs[0], input_info));
+  const size_t input_rank = input_info.shape.size();
+
+  // If the axis attribute refers to the last dimension, then process the input as normal.
+  if (!is_npu_backend || axis == static_cast<int32_t>(input_rank) - 1) {
+    return ProcessInput(qnn_model_wrapper, inputs[0], logger, input_names);
+  }
+
+  //
+  // The axis does **not** refer to the last input dimension. Must wrap transposes around the operator to be able to use
+  // QNN's Softmax operator, which always uses an axis value that refers to the last dimension.
+  //
+
+  std::vector<uint32_t> transpose_perm = GetTransposePermToUseLastAxis(static_cast<uint32_t>(input_rank),
+                                                                       static_cast<uint32_t>(axis));
+
+  const std::string& input_name = inputs[0].node_arg.Name();
+  std::string op_input_name = input_info.is_initializer ? input_name : input_name + "_ort_qnn_ep_transpose";
+  input_names.push_back(op_input_name);
+
+  std::vector<uint32_t> op_input_shape = input_info.shape;
+  op_input_shape[input_rank - 1] = input_info.shape[axis];
+  op_input_shape[axis] = input_info.shape[input_rank - 1];
+
+  ORT_RETURN_IF(input_info.is_initializer, "QNN EP does not support (Log)Softmax with an initializer input, ",
+                "which should be optimized away by the ORT optimizer");
+
+  // Input is dynamic, so add transpose node before input.
+  const bool is_graph_input = qnn_model_wrapper.IsGraphInput(input_name);
+
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddTransposeNode(node_unit.Index(),
+                                                         input_name,
+                                                         op_input_name,
+                                                         input_info.shape,
+                                                         transpose_perm,
+                                                         op_input_shape,
+                                                         input_info.qnn_data_type,
+                                                         input_info.quant_param,
+                                                         do_op_validation,
+                                                         is_graph_input));
+
+  Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, op_input_name);
+  QnnTensorWrapper input_tensorwrapper(op_input_name, tensor_type, input_info.qnn_data_type, input_info.quant_param,
+                                       std::move(op_input_shape), {});
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)), "Failed to add tensor.");
+
+  return Status::OK();
+}
+
+Status SoftmaxOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
+                                                     const NodeUnit& node_unit,
+                                                     std::vector<std::string>&& input_names,
+                                                     const logging::Logger& logger,
+                                                     bool do_op_validation) const {
+  const bool is_npu_backend = IsNpuBackend(qnn_model_wrapper.GetQnnBackendType());
+  const std::string& op_type = node_unit.OpType();
+  const auto& outputs = node_unit.Outputs();
+  assert(outputs.size() == 1);
+
+  int32_t axis = GetDefaultAxisAttribute(node_unit.SinceVersion());
+  Qnn_Scalar_t axis_qnn_scalar = QNN_SCALAR_INIT;
+  ORT_RETURN_IF_ERROR(ProcessAxisAttribute(qnn_model_wrapper, node_unit, axis_qnn_scalar, axis));
+
+  OnnxInputInfo output_info = {};
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.GetOnnxInputInfo(outputs[0], output_info));
+  const size_t output_rank = output_info.shape.size();
+  const bool axis_is_last_dim = static_cast<size_t>(axis) == output_rank - 1;
+
+  // If axis refers to the last dimension, process outputs as usual.
+  if (!is_npu_backend || axis_is_last_dim) {
+    QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar);
+
+    std::vector<std::string> param_tensor_names;
+    param_tensor_names.push_back(axis_param.GetParamTensorName());
+    qnn_model_wrapper.AddParamWrapper(std::move(axis_param));
+
+    return ProcessOutputs(qnn_model_wrapper, node_unit,
+                          std::move(input_names),
+                          std::move(param_tensor_names),
+                          logger, do_op_validation, GetQnnOpType(op_type));
+  }
+
+  //
+  // The axis **does** not refer to the last dimension. Must wrap the operator with Transposes to be able to use
+  // QNN's Softmax operator, which only supports an axis that refers to the last dimension.
+  //
+
+  axis_qnn_scalar.uint32Value = static_cast<uint32_t>(output_rank - 1);  // NOTE: override axis.
+  QnnParamWrapper axis_param(node_unit.Index(), node_unit.Name(), QNN_OP_SOFTMAX_PARAM_AXIS, axis_qnn_scalar);
+
+  std::vector<std::string> param_tensor_names;
+  param_tensor_names.push_back(axis_param.GetParamTensorName());
+  qnn_model_wrapper.AddParamWrapper(std::move(axis_param));
+
+  const std::string& orig_output_name = outputs[0].node_arg.Name();
+  std::string op_output_name = orig_output_name + "_ort_qnn_ep_transpose";
+
+  std::vector<uint32_t> op_output_shape = output_info.shape;
+  op_output_shape[output_rank - 1] = output_info.shape[axis];
+  op_output_shape[axis] = output_info.shape[output_rank - 1];
+
+  QnnTensorWrapper output_tensorwrapper(op_output_name, QNN_TENSOR_TYPE_NATIVE, output_info.qnn_data_type, output_info.quant_param,
+                                        std::vector<uint32_t>(op_output_shape));
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)), "Failed to add tensor.");
+  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit),
+                                                    QNN_OP_PACKAGE_NAME_QTI_AISW,
+                                                    GetQnnOpType(node_unit.OpType()),
+                                                    std::move(input_names),
+                                                    {op_output_name},
+                                                    std::move(param_tensor_names)),
+                    "Failed to add node.");
+
+  const bool is_graph_output = qnn_model_wrapper.IsGraphOutput(orig_output_name);
+  std::vector<uint32_t> transpose_perm = GetTransposePermToUseLastAxis(static_cast<uint32_t>(output_rank),
+                                                                       static_cast<uint32_t>(axis));
+
+  ORT_RETURN_IF_ERROR(qnn_model_wrapper.AddTransposeNode(node_unit.Index(),
+                                                         op_output_name,
+                                                         orig_output_name,
+                                                         op_output_shape,
+                                                         transpose_perm,
+                                                         output_info.shape,
+                                                         output_info.qnn_data_type,
+                                                         output_info.quant_param,
+                                                         do_op_validation,
+                                                         false,
+                                                         is_graph_output));
+
+  return Status::OK();
+}
+
+void CreateSoftmaxOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
+  op_registrations.AddOpBuilder(op_type, std::make_unique<SoftmaxOpBuilder>());
+}
+
+}  // namespace qnn
+}  // namespace onnxruntime
--- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
+++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
@ -238,22 +238,25 @@ QNNExecutionProvider::GetSupportedNodes(const GraphViewer& graph_viewer,
                                                initializer_input_lookup,
                                                qnn_backend_manager_->GetQnnBackendType());

-  for (const auto& node : graph_viewer.Nodes()) {
-    const NodeUnit* node_unit = node_unit_map.at(&node);
+  const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
+  for (size_t i = 0; i < node_indices.size(); i++) {
+    gsl::not_null<const onnxruntime::Node*> node(graph_viewer.GetNode(node_indices[i]));
+
+    const NodeUnit* node_unit = node_unit_map.at(node);
    const bool supported = IsNodeSupported(qnn_model_wrapper,
                                           *node_unit,
                                           node_unit_supported_result,
                                           logger);
    LOGS(logger, VERBOSE) << "Node supported: [" << supported
-                          << "] index: [" << node.Index()
-                          << "] name: [" << node.Name()
-                          << "] Operator type: [" << node.OpType()
+                          << "] index: [" << node->Index()
+                          << "] name: [" << node->Name()
+                          << "] Operator type: [" << node->OpType()
                          << "] as part of the NodeUnit type: [" << node_unit->OpType()
                          << "] index: [" << node_unit->Index()
                          << "] name: [" << node_unit->Name()
                          << "]";
    if (supported) {
-      supported_nodes.insert(&node);
+      supported_nodes.insert(node);
    }
  }

--- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
@ -447,8 +447,9 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Log_U16) {
 // Check that QNN compiles DQ -> Softmax -> Q as a single unit.
 // Test that the default axis (-1) for SoftMax opset 13 works.
 TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_DefaultAxis) {
+  const std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
  RunQDQOpTest<uint8_t>("Softmax",
-                        {TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
+                        {TestInputDef<float>({1, 2, 3}, false, input_data)},
                        {},  // Uses default axis of -1 for opset 13
                        13,
                        ExpectedEPNodeAssignment::All);
@ -466,14 +467,43 @@ TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_U16_DefaultAxis) {
                         true);        // Use com.microsoft domain for Q/DQ ops
 }

-// Check that QNN compiles DQ -> Softmax -> Q as a single unit.
-// Test that an axis != -1 is not supported.
-TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_UnsupportedAxis) {
+// Test that 8-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
+// QNN EP will wrap the operator with transposes.
+TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_NonLastAxis) {
+  const std::vector<float> input_data = {0.0f, 1.0f, 2.0f, 10.0f, 11.0f, 12.0f, 100.0f, 110.0f, 120.0f,
+                                         1.0856307f, 0.99734545f, 0.2829785f, 1.5062947f, 0.5786002f, 1.6514366f,
+                                         2.4266791f, 0.42891264f, 1.2659363f};
  RunQDQOpTest<uint8_t>("Softmax",
-                        {TestInputDef<float>({1, 2, 3}, false, -5.0f, 5.0f)},
+                        {TestInputDef<float>({1, 2, 3, 3}, false, input_data)},
                        {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
                        13,
-                        ExpectedEPNodeAssignment::None);
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test that 8-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
+// QNN EP will wrap the operator with transposes.
+// This is a configuration used in one of our partner's models.
+TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_NonLastAxis_LargeInput) {
+  const std::vector<float> input_data = GetFloatDataInRange(-50.0f, 50.0f, 124);
+  RunQDQOpTest<uint8_t>("Softmax",
+                        {TestInputDef<float>({1, 124, 1}, false, input_data)},
+                        {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
+                        13,
+                        ExpectedEPNodeAssignment::All);
+}
+
+// Test that 16-bit QDQ Softmax (opset 13) with axis != -1 is supported by QNN EP.
+// QNN EP will wrap the operator with transposes.
+// This is a configuration used in one of our partner's models.
+TEST_F(QnnHTPBackendTests, UnaryOp_Softmax13_U16_NonLastAxis_LargeInput) {
+  const std::vector<float> input_data = GetFloatDataInRange(-50.0f, 50.0f, 124);
+  RunQDQOpTest<uint16_t>("Softmax",
+                         {TestInputDef<float>({1, 124, 1}, false, input_data)},
+                         {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
+                         13,
+                         ExpectedEPNodeAssignment::All,
+                         kOnnxDomain,
+                         true);
 }

 // Check that QNN compiles DQ -> Softmax -> Q as a single unit.
@ -507,15 +537,15 @@ TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_DefaultAxis) {
                        ExpectedEPNodeAssignment::All);
 }

-// Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.
-// Test that an axis != -1 is not supported.
-TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_UnsupportedAxis) {
+// Test that 8-bit QDQ LogSoftmax (opset 13) with axis != -1 is supported by QNN EP.
+// QNN EP will wrap the operator with transposes.
+TEST_F(QnnHTPBackendTests, UnaryOp_LogSoftmax13_NonLastAxis) {
  std::vector<float> input_data = GetFloatDataInRange(-5.0f, 5.0f, 6);
  RunQDQOpTest<uint8_t>("LogSoftmax",
                        {TestInputDef<float>({1, 2, 3}, false, input_data)},
                        {utils::MakeAttribute("axis", static_cast<int64_t>(1))},
                        13,
-                        ExpectedEPNodeAssignment::None);
+                        ExpectedEPNodeAssignment::All);
 }

 // Check that QNN compiles DQ -> LogSoftmax -> Q as a single unit.