mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-31 23:27:43 +00:00
[QNN EP] Support Cast in HTP backend (#15234)
### Description
Adds support for the Cast operator to the QNN HTP backend.

### Motivation and Context
Enables more models to run on the QNN HTP backend.
This commit is contained in:
parent
a6279d4cfb
commit
febc69e1b2
5 changed files with 272 additions and 4 deletions
|
|
@ -19,7 +19,6 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
|
|||
CreateSimpleOpBuilder("Abs", *this);
|
||||
CreateSimpleOpBuilder("And", *this);
|
||||
CreateSimpleOpBuilder("Ceil", *this);
|
||||
CreateSimpleOpBuilder("Cast", *this);
|
||||
CreateSimpleOpBuilder("Cos", *this);
|
||||
CreateSimpleOpBuilder("Div", *this);
|
||||
CreateSimpleOpBuilder("Equal", *this);
|
||||
|
|
@ -54,6 +53,10 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
|
|||
CreateSimpleOpBuilder("Concat", *this);
|
||||
}
|
||||
|
||||
{
|
||||
CreateCastOpBuilder("Cast", *this);
|
||||
}
|
||||
|
||||
{
|
||||
CreateReduceOpBuilder("ReduceMax", *this);
|
||||
CreateReduceOpBuilder("ReduceMean", *this);
|
||||
|
|
|
|||
|
|
@ -50,6 +50,8 @@ const IOpBuilder* GetOpBuilder(const std::string& onnx_op_type);
|
|||
|
||||
void CreateSimpleOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
|
||||
void CreateCastOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
|
||||
void CreateConvOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
|
||||
void CreatePoolOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,138 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "core/framework/tensorprotoutils.h"
|
||||
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
|
||||
#include "core/providers/qnn/builder/op_builder_factory.h"
|
||||
|
||||
#include "base_op_builder.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace qnn {
|
||||
|
||||
// Op builder for Cast nodes in the QNN execution provider.
// Instances are created and registered by CreateCastOpBuilder() (defined later in this file).
// Both overrides below ignore is_quantized_model: Cast uses the same QNN types across backends.
class CastOpBuilder : public BaseOpBuilder {
|
||||
public:
|
||||
CastOpBuilder() : BaseOpBuilder("CastOpBuilder") {}
|
||||
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CastOpBuilder);
|
||||
|
||||
protected:
|
||||
// Adds the Cast node's single input tensor to qnn_model_wrapper (unless a tensor with that name
// already exists there) and appends the input's name to input_names.
Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
|
||||
const NodeUnit& node_unit,
|
||||
const logging::Logger& logger,
|
||||
bool is_quantized_model,
|
||||
std::vector<std::string>& input_names,
|
||||
bool do_op_validation = false) const override ORT_MUST_USE_RESULT;
|
||||
|
||||
// Adds the Cast node's single output tensor to qnn_model_wrapper and creates the QNN node that
// connects input_names to that output. do_op_validation is forwarded to CreateQnnNode().
Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
|
||||
const NodeUnit& node_unit,
|
||||
std::vector<std::string>&& input_names,
|
||||
const logging::Logger& logger,
|
||||
bool is_quantized_model,
|
||||
bool do_op_validation) const override ORT_MUST_USE_RESULT;
|
||||
|
||||
};
|
||||
|
||||
// Adds the Cast node's single input to the QNN model wrapper and records its name.
//
// \param qnn_model_wrapper Wrapper that receives the input tensor.
// \param node_unit The Cast node unit; must have exactly one input.
// \param logger Used for a VERBOSE message when the tensor was already added.
// \param is_quantized_model Ignored (Cast uses the same QNN types across backends).
// \param input_names Output: the input's name is appended on success.
// \param do_op_validation Ignored.
// \return Status::OK() on success; otherwise a failure Status describing what went wrong
//         (wrong input count, unknown shape, unsupported data type, or failure to add the tensor).
Status CastOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
                                    const NodeUnit& node_unit,
                                    const logging::Logger& logger,
                                    bool is_quantized_model,
                                    std::vector<std::string>& input_names,
                                    bool do_op_validation) const {
  ORT_UNUSED_PARAMETER(do_op_validation);
  ORT_UNUSED_PARAMETER(is_quantized_model);  // Ignore in all backends. Cast should use same QNN types across backends.

  const auto& inputs = node_unit.Inputs();
  // Report a malformed node through the returned Status (like every other check in this function)
  // instead of throwing from a Status-returning function.
  ORT_RETURN_IF_NOT(inputs.size() == 1, "QNN Cast node must have a single input.");
  const auto& input = inputs[0];

  const auto& input_name = input.node_arg.Name();

  // The tensor may already have been registered under this name; if so, only record the name.
  if (qnn_model_wrapper.IsQnnTensorWrapperExist(input_name)) {
    LOGS(logger, VERBOSE) << "Tensor already added, skip it: " << input_name;
    input_names.push_back(input_name);
    return Status::OK();
  }

  // For an initializer input, unpack its raw bytes so they can be handed to the tensor wrapper.
  // For any other input the buffer stays empty.
  std::vector<uint8_t> unpacked_tensor;
  const bool is_initializer_input = qnn_model_wrapper.IsInitializerInput(input_name);
  if (is_initializer_input) {
    const auto& input_tensor = qnn_model_wrapper.GetInitializerTensors().at(input_name);
    ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(*input_tensor, unpacked_tensor));
  }

  Qnn_TensorType_t tensor_type = GetInputTensorType(qnn_model_wrapper, input_name);
  std::vector<uint32_t> input_shape;
  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(input.node_arg, input_shape),
                    "Cannot get shape for QNN Cast node's input.");

  Qnn_DataType_t qnn_data_type = QNN_DATATYPE_UNDEFINED;
  const auto* type_proto = input.node_arg.TypeAsProto();

  ORT_RETURN_IF_ERROR(GetQnnDataType(false,  // Do not try to get the quantized type. HTP cast supports normal types.
                                     type_proto,
                                     qnn_data_type));

  QnnTensorWrapper input_tensorwrapper(input_name, tensor_type, qnn_data_type, QNN_QUANTIZE_PARAMS_INIT,
                                       std::move(input_shape), std::move(unpacked_tensor));
  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(input_tensorwrapper)),
                    "Failed to add input tensor for QNN Cast node.");
  input_names.push_back(input_name);

  return Status::OK();
}
|
||||
|
||||
// Adds the Cast node's single output to the QNN model wrapper and creates the QNN node.
//
// \param qnn_model_wrapper Wrapper that receives the output tensor and the new QNN node.
// \param node_unit The Cast node unit; must have exactly one output.
// \param input_names Names of the node's inputs; moved into the created QNN node.
// \param logger Unused.
// \param is_quantized_model Ignored (Cast uses the same QNN types across backends).
// \param do_op_validation Forwarded to QnnModelWrapper::CreateQnnNode().
// \return Status::OK() on success; otherwise a failure Status describing what went wrong
//         (wrong output count, unsupported data type, unknown shape, or failure to add the
//         tensor / create the node).
Status CastOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                                  const NodeUnit& node_unit,
                                                  std::vector<std::string>&& input_names,
                                                  const logging::Logger& logger,
                                                  bool is_quantized_model,
                                                  bool do_op_validation) const {
  ORT_UNUSED_PARAMETER(logger);
  ORT_UNUSED_PARAMETER(is_quantized_model);  // Ignore in all backends. Cast should use same QNN types across backends.

  const auto& outputs = node_unit.Outputs();
  // Report a malformed node through the returned Status (like every other check in this function)
  // instead of throwing from a Status-returning function.
  ORT_RETURN_IF_NOT(outputs.size() == 1, "QNN Cast node must have a single output.");
  const auto& output = outputs[0];
  const auto& output_name = output.node_arg.Name();

  const auto* type_proto = output.node_arg.TypeAsProto();
  Qnn_DataType_t qnn_data_type = QNN_DATATYPE_UNDEFINED;
  ORT_RETURN_IF_ERROR(GetQnnDataType(false,  // Do not try to get the quantized type. HTP cast supports normal types.
                                     type_proto,
                                     qnn_data_type));

  std::vector<uint32_t> output_shape;
  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(output.node_arg, output_shape),
                    "Cannot get shape for QNN Cast node's output.");
  const bool is_graph_output = qnn_model_wrapper.IsGraphOutput(output_name);

  // Graph outputs get the APP_READ tensor type; all other outputs are NATIVE.
  const Qnn_TensorType_t tensor_type = is_graph_output ? QNN_TENSOR_TYPE_APP_READ : QNN_TENSOR_TYPE_NATIVE;
  QnnTensorWrapper output_tensorwrapper(output_name,
                                        tensor_type,
                                        qnn_data_type,
                                        QNN_QUANTIZE_PARAMS_INIT,
                                        std::move(output_shape));
  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(output_tensorwrapper)),
                    "Failed to add output tensor for QNN Cast node.");

  // The node is created with an empty parameter list ({}).
  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit),
                                                    qnn_def::package_name,
                                                    GetQnnOpType(node_unit.OpType()),
                                                    std::move(input_names),
                                                    {output_name},
                                                    {},
                                                    do_op_validation),
                    "Failed to create QNN Cast node.");

  return Status::OK();
}
|
||||
|
||||
void CreateCastOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
|
||||
op_registrations.AddOpBuilder(op_type, std::make_unique<CastOpBuilder>());
|
||||
}
|
||||
|
||||
} // namespace qnn
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -83,6 +83,7 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp
|
|||
// Is NPU backend, is single node, case by case
|
||||
// Q/DQ nodes -- supported
|
||||
// Transpose nodes -- supported
|
||||
// Cast nodes -- need to call CastOpBuilder::IsOpSupported
|
||||
if (is_npu_backend && NodeUnit::Type::SingleNode == node_unit.UnitType()) {
|
||||
if (IsQdqNode(node_unit)) { // Qnn has Quantize & Dequantize Op
|
||||
LOGS(logger, VERBOSE) << "Single Q/DQ node is supported for NPU backend. Node name: " << node_unit.Name();
|
||||
|
|
@ -95,9 +96,13 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp
|
|||
return true;
|
||||
}
|
||||
|
||||
LOGS(logger, VERBOSE) << "Non-QDQ single node is not supported for NPU backend. Node name: " << node_unit.Name()
|
||||
<< " Op type: " << node_unit.OpType();
|
||||
return false;
|
||||
// For Cast, need to call IsOpSupported (below) to validate input and output types.
|
||||
// For other single non-qdq nodes, immediately return not supported.
|
||||
if (node_unit.OpType() != "Cast") {
|
||||
LOGS(logger, VERBOSE) << "Non-QDQ single node is not supported for NPU backend. Node name: " << node_unit.Name()
|
||||
<< " Op type: " << node_unit.OpType();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// Non-NPU backend, quantized model not supported, but a QDQ node encountered
|
||||
|
|
|
|||
120
onnxruntime/test/providers/qnn/cast_test.cc
Normal file
120
onnxruntime/test/providers/qnn/cast_test.cc
Normal file
|
|
@ -0,0 +1,120 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#if !defined(ORT_MINIMAL_BUILD)
|
||||
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
|
||||
#include "test/optimizer/qdq_test_utils.h"
|
||||
#include "test/providers/qnn/qnn_test_utils.h"
|
||||
|
||||
#include "onnx/onnx_pb.h"
|
||||
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
/**
|
||||
* Creates a graph with a single Cast operator.
|
||||
*
|
||||
* \param shape The shape of the input and output. Input data is randomly generated with this shape.
|
||||
* \param dst_type The destination type as an instance of the DataType enum in TensorProto.
|
||||
*
|
||||
* \return A function that builds the graph with the provided builder.
|
||||
*/
|
||||
template <typename InputType>
|
||||
static GetTestModelFn BuildCastTestCase(const std::vector<int64_t>& shape,
|
||||
ONNX_NAMESPACE::TensorProto_DataType dst_type) {
|
||||
return [shape, dst_type](ModelTestBuilder& builder) {
|
||||
|
||||
// Random input data
|
||||
auto input = builder.MakeInput<InputType>(shape, static_cast<InputType>(0), static_cast<InputType>(20));
|
||||
|
||||
auto* output = builder.MakeOutput();
|
||||
Node& cast_node = builder.AddNode("Cast", {input}, {output});
|
||||
cast_node.AddAttribute("to", static_cast<int64_t>(dst_type));
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs a Cast model on the QNN CPU or HTP backend. Checks the graph node assignment, and that inference
|
||||
* outputs for QNN and CPU match.
|
||||
*
|
||||
* \param shape The shape of the input and output. Input data is randomly generated with this shape.
|
||||
* \param dst_type The destination type as an instance of the DataType enum in TensorProto.
|
||||
* \param test_description Description of the test for error reporting.
|
||||
* \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None).
|
||||
* \param use_htp True to run on HTP backend. Otherwise, runs on CPU.
|
||||
*/
|
||||
template <typename InputType>
|
||||
static void RunCastOpTest(const std::vector<int64_t>& shape, ONNX_NAMESPACE::TensorProto_DataType dst_type,
|
||||
ExpectedEPNodeAssignment expected_ep_assignment, const char* test_description,
|
||||
bool use_htp) {
|
||||
ProviderOptions provider_options;
|
||||
#if defined(_WIN32)
|
||||
provider_options["backend_path"] = use_htp ? "QnnHtp.dll" : "QnnCpu.dll";
|
||||
#else
|
||||
provider_options["backend_path"] = use_htp ? "libQnnHtp.so" : "libQnnCpu.so";
|
||||
#endif
|
||||
|
||||
constexpr int expected_nodes_in_partition = 1;
|
||||
RunQnnModelTest(BuildCastTestCase<InputType>(shape, dst_type),
|
||||
provider_options,
|
||||
13, // opset
|
||||
expected_ep_assignment,
|
||||
expected_nodes_in_partition,
|
||||
test_description);
|
||||
}
|
||||
|
||||
//
|
||||
// CPU tests:
|
||||
//
|
||||
|
||||
// Cast int32_t to float on CPU
|
||||
TEST(QnnCPUBackendTests, TestCastInt32ToFloat) {
  // int32 -> float on the QNN CPU backend; every node is expected to be assigned to QNN.
  const std::vector<int64_t> input_shape{2, 3};
  constexpr bool use_htp = false;
  RunCastOpTest<int32_t>(input_shape,
                         ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT,
                         ExpectedEPNodeAssignment::All,
                         "TestCastInt32ToFloat",
                         use_htp);
}
|
||||
|
||||
// Cast uint8_t to float on CPU
|
||||
TEST(QnnCPUBackendTests, TestCastUInt8ToFloat) {
  // uint8 -> float on the QNN CPU backend; every node is expected to be assigned to QNN.
  const std::vector<int64_t> input_shape{2, 3};
  constexpr bool use_htp = false;
  RunCastOpTest<uint8_t>(input_shape,
                         ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT,
                         ExpectedEPNodeAssignment::All,
                         "TestCastUInt8ToFloat",
                         use_htp);
}
|
||||
|
||||
// Cast float to int32_t on CPU
|
||||
TEST(QnnCPUBackendTests, TestCastFloatToInt32) {
  // float -> int32 on the QNN CPU backend; every node is expected to be assigned to QNN.
  // The description string must name this test (it previously carried the copy-pasted
  // "TestCastInt32ToFloat"), since it is what gets reported when the run fails.
  RunCastOpTest<float>({2, 3}, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32,
                       ExpectedEPNodeAssignment::All,
                       "TestCastFloatToInt32", false);
}
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
|
||||
//
|
||||
// HTP tests:
|
||||
//
|
||||
|
||||
// Cast int32_t to float on HTP
|
||||
TEST_F(QnnHTPBackendTests, TestCastInt32ToFloatHTP) {
  // int32 -> float on the QNN HTP backend; every node is expected to be assigned to QNN.
  const std::vector<int64_t> input_shape{3, 3};
  constexpr bool use_htp = true;
  RunCastOpTest<int32_t>(input_shape,
                         ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT,
                         ExpectedEPNodeAssignment::All,
                         "TestCastInt32ToFloatHTP",
                         use_htp);
}
|
||||
|
||||
// Cast uint8_t to float on HTP
|
||||
TEST_F(QnnHTPBackendTests, TestCastUInt8ToFloatHTP) {
  // uint8 -> float on the QNN HTP backend; every node is expected to be assigned to QNN.
  const std::vector<int64_t> input_shape{3, 3};
  constexpr bool use_htp = true;
  RunCastOpTest<uint8_t>(input_shape,
                         ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT,
                         ExpectedEPNodeAssignment::All,
                         "TestCastUInt8ToFloatHTP",
                         use_htp);
}
|
||||
|
||||
// Cast float to int32_t on HTP
|
||||
TEST_F(QnnHTPBackendTests, TestCastFloatToInt32HTP) {
  // float -> int32 on the QNN HTP backend; every node is expected to be assigned to QNN.
  const std::vector<int64_t> input_shape{3, 3};
  constexpr bool use_htp = true;
  RunCastOpTest<float>(input_shape,
                       ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32,
                       ExpectedEPNodeAssignment::All,
                       "TestCastFloatToInt32HTP",
                       use_htp);
}
|
||||
#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
|
||||
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
||||
#endif // !defined(ORT_MINIMAL_BUILD)
|
||||
Loading…
Reference in a new issue