[QNN EP] Support Cast in HTP backend (#15234)

### Description
Adds support for the Cast operator to the QNN HTP backend.



### Motivation and Context
Enable more models to run on the QNN HTP backend.
This commit is contained in:
Adrian Lizarraga 2023-03-29 11:01:34 -07:00 committed by GitHub
parent a6279d4cfb
commit febc69e1b2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 272 additions and 4 deletions

View file

@ -19,7 +19,6 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
CreateSimpleOpBuilder("Abs", *this);
CreateSimpleOpBuilder("And", *this);
CreateSimpleOpBuilder("Ceil", *this);
CreateSimpleOpBuilder("Cast", *this);
CreateSimpleOpBuilder("Cos", *this);
CreateSimpleOpBuilder("Div", *this);
CreateSimpleOpBuilder("Equal", *this);
@ -54,6 +53,10 @@ OpBuilderRegistrations::OpBuilderRegistrations() {
CreateSimpleOpBuilder("Concat", *this);
}
{
CreateCastOpBuilder("Cast", *this);
}
{
CreateReduceOpBuilder("ReduceMax", *this);
CreateReduceOpBuilder("ReduceMean", *this);

View file

@ -50,6 +50,8 @@ const IOpBuilder* GetOpBuilder(const std::string& onnx_op_type);
void CreateSimpleOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateCastOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreateConvOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
void CreatePoolOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);

View file

@ -0,0 +1,138 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include <string>
#include <vector>
#include "core/framework/tensorprotoutils.h"
#include "core/providers/qnn/builder/qnn_model_wrapper.h"
#include "core/providers/qnn/builder/op_builder_factory.h"
#include "base_op_builder.h"
namespace onnxruntime {
namespace qnn {
// Op builder for the ONNX Cast operator in the QNN execution provider.
// Derives from BaseOpBuilder and overrides both the input-processing step and the
// attribute/output-processing step; both overrides are defined later in this file.
class CastOpBuilder : public BaseOpBuilder {
public:
// Passes the builder's name ("CastOpBuilder") to the BaseOpBuilder constructor.
CastOpBuilder() : BaseOpBuilder("CastOpBuilder") {}
// Per the macro's name, disables copy, assignment and move for this builder.
ORT_DISALLOW_COPY_ASSIGNMENT_AND_MOVE(CastOpBuilder);
protected:
// Registers the Cast node's single input tensor with the QNN model wrapper and appends
// its name to `input_names`. `is_quantized_model` and `do_op_validation` are ignored
// by the implementation in this file.
Status ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
const logging::Logger& logger,
bool is_quantized_model,
std::vector<std::string>& input_names,
bool do_op_validation = false) const override ORT_MUST_USE_RESULT;
// Registers the Cast node's single output tensor and creates the QNN node, consuming
// `input_names`. `logger` and `is_quantized_model` are ignored by the implementation
// in this file; `do_op_validation` is forwarded to node creation.
Status ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
const NodeUnit& node_unit,
std::vector<std::string>&& input_names,
const logging::Logger& logger,
bool is_quantized_model,
bool do_op_validation) const override ORT_MUST_USE_RESULT;
};
// Registers the single input of a Cast node with the QNN model wrapper.
//
// If a tensor wrapper with the input's name already exists, nothing new is registered.
// Otherwise a QnnTensorWrapper is built from the input's ONNX shape and (non-quantized)
// data type, including the unpacked initializer bytes when the input is an initializer.
// In both cases the input name is appended to `input_names`.
//
// Returns a non-OK Status if initializer unpacking, shape lookup, data type lookup, or
// tensor registration fails. Enforces (via ORT_ENFORCE) that the node has exactly one input.
Status CastOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
                                    const NodeUnit& node_unit,
                                    const logging::Logger& logger,
                                    bool is_quantized_model,
                                    std::vector<std::string>& input_names,
                                    bool do_op_validation) const {
  ORT_UNUSED_PARAMETER(do_op_validation);
  ORT_UNUSED_PARAMETER(is_quantized_model);  // Ignore in all backends. Cast should use same QNN types across backends.

  const auto& node_inputs = node_unit.Inputs();
  ORT_ENFORCE(node_inputs.size() == 1, "QNN Cast node must have a single input.");

  const auto& cast_input = node_inputs[0];
  const auto& cast_input_name = cast_input.node_arg.Name();

  if (qnn_model_wrapper.IsQnnTensorWrapperExist(cast_input_name)) {
    // Another node already registered this tensor; only the name needs to be recorded.
    LOGS(logger, VERBOSE) << "Tensor already added, skip it: " << cast_input_name;
  } else {
    // Initializer inputs carry their raw data along with the tensor wrapper.
    std::vector<uint8_t> initializer_bytes;
    if (qnn_model_wrapper.IsInitializerInput(cast_input_name)) {
      const auto& initializer = qnn_model_wrapper.GetInitializerTensors().at(cast_input_name);
      ORT_RETURN_IF_ERROR(onnxruntime::utils::UnpackInitializerData(*initializer, initializer_bytes));
    }

    const Qnn_TensorType_t qnn_tensor_type = GetInputTensorType(qnn_model_wrapper, cast_input_name);

    std::vector<uint32_t> dims;
    ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(cast_input.node_arg, dims),
                      "Cannot get shape for QNN Cast node's input.");

    // Do not try to get the quantized type. HTP cast supports normal types.
    Qnn_DataType_t element_type = QNN_DATATYPE_UNDEFINED;
    ORT_RETURN_IF_ERROR(GetQnnDataType(false, cast_input.node_arg.TypeAsProto(), element_type));

    QnnTensorWrapper wrapper(cast_input_name, qnn_tensor_type, element_type, QNN_QUANTIZE_PARAMS_INIT,
                             std::move(dims), std::move(initializer_bytes));
    ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(wrapper)),
                      "Failed to add input tensor for QNN Cast node.");
  }

  input_names.push_back(cast_input_name);
  return Status::OK();
}
// Registers the single output of a Cast node and creates the corresponding QNN node.
//
// The output tensor uses the (non-quantized) QNN data type derived from the output's ONNX
// type and the output's ONNX shape. It is marked APP_READ when it is a graph output and
// NATIVE otherwise. The QNN node is created with the supplied `input_names`, the single
// output name, and no parameter tensors; `do_op_validation` is forwarded to node creation.
//
// Returns a non-OK Status if data type lookup, shape lookup, tensor registration, or node
// creation fails. Enforces (via ORT_ENFORCE) that the node has exactly one output.
Status CastOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_wrapper,
                                                  const NodeUnit& node_unit,
                                                  std::vector<std::string>&& input_names,
                                                  const logging::Logger& logger,
                                                  bool is_quantized_model,
                                                  bool do_op_validation) const {
  ORT_UNUSED_PARAMETER(logger);
  ORT_UNUSED_PARAMETER(is_quantized_model);  // Ignore in all backends. Cast should use same QNN types across backends.

  const auto& node_outputs = node_unit.Outputs();
  ORT_ENFORCE(node_outputs.size() == 1, "QNN Cast node must have a single output.");

  const auto& cast_output = node_outputs[0];
  const auto& cast_output_name = cast_output.node_arg.Name();

  // Do not try to get the quantized type. HTP cast supports normal types.
  Qnn_DataType_t element_type = QNN_DATATYPE_UNDEFINED;
  ORT_RETURN_IF_ERROR(GetQnnDataType(false, cast_output.node_arg.TypeAsProto(), element_type));

  std::vector<uint32_t> dims;
  ORT_RETURN_IF_NOT(qnn_model_wrapper.GetOnnxShape(cast_output.node_arg, dims),
                    "Cannot get shape for QNN Cast node's output.");

  // Graph outputs are tagged APP_READ; every other output is tagged NATIVE.
  Qnn_TensorType_t qnn_tensor_type = QNN_TENSOR_TYPE_NATIVE;
  if (qnn_model_wrapper.IsGraphOutput(cast_output_name)) {
    qnn_tensor_type = QNN_TENSOR_TYPE_APP_READ;
  }

  QnnTensorWrapper wrapper(cast_output_name, qnn_tensor_type, element_type, QNN_QUANTIZE_PARAMS_INIT,
                           std::move(dims));
  ORT_RETURN_IF_NOT(qnn_model_wrapper.AddTensorWrapper(std::move(wrapper)),
                    "Failed to add output tensor for QNN Cast node.");

  ORT_RETURN_IF_NOT(qnn_model_wrapper.CreateQnnNode(GetNodeName(node_unit),
                                                    qnn_def::package_name,
                                                    GetQnnOpType(node_unit.OpType()),
                                                    std::move(input_names),
                                                    std::vector<std::string>{cast_output_name},
                                                    std::vector<std::string>{},
                                                    do_op_validation),
                    "Failed to create QNN Cast node.");
  return Status::OK();
}
void CreateCastOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations) {
op_registrations.AddOpBuilder(op_type, std::make_unique<CastOpBuilder>());
}
} // namespace qnn
} // namespace onnxruntime

View file

@ -83,6 +83,7 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp
// Is NPU backend, is single node, case by case
// Q/DQ nodes -- supported
// Transpose nodes -- supported
// Cast nodes -- need to call CastOpBuilder::IsOpSupported
if (is_npu_backend && NodeUnit::Type::SingleNode == node_unit.UnitType()) {
if (IsQdqNode(node_unit)) { // Qnn has Quantize & Dequantize Op
LOGS(logger, VERBOSE) << "Single Q/DQ node is supported for NPU backend. Node name: " << node_unit.Name();
@ -95,9 +96,13 @@ bool QNNExecutionProvider::IsNodeSupported(qnn::QnnModelWrapper& qnn_model_wrapp
return true;
}
LOGS(logger, VERBOSE) << "Non-QDQ single node is not supported for NPU backend. Node name: " << node_unit.Name()
<< " Op type: " << node_unit.OpType();
return false;
// For Cast, need to call IsOpSupported (below) to validate input and output types.
// For other single non-qdq nodes, immediately return not supported.
if (node_unit.OpType() != "Cast") {
LOGS(logger, VERBOSE) << "Non-QDQ single node is not supported for NPU backend. Node name: " << node_unit.Name()
<< " Op type: " << node_unit.OpType();
return false;
}
}
// Non-NPU backend, quantized model not supported, but a QDQ node encountered

View file

@ -0,0 +1,120 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#if !defined(ORT_MINIMAL_BUILD)
#include <string>
#include <unordered_map>
#include "test/optimizer/qdq_test_utils.h"
#include "test/providers/qnn/qnn_test_utils.h"
#include "onnx/onnx_pb.h"
#include "gtest/gtest.h"
namespace onnxruntime {
namespace test {
/**
 * Returns a callback that builds a graph containing a single Cast node.
 *
 * The callback creates one input of element type InputType with the given shape
 * (values requested from the builder in the range [0, 20]), one output, and a
 * Cast node whose "to" attribute is set to the destination type.
 *
 * \param shape The shape of the input and output.
 * \param dst_type The destination type as an instance of the DataType enum in TensorProto.
 *
 * \return A function that builds the graph with the provided builder.
 */
template <typename InputType>
static GetTestModelFn BuildCastTestCase(const std::vector<int64_t>& shape,
                                        ONNX_NAMESPACE::TensorProto_DataType dst_type) {
  // The "to" attribute is stored as a 64-bit integer; convert once up front.
  const int64_t to_attr = static_cast<int64_t>(dst_type);

  return [shape, to_attr](ModelTestBuilder& builder) {
    const InputType range_min = static_cast<InputType>(0);
    const InputType range_max = static_cast<InputType>(20);

    auto* cast_input = builder.MakeInput<InputType>(shape, range_min, range_max);
    auto* cast_output = builder.MakeOutput();

    builder.AddNode("Cast", {cast_input}, {cast_output}).AddAttribute("to", to_attr);
  };
}
/**
 * Runs a single-Cast-node model through RunQnnModelTest on the QNN CPU or HTP backend,
 * using opset 13 and expecting one node in the QNN partition.
 * RunQnnModelTest is expected to check the graph node assignment and compare the
 * QNN inference outputs against the CPU outputs.
 *
 * \param shape The shape of the input and output.
 * \param dst_type The destination type as an instance of the DataType enum in TensorProto.
 * \param expected_ep_assignment How many nodes are expected to be assigned to QNN (All, Some, or None).
 * \param test_description Description of the test for error reporting.
 * \param use_htp True to run on HTP backend. Otherwise, runs on CPU.
 */
template <typename InputType>
static void RunCastOpTest(const std::vector<int64_t>& shape, ONNX_NAMESPACE::TensorProto_DataType dst_type,
                          ExpectedEPNodeAssignment expected_ep_assignment, const char* test_description,
                          bool use_htp) {
  // Pick the platform-specific backend library for the requested backend.
#if defined(_WIN32)
  const char* const backend_library = use_htp ? "QnnHtp.dll" : "QnnCpu.dll";
#else
  const char* const backend_library = use_htp ? "libQnnHtp.so" : "libQnnCpu.so";
#endif

  ProviderOptions provider_options;
  provider_options["backend_path"] = backend_library;

  constexpr int opset_version = 13;
  constexpr int expected_nodes_in_partition = 1;

  RunQnnModelTest(BuildCastTestCase<InputType>(shape, dst_type),
                  provider_options,
                  opset_version,
                  expected_ep_assignment,
                  expected_nodes_in_partition,
                  test_description);
}
//
// CPU tests:
//
// CPU backend: Cast a {2, 3} int32_t input to float; all nodes are expected to be assigned to QNN.
TEST(QnnCPUBackendTests, TestCastInt32ToFloat) {
  const std::vector<int64_t> shape = {2, 3};
  constexpr auto dst_type = ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT;
  constexpr bool use_htp = false;

  RunCastOpTest<int32_t>(shape, dst_type, ExpectedEPNodeAssignment::All, "TestCastInt32ToFloat", use_htp);
}
// CPU backend: Cast a {2, 3} uint8_t input to float; all nodes are expected to be assigned to QNN.
TEST(QnnCPUBackendTests, TestCastUInt8ToFloat) {
  const std::vector<int64_t> shape = {2, 3};
  constexpr auto dst_type = ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT;
  constexpr bool use_htp = false;

  RunCastOpTest<uint8_t>(shape, dst_type, ExpectedEPNodeAssignment::All, "TestCastUInt8ToFloat", use_htp);
}
// CPU backend: Cast a {2, 3} float input to int32_t; all nodes are expected to be assigned to QNN.
// The description string is used for error reporting, so it must name this test
// (it previously carried the name of the int32 -> float test).
TEST(QnnCPUBackendTests, TestCastFloatToInt32) {
  RunCastOpTest<float>({2, 3}, ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32,
                       ExpectedEPNodeAssignment::All, "TestCastFloatToInt32", false);
}
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
//
// HTP tests:
//
// HTP backend: Cast a {3, 3} int32_t input to float; all nodes are expected to be assigned to QNN.
TEST_F(QnnHTPBackendTests, TestCastInt32ToFloatHTP) {
  const std::vector<int64_t> shape = {3, 3};
  constexpr auto dst_type = ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT;
  constexpr bool use_htp = true;

  RunCastOpTest<int32_t>(shape, dst_type, ExpectedEPNodeAssignment::All, "TestCastInt32ToFloatHTP", use_htp);
}
// HTP backend: Cast a {3, 3} uint8_t input to float; all nodes are expected to be assigned to QNN.
TEST_F(QnnHTPBackendTests, TestCastUInt8ToFloatHTP) {
  const std::vector<int64_t> shape = {3, 3};
  constexpr auto dst_type = ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_FLOAT;
  constexpr bool use_htp = true;

  RunCastOpTest<uint8_t>(shape, dst_type, ExpectedEPNodeAssignment::All, "TestCastUInt8ToFloatHTP", use_htp);
}
// HTP backend: Cast a {3, 3} float input to int32_t; all nodes are expected to be assigned to QNN.
TEST_F(QnnHTPBackendTests, TestCastFloatToInt32HTP) {
  const std::vector<int64_t> shape = {3, 3};
  constexpr auto dst_type = ONNX_NAMESPACE::TensorProto_DataType::TensorProto_DataType_INT32;
  constexpr bool use_htp = true;

  RunCastOpTest<float>(shape, dst_type, ExpectedEPNodeAssignment::All, "TestCastFloatToInt32HTP", use_htp);
}
#endif // defined(__aarch64__) || defined(_M_ARM64) || defined(__linux__)
} // namespace test
} // namespace onnxruntime
#endif // !defined(ORT_MINIMAL_BUILD)