mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-24 22:17:32 +00:00
Add sample qdq unit test case for nnapi ep qdq integration (#10358)
* add sample unit test case and make qdq modeltestubuilder shared * update * address pr comments * modify redundant funcs impl * update * update * address pr comments * update * update * update * fix build breaks * minor update * fix bad_alloc in UT * address pr comments Co-authored-by: rachguo <rachguo@rachguos-Mini.attlocal.net> Co-authored-by: Guoyu Wang <wanggy@outlook.com>
This commit is contained in:
parent
0e951d7d6b
commit
ff2057a817
5 changed files with 134 additions and 71 deletions
82
onnxruntime/test/optimizer/qdq_test_utils.h
Normal file
82
onnxruntime/test/optimizer/qdq_test_utils.h
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
// Copyright (c) Microsoft Corporation. All rights reserved.
|
||||
// Licensed under the MIT License.
|
||||
|
||||
#include "graph_transform_test_builder.h"
|
||||
|
||||
#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h"
|
||||
#include "core/session/inference_session.h"
|
||||
|
||||
#include "test/util/include/asserts.h"
|
||||
#include "test/util/include/inference_session_wrapper.h"
|
||||
|
||||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
using GetQDQConvTestCaseFn = std::function<void(ModelTestBuilder& builder)>;
|
||||
|
||||
template <typename T>
|
||||
typename std::enable_if<IsTypeQuantLinearCompatible<T>::value, NodeArg*>::type
|
||||
AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale, T zp = T()) {
|
||||
auto* q_output = builder.MakeIntermediate();
|
||||
auto* dq_output = builder.MakeIntermediate();
|
||||
builder.AddQuantizeLinearNode<T>(q_input, scale, zp, q_output);
|
||||
builder.AddDequantizeLinearNode<T>(q_output, scale, zp, dq_output);
|
||||
return dq_output;
|
||||
}
|
||||
|
||||
// TODO: for now it just builds a conv qdq graph.
|
||||
// can be modified and made it shared among different qdq test graphs associated with other operators
|
||||
template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
|
||||
GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
|
||||
return [input_shape, weights_shape](ModelTestBuilder& builder) {
|
||||
auto* input_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
|
||||
auto* output_arg = builder.MakeOutput();
|
||||
|
||||
using InputLimits = std::numeric_limits<InputType>;
|
||||
using WeightLimits = std::numeric_limits<WeightType>;
|
||||
using OutputLimits = std::numeric_limits<OutputType>;
|
||||
|
||||
InputType input_min_value = InputLimits::min();
|
||||
InputType input_max_value = InputLimits::max();
|
||||
|
||||
WeightType weight_min_value = WeightLimits::min();
|
||||
WeightType weight_max_value = WeightLimits::max();
|
||||
|
||||
// the reason that we reduce weight range by half for int8 weight type comes from the case when
|
||||
// running on cpu, MLAS kernel will overflow for uint8 activation and int8 weight with avx2 and avx512 extension
|
||||
// reduced weight range can prevent the overflow.
|
||||
if constexpr (std::is_same<WeightType, int8_t>::value) {
|
||||
weight_min_value /= 2;
|
||||
weight_max_value /= 2;
|
||||
}
|
||||
|
||||
auto* dq_w_output = builder.MakeIntermediate();
|
||||
auto* weight = builder.MakeInitializer<WeightType>(weights_shape, weight_min_value, weight_max_value);
|
||||
builder.AddDequantizeLinearNode<WeightType>(weight, .03f,
|
||||
(weight_min_value + weight_max_value) / 2 + 1,
|
||||
dq_w_output);
|
||||
|
||||
auto* dq_bias_output = builder.MakeIntermediate();
|
||||
auto* bias = builder.MakeInitializer<BiasType>({weights_shape[0]}, static_cast<BiasType>(0), static_cast<BiasType>(127));
|
||||
builder.AddDequantizeLinearNode<BiasType>(bias, .0012f,
|
||||
0,
|
||||
dq_bias_output);
|
||||
|
||||
auto* conv_output = builder.MakeIntermediate();
|
||||
auto* dq_output = AddQDQNodePair<InputType>(builder, input_arg, .04f,
|
||||
(input_min_value + input_max_value) / 2 + 1);
|
||||
builder.AddNode("Conv", {dq_output, dq_w_output, dq_bias_output}, {conv_output});
|
||||
|
||||
auto* q_output = builder.MakeIntermediate();
|
||||
builder.AddQuantizeLinearNode<OutputType>(conv_output, .039f,
|
||||
(OutputLimits::min() + OutputLimits::max()) / 2 + 1,
|
||||
q_output);
|
||||
|
||||
builder.AddDequantizeLinearNode<OutputType>(q_output, .039f,
|
||||
(OutputLimits::min() + OutputLimits::max()) / 2 + 1,
|
||||
output_arg);
|
||||
};
|
||||
}
|
||||
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
@ -21,6 +21,8 @@
|
|||
#include "gtest/gtest.h"
|
||||
#include "graph_transform_test_builder.h"
|
||||
|
||||
#include "qdq_test_utils.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#pragma warning(disable : 4127)
|
||||
#endif // #if defined(_MSC_VER)
|
||||
|
|
@ -32,76 +34,11 @@
|
|||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
template <typename T>
|
||||
typename std::enable_if<IsTypeQuantLinearCompatible<T>::value, NodeArg*>::type
|
||||
AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale, T zp) {
|
||||
auto* q_output = builder.MakeIntermediate();
|
||||
auto* dq_output = builder.MakeIntermediate();
|
||||
builder.AddQuantizeLinearNode<T>(q_input, scale, zp, q_output);
|
||||
builder.AddDequantizeLinearNode<T>(q_output, scale, zp, dq_output);
|
||||
return dq_output;
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
typename std::enable_if<IsTypeQuantLinearCompatible<T>::value, NodeArg*>::type
|
||||
AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale) {
|
||||
auto* q_output = builder.MakeIntermediate();
|
||||
auto* dq_output = builder.MakeIntermediate();
|
||||
builder.AddQuantizeLinearNode(q_input, scale, q_output);
|
||||
builder.AddDequantizeLinearNode<T>(q_output, scale, dq_output);
|
||||
return dq_output;
|
||||
}
|
||||
|
||||
#ifndef DISABLE_CONTRIB_OPS
|
||||
|
||||
template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
|
||||
void QDQTransformerConvTests() {
|
||||
auto test_case = [&](const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
|
||||
auto build_test_case = [&](ModelTestBuilder& builder) {
|
||||
auto* input_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
|
||||
auto* output_arg = builder.MakeOutput();
|
||||
|
||||
typedef std::numeric_limits<InputType> InputLimits;
|
||||
typedef std::numeric_limits<WeightType> WeightLimits;
|
||||
typedef std::numeric_limits<OutputType> OutputLimits;
|
||||
|
||||
InputType input_min_value = InputLimits::min();
|
||||
InputType input_max_value = InputLimits::max();
|
||||
|
||||
WeightType weight_min_value = WeightLimits::min();
|
||||
WeightType weight_max_value = WeightLimits::max();
|
||||
if (std::is_same<WeightType, int8_t>::value) {
|
||||
weight_min_value /= 2;
|
||||
weight_max_value /= 2;
|
||||
}
|
||||
|
||||
auto* dq_w_output = builder.MakeIntermediate();
|
||||
auto* weight = builder.MakeInitializer<WeightType>(weights_shape, weight_min_value, weight_max_value);
|
||||
builder.AddDequantizeLinearNode<WeightType>(weight, .03f,
|
||||
(weight_min_value + weight_max_value) / 2 + 1,
|
||||
dq_w_output);
|
||||
|
||||
auto* dq_bias_output = builder.MakeIntermediate();
|
||||
auto* bias = builder.MakeInitializer<BiasType>({weights_shape[0]}, static_cast<BiasType>(0), static_cast<BiasType>(127));
|
||||
builder.AddDequantizeLinearNode<BiasType>(bias, .0012f,
|
||||
0,
|
||||
dq_bias_output);
|
||||
|
||||
auto* conv_output = builder.MakeIntermediate();
|
||||
auto* dq_output = AddQDQNodePair<InputType>(builder, input_arg, .04f,
|
||||
(input_min_value + input_max_value) / 2 + 1);
|
||||
builder.AddNode("Conv", {dq_output, dq_w_output, dq_bias_output}, {conv_output});
|
||||
|
||||
auto* q_output = builder.MakeIntermediate();
|
||||
builder.AddQuantizeLinearNode<OutputType>(conv_output, .039f,
|
||||
(OutputLimits::min() + OutputLimits::max()) / 2 + 1,
|
||||
q_output);
|
||||
|
||||
builder.AddDequantizeLinearNode<OutputType>(q_output, .039f,
|
||||
(OutputLimits::min() + OutputLimits::max()) / 2 + 1,
|
||||
output_arg);
|
||||
};
|
||||
|
||||
auto check_conv_graph = [&](InferenceSessionWrapper& session) {
|
||||
auto op_to_count = CountOpsInGraph(session.GetGraph());
|
||||
if constexpr (std::is_same<InputType, OutputType>::value &&
|
||||
|
|
@ -119,7 +56,7 @@ void QDQTransformerConvTests() {
|
|||
}
|
||||
};
|
||||
|
||||
TransformerTester(build_test_case,
|
||||
TransformerTester(BuildQDQConvTestCase<InputType, WeightType, BiasType, OutputType>(input_shape, weights_shape),
|
||||
check_conv_graph,
|
||||
TransformerLevel::Level1,
|
||||
TransformerLevel::Level2,
|
||||
|
|
|
|||
|
|
@ -26,6 +26,10 @@
|
|||
#include "gtest/gtest.h"
|
||||
#include "gmock/gmock.h"
|
||||
|
||||
#if !defined(ORT_MINIMAL_BUILD)
|
||||
#include "test/optimizer/qdq_test_utils.h"
|
||||
#endif
|
||||
|
||||
using namespace std;
|
||||
using namespace ONNX_NAMESPACE;
|
||||
using namespace ::onnxruntime::logging;
|
||||
|
|
@ -250,7 +254,31 @@ TEST(NnapiExecutionProviderTest, TestQDQConvModel) {
|
|||
<< "No nodes should have been taken by the NNAPI EP";
|
||||
}
|
||||
|
||||
#endif // !(ORT_MINIMAL_BUILD
|
||||
#if defined(__ANDROID__)
|
||||
// Builds a QDQ Conv model in memory (uint8 input/weight/output, int32 bias) with the
// shared BuildQDQConvTestCase helper, serializes it, and passes the serialized bytes
// plus the builder's feeds to RunAndVerifyOutputsWithEP with an NNAPI execution provider.
TEST(NnapiExecutionProviderTest, TestQDQModel) {
  onnxruntime::Model model("nnapi_qdq_test_graph", false, DefaultLoggingManager().DefaultLogger());
  Graph& graph = model.MainGraph();
  ModelTestBuilder helper(graph);

  auto build_test_case = BuildQDQConvTestCase<uint8_t, uint8_t, int32_t, uint8_t>({1, 1, 5, 5} /*input_shape*/,
                                                                                  {1, 1, 3, 3} /*weights_shape*/);
  build_test_case(helper);
  helper.SetGraphOutputs();
  ASSERT_STATUS_OK(graph.Resolve());

  // Serialize the model to a string.
  // SerializeToString reports failure through its return value, so check it here
  // instead of handing a possibly incomplete buffer to the session.
  std::string model_data;
  ASSERT_TRUE(model.ToProto().SerializeToString(&model_data)) << "Failed to serialize the QDQ test model";

  RunAndVerifyOutputsWithEP(model_data, "NnapiExecutionProviderTest.TestQDQModel",
                            std::make_unique<NnapiExecutionProvider>(0),
                            helper.feeds_);

  // TODO: can add test load only verification here later
}
|
||||
#endif // defined(__ANDROID__)
|
||||
|
||||
#endif // !(ORT_MINIMAL_BUILD)
|
||||
|
||||
TEST(NnapiExecutionProviderTest, NNAPIFlagsTest) {
|
||||
uint32_t nnapi_flags = NNAPI_FLAG_USE_NONE;
|
||||
|
|
|
|||
|
|
@ -24,5 +24,12 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path,
|
|||
const char* log_id,
|
||||
std::unique_ptr<IExecutionProvider> execution_provider,
|
||||
const NameMLValMap& feeds);
|
||||
|
||||
// Overload that takes the serialized model bytes (model_data) instead of a model file path
|
||||
// used in nnapi qdq model tests
|
||||
void RunAndVerifyOutputsWithEP(const std::string& model_data,
|
||||
const char* log_id,
|
||||
std::unique_ptr<IExecutionProvider> execution_provider,
|
||||
const NameMLValMap& feeds);
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
|
|
@ -69,6 +69,15 @@ int CountAssignedNodes(const Graph& current_graph, const std::string& ep_type) {
|
|||
void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id,
|
||||
std::unique_ptr<IExecutionProvider> execution_provider,
|
||||
const NameMLValMap& feeds) {
|
||||
// read raw data from model provided by the model_path
|
||||
std::ifstream stream(model_path, std::ios::in | std::ios::binary);
|
||||
std::string model_data((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
|
||||
RunAndVerifyOutputsWithEP(model_data, log_id, std::move(execution_provider), feeds);
|
||||
}
|
||||
|
||||
void RunAndVerifyOutputsWithEP(const std::string& model_data, const char* log_id,
|
||||
std::unique_ptr<IExecutionProvider> execution_provider,
|
||||
const NameMLValMap& feeds) {
|
||||
SessionOptions so;
|
||||
so.session_logid = log_id;
|
||||
RunOptions run_options;
|
||||
|
|
@ -78,7 +87,7 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id,
|
|||
// get expected output from CPU EP
|
||||
//
|
||||
InferenceSessionWrapper session_object{so, GetEnvironment()};
|
||||
ASSERT_STATUS_OK(session_object.Load(model_path));
|
||||
ASSERT_STATUS_OK(session_object.Load(model_data.data(), static_cast<int>(model_data.size())));
|
||||
ASSERT_STATUS_OK(session_object.Initialize());
|
||||
|
||||
const auto& graph = session_object.GetGraph();
|
||||
|
|
@ -103,13 +112,13 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id,
|
|||
//
|
||||
InferenceSessionWrapper session_object2{so, GetEnvironment()};
|
||||
ASSERT_STATUS_OK(session_object2.RegisterExecutionProvider(std::move(execution_provider)));
|
||||
ASSERT_STATUS_OK(session_object2.Load(model_path));
|
||||
ASSERT_STATUS_OK(session_object2.Load(model_data.data(), static_cast<int>(model_data.size())));
|
||||
ASSERT_STATUS_OK(session_object2.Initialize());
|
||||
|
||||
// make sure that some nodes are assigned to the EP, otherwise this test is pointless...
|
||||
const auto& graph2 = session_object2.GetGraph();
|
||||
auto ep_nodes = CountAssignedNodes(graph2, provider_type);
|
||||
ASSERT_GT(ep_nodes, 0) << "No nodes were assigned to " << provider_type << " for " << model_path;
|
||||
ASSERT_GT(ep_nodes, 0) << "No nodes were assigned to " << provider_type;
|
||||
|
||||
// Run with EP and verify the result
|
||||
std::vector<OrtValue> fetches;
|
||||
|
|
@ -178,7 +187,7 @@ void SparseIndicesChecker(const ONNX_NAMESPACE::TensorProto& indices_proto, gsl:
|
|||
ASSERT_THAT(ind_span, testing::ContainerEq(expected_indicies));
|
||||
}
|
||||
|
||||
#endif // DISABLE_SPARSE_TENSORS
|
||||
#endif // DISABLE_SPARSE_TENSORS
|
||||
|
||||
} // namespace test
|
||||
} // namespace onnxruntime
|
||||
|
|
|
|||
Loading…
Reference in a new issue