Add sample qdq unit test case for nnapi ep qdq integration (#10358)

* add sample unit test case and make the qdq ModelTestBuilder helpers shared

* update

* address pr comments

* modify redundant funcs impl

* update

* update

* address pr comments

* update

* update

* update

* fix build breaks

* minor update

* fix bad_alloc in UT

* address pr comments

Co-authored-by: rachguo <rachguo@rachguos-Mini.attlocal.net>
Co-authored-by: Guoyu Wang <wanggy@outlook.com>
This commit is contained in:
Rachel Guo 2022-01-27 15:10:41 -08:00 committed by GitHub
parent 0e951d7d6b
commit ff2057a817
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 134 additions and 71 deletions

View file

@ -0,0 +1,82 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include "graph_transform_test_builder.h"
#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h"
#include "core/session/inference_session.h"
#include "test/util/include/asserts.h"
#include "test/util/include/inference_session_wrapper.h"
namespace onnxruntime {
namespace test {
using GetQDQConvTestCaseFn = std::function<void(ModelTestBuilder& builder)>;
// Appends a QuantizeLinear node followed by a DequantizeLinear node after `q_input`.
// Both nodes are created with the same `scale` and zero point `zp` (which defaults to T()).
// Returns the output of the DequantizeLinear node.
// Only enabled when IsTypeQuantLinearCompatible<T>::value is true.
template <typename T>
typename std::enable_if<IsTypeQuantLinearCompatible<T>::value, NodeArg*>::type
AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale, T zp = T()) {
  // Create both intermediates up front, in the same order as before.
  auto* quantized = builder.MakeIntermediate();
  auto* dequantized = builder.MakeIntermediate();

  // q_input -> Q -> quantized -> DQ -> dequantized
  builder.AddQuantizeLinearNode<T>(q_input, scale, zp, quantized);
  builder.AddDequantizeLinearNode<T>(quantized, scale, zp, dequantized);

  return dequantized;
}
// Returns a callable that fills a ModelTestBuilder with a QDQ Conv graph:
//
//   float input -> Q -> DQ ------\
//   weight initializer -> DQ ----+--> Conv -> Q -> DQ -> graph output
//   bias initializer   -> DQ ----/
//
// `input_shape` and `weights_shape` are copied into the returned callable, so it may outlive
// the arguments passed here. The bias initializer has a single dimension of size weights_shape[0].
//
// TODO: for now this only builds a Conv QDQ graph. It can be generalized and shared among
// QDQ test graphs for other operators.
template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
  return [input_shape, weights_shape](ModelTestBuilder& builder) {
    using InputLimits = std::numeric_limits<InputType>;
    using WeightLimits = std::numeric_limits<WeightType>;
    using OutputLimits = std::numeric_limits<OutputType>;

    auto* graph_input = builder.MakeInput<float>(input_shape, -1.f, 1.f);
    auto* graph_output = builder.MakeOutput();

    WeightType weight_lo = WeightLimits::min();
    WeightType weight_hi = WeightLimits::max();

    // The weight range is halved for int8 weights because, when running on CPU, the MLAS kernel
    // will overflow for uint8 activation and int8 weight with the avx2 and avx512 extensions.
    // The reduced weight range prevents that overflow.
    if constexpr (std::is_same_v<WeightType, int8_t>) {
      weight_lo /= 2;
      weight_hi /= 2;
    }

    // Zero points sit one above the midpoint of each value range.
    const auto input_zero_point = (InputLimits::min() + InputLimits::max()) / 2 + 1;
    const auto weight_zero_point = (weight_lo + weight_hi) / 2 + 1;
    const auto output_zero_point = (OutputLimits::min() + OutputLimits::max()) / 2 + 1;

    // Weight branch: initializer -> DQ
    auto* dequantized_weight = builder.MakeIntermediate();
    auto* weight_initializer = builder.MakeInitializer<WeightType>(weights_shape, weight_lo, weight_hi);
    builder.AddDequantizeLinearNode<WeightType>(weight_initializer, .03f,
                                                weight_zero_point,
                                                dequantized_weight);

    // Bias branch: initializer -> DQ
    auto* dequantized_bias = builder.MakeIntermediate();
    auto* bias_initializer = builder.MakeInitializer<BiasType>({weights_shape[0]},
                                                               static_cast<BiasType>(0),
                                                               static_cast<BiasType>(127));
    builder.AddDequantizeLinearNode<BiasType>(bias_initializer, .0012f,
                                              0,
                                              dequantized_bias);

    // Activation branch (Q -> DQ on the float input), then the Conv itself.
    auto* conv_result = builder.MakeIntermediate();
    auto* dequantized_input = AddQDQNodePair<InputType>(builder, graph_input, .04f,
                                                        input_zero_point);
    builder.AddNode("Conv", {dequantized_input, dequantized_weight, dequantized_bias}, {conv_result});

    // Output: Conv -> Q -> DQ -> graph output
    auto* quantized_result = builder.MakeIntermediate();
    builder.AddQuantizeLinearNode<OutputType>(conv_result, .039f,
                                              output_zero_point,
                                              quantized_result);
    builder.AddDequantizeLinearNode<OutputType>(quantized_result, .039f,
                                                output_zero_point,
                                                graph_output);
  };
}
} // namespace test
} // namespace onnxruntime

View file

@ -21,6 +21,8 @@
#include "gtest/gtest.h"
#include "graph_transform_test_builder.h"
#include "qdq_test_utils.h"
#if defined(_MSC_VER)
#pragma warning(disable : 4127)
#endif // #if defined(_MSC_VER)
@ -32,76 +34,11 @@
namespace onnxruntime {
namespace test {
// Appends a QuantizeLinear -> DequantizeLinear pair after `q_input`, with both nodes using
// `scale` and the explicit zero point `zp`. Returns the DequantizeLinear output.
template <typename T>
typename std::enable_if<IsTypeQuantLinearCompatible<T>::value, NodeArg*>::type
AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale, T zp) {
  auto* quantized = builder.MakeIntermediate();
  auto* dequantized = builder.MakeIntermediate();

  // q_input -> Q -> quantized -> DQ -> dequantized
  builder.AddQuantizeLinearNode<T>(q_input, scale, zp, quantized);
  builder.AddDequantizeLinearNode<T>(quantized, scale, zp, dequantized);

  return dequantized;
}
// Appends a QuantizeLinear -> DequantizeLinear pair after `q_input` without passing a zero point:
// both nodes are added through the builder overloads that take only a scale.
// Returns the DequantizeLinear output.
template <typename T>
typename std::enable_if<IsTypeQuantLinearCompatible<T>::value, NodeArg*>::type
AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale) {
  auto* quantized = builder.MakeIntermediate();
  auto* dequantized = builder.MakeIntermediate();

  // q_input -> Q -> quantized -> DQ -> dequantized
  builder.AddQuantizeLinearNode(q_input, scale, quantized);
  builder.AddDequantizeLinearNode<T>(quantized, scale, dequantized);

  return dequantized;
}
#ifndef DISABLE_CONTRIB_OPS
template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
void QDQTransformerConvTests() {
auto test_case = [&](const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
auto build_test_case = [&](ModelTestBuilder& builder) {
auto* input_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
auto* output_arg = builder.MakeOutput();
typedef std::numeric_limits<InputType> InputLimits;
typedef std::numeric_limits<WeightType> WeightLimits;
typedef std::numeric_limits<OutputType> OutputLimits;
InputType input_min_value = InputLimits::min();
InputType input_max_value = InputLimits::max();
WeightType weight_min_value = WeightLimits::min();
WeightType weight_max_value = WeightLimits::max();
if (std::is_same<WeightType, int8_t>::value) {
weight_min_value /= 2;
weight_max_value /= 2;
}
auto* dq_w_output = builder.MakeIntermediate();
auto* weight = builder.MakeInitializer<WeightType>(weights_shape, weight_min_value, weight_max_value);
builder.AddDequantizeLinearNode<WeightType>(weight, .03f,
(weight_min_value + weight_max_value) / 2 + 1,
dq_w_output);
auto* dq_bias_output = builder.MakeIntermediate();
auto* bias = builder.MakeInitializer<BiasType>({weights_shape[0]}, static_cast<BiasType>(0), static_cast<BiasType>(127));
builder.AddDequantizeLinearNode<BiasType>(bias, .0012f,
0,
dq_bias_output);
auto* conv_output = builder.MakeIntermediate();
auto* dq_output = AddQDQNodePair<InputType>(builder, input_arg, .04f,
(input_min_value + input_max_value) / 2 + 1);
builder.AddNode("Conv", {dq_output, dq_w_output, dq_bias_output}, {conv_output});
auto* q_output = builder.MakeIntermediate();
builder.AddQuantizeLinearNode<OutputType>(conv_output, .039f,
(OutputLimits::min() + OutputLimits::max()) / 2 + 1,
q_output);
builder.AddDequantizeLinearNode<OutputType>(q_output, .039f,
(OutputLimits::min() + OutputLimits::max()) / 2 + 1,
output_arg);
};
auto check_conv_graph = [&](InferenceSessionWrapper& session) {
auto op_to_count = CountOpsInGraph(session.GetGraph());
if constexpr (std::is_same<InputType, OutputType>::value &&
@ -119,7 +56,7 @@ void QDQTransformerConvTests() {
}
};
TransformerTester(build_test_case,
TransformerTester(BuildQDQConvTestCase<InputType, WeightType, BiasType, OutputType>(input_shape, weights_shape),
check_conv_graph,
TransformerLevel::Level1,
TransformerLevel::Level2,

View file

@ -26,6 +26,10 @@
#include "gtest/gtest.h"
#include "gmock/gmock.h"
#if !defined(ORT_MINIMAL_BUILD)
#include "test/optimizer/qdq_test_utils.h"
#endif
using namespace std;
using namespace ONNX_NAMESPACE;
using namespace ::onnxruntime::logging;
@ -250,7 +254,31 @@ TEST(NnapiExecutionProviderTest, TestQDQConvModel) {
<< "No nodes should have been taken by the NNAPI EP";
}
#endif // !(ORT_MINIMAL_BUILD
#if defined(__ANDROID__)
// Builds a uint8 QDQ Conv model in memory with the shared BuildQDQConvTestCase helper,
// serializes it, and passes the bytes plus the generated feeds to RunAndVerifyOutputsWithEP
// together with an NNAPI execution provider.
TEST(NnapiExecutionProviderTest, TestQDQModel) {
  onnxruntime::Model model("nnapi_qdq_test_graph", false, DefaultLoggingManager().DefaultLogger());
  Graph& graph = model.MainGraph();
  ModelTestBuilder helper(graph);

  auto build_test_case = BuildQDQConvTestCase<uint8_t, uint8_t, int32_t, uint8_t>({1, 1, 5, 5} /*input_shape*/,
                                                                                  {1, 1, 3, 3} /*weights_shape*/);
  build_test_case(helper);
  helper.SetGraphOutputs();
  ASSERT_STATUS_OK(graph.Resolve());

  // Serialize the model to a string. SerializeToString reports failure through its return
  // value, so check it here instead of handing a partial/empty buffer to the session.
  std::string model_data;
  ASSERT_TRUE(model.ToProto().SerializeToString(&model_data)) << "Failed to serialize the QDQ test model";

  RunAndVerifyOutputsWithEP(model_data, "NnapiExecutionProviderTest.TestQDQModel",
                            std::make_unique<NnapiExecutionProvider>(0),
                            helper.feeds_);

  // TODO: can add test load only verification here later
}
#endif // defined(__ANDROID__)
#endif // !(ORT_MINIMAL_BUILD)
TEST(NnapiExecutionProviderTest, NNAPIFlagsTest) {
uint32_t nnapi_flags = NNAPI_FLAG_USE_NONE;

View file

@ -24,5 +24,12 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path,
const char* log_id,
std::unique_ptr<IExecutionProvider> execution_provider,
const NameMLValMap& feeds);
// Overload of RunAndVerifyOutputsWithEP that takes the serialized model bytes (`model_data`)
// instead of a model path, so a model built in memory can be tested without writing it to disk.
// Used in the NNAPI QDQ model tests.
// `log_id` is used as the session log id, `execution_provider` is the provider under test
// (ownership is transferred), and `feeds` are the inputs used for the runs.
void RunAndVerifyOutputsWithEP(const std::string& model_data,
const char* log_id,
std::unique_ptr<IExecutionProvider> execution_provider,
const NameMLValMap& feeds);
} // namespace test
} // namespace onnxruntime

View file

@ -69,6 +69,15 @@ int CountAssignedNodes(const Graph& current_graph, const std::string& ep_type) {
void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id,
std::unique_ptr<IExecutionProvider> execution_provider,
const NameMLValMap& feeds) {
// read raw data from model provided by the model_path
std::ifstream stream(model_path, std::ios::in | std::ios::binary);
std::string model_data((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
RunAndVerifyOutputsWithEP(model_data, log_id, std::move(execution_provider), feeds);
}
void RunAndVerifyOutputsWithEP(const std::string& model_data, const char* log_id,
std::unique_ptr<IExecutionProvider> execution_provider,
const NameMLValMap& feeds) {
SessionOptions so;
so.session_logid = log_id;
RunOptions run_options;
@ -78,7 +87,7 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id,
// get expected output from CPU EP
//
InferenceSessionWrapper session_object{so, GetEnvironment()};
ASSERT_STATUS_OK(session_object.Load(model_path));
ASSERT_STATUS_OK(session_object.Load(model_data.data(), static_cast<int>(model_data.size())));
ASSERT_STATUS_OK(session_object.Initialize());
const auto& graph = session_object.GetGraph();
@ -103,13 +112,13 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id,
//
InferenceSessionWrapper session_object2{so, GetEnvironment()};
ASSERT_STATUS_OK(session_object2.RegisterExecutionProvider(std::move(execution_provider)));
ASSERT_STATUS_OK(session_object2.Load(model_path));
ASSERT_STATUS_OK(session_object2.Load(model_data.data(), static_cast<int>(model_data.size())));
ASSERT_STATUS_OK(session_object2.Initialize());
// make sure that some nodes are assigned to the EP, otherwise this test is pointless...
const auto& graph2 = session_object2.GetGraph();
auto ep_nodes = CountAssignedNodes(graph2, provider_type);
ASSERT_GT(ep_nodes, 0) << "No nodes were assigned to " << provider_type << " for " << model_path;
ASSERT_GT(ep_nodes, 0) << "No nodes were assigned to " << provider_type;
// Run with EP and verify the result
std::vector<OrtValue> fetches;
@ -178,7 +187,7 @@ void SparseIndicesChecker(const ONNX_NAMESPACE::TensorProto& indices_proto, gsl:
ASSERT_THAT(ind_span, testing::ContainerEq(expected_indicies));
}
#endif // DISABLE_SPARSE_TENSORS
#endif // DISABLE_SPARSE_TENSORS
} // namespace test
} // namespace onnxruntime