diff --git a/onnxruntime/test/optimizer/qdq_test_utils.h b/onnxruntime/test/optimizer/qdq_test_utils.h new file mode 100644 index 0000000000..2327ba7485 --- /dev/null +++ b/onnxruntime/test/optimizer/qdq_test_utils.h @@ -0,0 +1,82 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "graph_transform_test_builder.h" + +#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h" +#include "core/session/inference_session.h" + +#include "test/util/include/asserts.h" +#include "test/util/include/inference_session_wrapper.h" + +namespace onnxruntime { +namespace test { + +using GetQDQConvTestCaseFn = std::function; + +template +typename std::enable_if::value, NodeArg*>::type +AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale, T zp = T()) { + auto* q_output = builder.MakeIntermediate(); + auto* dq_output = builder.MakeIntermediate(); + builder.AddQuantizeLinearNode(q_input, scale, zp, q_output); + builder.AddDequantizeLinearNode(q_output, scale, zp, dq_output); + return dq_output; +} + +// TODO: for now it just builds a conv qdq graph. 
+// can be modified and shared among different qdq test graphs associated with other operators +template +GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector& input_shape, const std::vector& weights_shape) { + return [input_shape, weights_shape](ModelTestBuilder& builder) { + auto* input_arg = builder.MakeInput(input_shape, -1.f, 1.f); + auto* output_arg = builder.MakeOutput(); + + using InputLimits = std::numeric_limits; + using WeightLimits = std::numeric_limits; + using OutputLimits = std::numeric_limits; + + InputType input_min_value = InputLimits::min(); + InputType input_max_value = InputLimits::max(); + + WeightType weight_min_value = WeightLimits::min(); + WeightType weight_max_value = WeightLimits::max(); + + // the reason that we reduce weight range by half for int8 weight type comes from the case when + // running on cpu, MLAS kernel will overflow for uint8 activation and int8 weight with avx2 and avx512 extension + // reduced weight range can prevent the overflow.
+ if constexpr (std::is_same::value) { + weight_min_value /= 2; + weight_max_value /= 2; + } + + auto* dq_w_output = builder.MakeIntermediate(); + auto* weight = builder.MakeInitializer(weights_shape, weight_min_value, weight_max_value); + builder.AddDequantizeLinearNode(weight, .03f, + (weight_min_value + weight_max_value) / 2 + 1, + dq_w_output); + + auto* dq_bias_output = builder.MakeIntermediate(); + auto* bias = builder.MakeInitializer({weights_shape[0]}, static_cast(0), static_cast(127)); + builder.AddDequantizeLinearNode(bias, .0012f, + 0, + dq_bias_output); + + auto* conv_output = builder.MakeIntermediate(); + auto* dq_output = AddQDQNodePair(builder, input_arg, .04f, + (input_min_value + input_max_value) / 2 + 1); + builder.AddNode("Conv", {dq_output, dq_w_output, dq_bias_output}, {conv_output}); + + auto* q_output = builder.MakeIntermediate(); + builder.AddQuantizeLinearNode(conv_output, .039f, + (OutputLimits::min() + OutputLimits::max()) / 2 + 1, + q_output); + + builder.AddDequantizeLinearNode(q_output, .039f, + (OutputLimits::min() + OutputLimits::max()) / 2 + 1, + output_arg); + }; +} + +} // namespace test +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc index 9d74b8324b..10d3bec7df 100644 --- a/onnxruntime/test/optimizer/qdq_transformer_test.cc +++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc @@ -21,6 +21,8 @@ #include "gtest/gtest.h" #include "graph_transform_test_builder.h" +#include "qdq_test_utils.h" + #if defined(_MSC_VER) #pragma warning(disable : 4127) #endif // #if defined(_MSC_VER) @@ -32,76 +34,11 @@ namespace onnxruntime { namespace test { -template -typename std::enable_if::value, NodeArg*>::type -AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale, T zp) { - auto* q_output = builder.MakeIntermediate(); - auto* dq_output = builder.MakeIntermediate(); - 
builder.AddQuantizeLinearNode(q_input, scale, zp, q_output); - builder.AddDequantizeLinearNode(q_output, scale, zp, dq_output); - return dq_output; -} - -template -typename std::enable_if::value, NodeArg*>::type -AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale) { - auto* q_output = builder.MakeIntermediate(); - auto* dq_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(q_input, scale, q_output); - builder.AddDequantizeLinearNode(q_output, scale, dq_output); - return dq_output; -} - #ifndef DISABLE_CONTRIB_OPS template void QDQTransformerConvTests() { auto test_case = [&](const std::vector& input_shape, const std::vector& weights_shape) { - auto build_test_case = [&](ModelTestBuilder& builder) { - auto* input_arg = builder.MakeInput(input_shape, -1.f, 1.f); - auto* output_arg = builder.MakeOutput(); - - typedef std::numeric_limits InputLimits; - typedef std::numeric_limits WeightLimits; - typedef std::numeric_limits OutputLimits; - - InputType input_min_value = InputLimits::min(); - InputType input_max_value = InputLimits::max(); - - WeightType weight_min_value = WeightLimits::min(); - WeightType weight_max_value = WeightLimits::max(); - if (std::is_same::value) { - weight_min_value /= 2; - weight_max_value /= 2; - } - - auto* dq_w_output = builder.MakeIntermediate(); - auto* weight = builder.MakeInitializer(weights_shape, weight_min_value, weight_max_value); - builder.AddDequantizeLinearNode(weight, .03f, - (weight_min_value + weight_max_value) / 2 + 1, - dq_w_output); - - auto* dq_bias_output = builder.MakeIntermediate(); - auto* bias = builder.MakeInitializer({weights_shape[0]}, static_cast(0), static_cast(127)); - builder.AddDequantizeLinearNode(bias, .0012f, - 0, - dq_bias_output); - - auto* conv_output = builder.MakeIntermediate(); - auto* dq_output = AddQDQNodePair(builder, input_arg, .04f, - (input_min_value + input_max_value) / 2 + 1); - builder.AddNode("Conv", {dq_output, dq_w_output, dq_bias_output}, 
{conv_output}); - - auto* q_output = builder.MakeIntermediate(); - builder.AddQuantizeLinearNode(conv_output, .039f, - (OutputLimits::min() + OutputLimits::max()) / 2 + 1, - q_output); - - builder.AddDequantizeLinearNode(q_output, .039f, - (OutputLimits::min() + OutputLimits::max()) / 2 + 1, - output_arg); - }; - auto check_conv_graph = [&](InferenceSessionWrapper& session) { auto op_to_count = CountOpsInGraph(session.GetGraph()); if constexpr (std::is_same::value && @@ -119,7 +56,7 @@ void QDQTransformerConvTests() { } }; - TransformerTester(build_test_case, + TransformerTester(BuildQDQConvTestCase(input_shape, weights_shape), check_conv_graph, TransformerLevel::Level1, TransformerLevel::Level2, diff --git a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc index 2cf6234f84..c5e2c43780 100644 --- a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc +++ b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc @@ -26,6 +26,10 @@ #include "gtest/gtest.h" #include "gmock/gmock.h" +#if !defined(ORT_MINIMAL_BUILD) +#include "test/optimizer/qdq_test_utils.h" +#endif + using namespace std; using namespace ONNX_NAMESPACE; using namespace ::onnxruntime::logging; @@ -250,7 +254,31 @@ TEST(NnapiExecutionProviderTest, TestQDQConvModel) { << "No nodes should have been taken by the NNAPI EP"; } -#endif // !(ORT_MINIMAL_BUILD +#if defined(__ANDROID__) +TEST(NnapiExecutionProviderTest, TestQDQModel) { + onnxruntime::Model model("nnapi_qdq_test_graph", false, DefaultLoggingManager().DefaultLogger()); + Graph& graph = model.MainGraph(); + ModelTestBuilder helper(graph); + + auto build_test_case = BuildQDQConvTestCase({1, 1, 5, 5} /*input_shape*/, + {1, 1, 3, 3} /*weights_shape*/); + build_test_case(helper); + helper.SetGraphOutputs(); + ASSERT_STATUS_OK(model.MainGraph().Resolve()); + + // Serialize the model to a string. 
+ std::string model_data; + model.ToProto().SerializeToString(&model_data); + + RunAndVerifyOutputsWithEP(model_data, "NnapiExecutionProviderTest.TestQDQModel", + std::make_unique(0), + helper.feeds_); + + // TODO: can add test load only verification here later +} +#endif // defined(__ANDROID__) + +#endif // !(ORT_MINIMAL_BUILD) TEST(NnapiExecutionProviderTest, NNAPIFlagsTest) { uint32_t nnapi_flags = NNAPI_FLAG_USE_NONE; diff --git a/onnxruntime/test/util/include/test_utils.h b/onnxruntime/test/util/include/test_utils.h index 388db559fc..60d2a3e8ca 100644 --- a/onnxruntime/test/util/include/test_utils.h +++ b/onnxruntime/test/util/include/test_utils.h @@ -24,5 +24,12 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id, std::unique_ptr execution_provider, const NameMLValMap& feeds); + +// helper function that takes in model_data +// used in nnapi qdq model tests +void RunAndVerifyOutputsWithEP(const std::string& model_data, + const char* log_id, + std::unique_ptr execution_provider, + const NameMLValMap& feeds); } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/util/test_utils.cc b/onnxruntime/test/util/test_utils.cc index 1ccfb359fc..476fed4282 100644 --- a/onnxruntime/test/util/test_utils.cc +++ b/onnxruntime/test/util/test_utils.cc @@ -69,6 +69,15 @@ int CountAssignedNodes(const Graph& current_graph, const std::string& ep_type) { void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id, std::unique_ptr execution_provider, const NameMLValMap& feeds) { + // read raw data from model provided by the model_path + std::ifstream stream(model_path, std::ios::in | std::ios::binary); + std::string model_data((std::istreambuf_iterator(stream)), std::istreambuf_iterator()); + RunAndVerifyOutputsWithEP(model_data, log_id, std::move(execution_provider), feeds); +} + +void RunAndVerifyOutputsWithEP(const std::string& model_data, const char* log_id, + std::unique_ptr execution_provider, + const
NameMLValMap& feeds) { SessionOptions so; so.session_logid = log_id; RunOptions run_options; @@ -78,7 +87,7 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id, // get expected output from CPU EP // InferenceSessionWrapper session_object{so, GetEnvironment()}; - ASSERT_STATUS_OK(session_object.Load(model_path)); + ASSERT_STATUS_OK(session_object.Load(model_data.data(), static_cast(model_data.size()))); ASSERT_STATUS_OK(session_object.Initialize()); const auto& graph = session_object.GetGraph(); @@ -103,13 +112,13 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id, // InferenceSessionWrapper session_object2{so, GetEnvironment()}; ASSERT_STATUS_OK(session_object2.RegisterExecutionProvider(std::move(execution_provider))); - ASSERT_STATUS_OK(session_object2.Load(model_path)); + ASSERT_STATUS_OK(session_object2.Load(model_data.data(), static_cast(model_data.size()))); ASSERT_STATUS_OK(session_object2.Initialize()); // make sure that some nodes are assigned to the EP, otherwise this test is pointless... const auto& graph2 = session_object2.GetGraph(); auto ep_nodes = CountAssignedNodes(graph2, provider_type); - ASSERT_GT(ep_nodes, 0) << "No nodes were assigned to " << provider_type << " for " << model_path; + ASSERT_GT(ep_nodes, 0) << "No nodes were assigned to " << provider_type; // Run with EP and verify the result std::vector fetches; @@ -178,7 +187,7 @@ void SparseIndicesChecker(const ONNX_NAMESPACE::TensorProto& indices_proto, gsl: ASSERT_THAT(ind_span, testing::ContainerEq(expected_indicies)); } -#endif // DISABLE_SPARSE_TENSORS +#endif // DISABLE_SPARSE_TENSORS } // namespace test } // namespace onnxruntime