diff --git a/onnxruntime/test/optimizer/qdq_test_utils.h b/onnxruntime/test/optimizer/qdq_test_utils.h
new file mode 100644
index 0000000000..2327ba7485
--- /dev/null
+++ b/onnxruntime/test/optimizer/qdq_test_utils.h
@@ -0,0 +1,82 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+#pragma once
+
+#include "graph_transform_test_builder.h"
+#include "core/optimizer/qdq_transformer/selectors_actions/qdq_selector_action_transformer.h"
+#include "core/session/inference_session.h"
+
+#include "test/util/include/asserts.h"
+#include "test/util/include/inference_session_wrapper.h"
+
+namespace onnxruntime {
+namespace test {
+
+using GetQDQConvTestCaseFn = std::function<void(ModelTestBuilder& builder)>;  // callback that builds a test graph on `builder`
+
+template <typename T>  // T: quantized element type; the overload only exists when IsTypeQuantLinearCompatible<T> holds
+std::enable_if_t<IsTypeQuantLinearCompatible<T>::value, NodeArg*>
+AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale, T zp = T()) {
+  auto* const quantized = builder.MakeIntermediate();    // receives the QuantizeLinear result
+  auto* const dequantized = builder.MakeIntermediate();  // receives the DequantizeLinear result (returned)
+  builder.AddQuantizeLinearNode<T>(q_input, scale, zp, quantized);
+  builder.AddDequantizeLinearNode<T>(quantized, scale, zp, dequantized);
+  return dequantized;
+}
+
+// Returns a callback that builds a QDQ Conv test graph (Q/DQ input, DQ weight and bias -> Conv -> Q -> DQ).
+// TODO: for now it only builds a Conv QDQ graph; it could be generalized and shared by QDQ test graphs of other operators.
+template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
+GetQDQConvTestCaseFn BuildQDQConvTestCase(const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
+  return [input_shape, weights_shape](ModelTestBuilder& builder) {  // shapes are captured by value: the callback is returned
+    auto* input_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);  // NOTE(review): -1.f/1.f look like a value range - confirm
+    auto* output_arg = builder.MakeOutput();
+
+    using InputLimits = std::numeric_limits<InputType>;
+    using WeightLimits = std::numeric_limits<WeightType>;
+    using OutputLimits = std::numeric_limits<OutputType>;
+
+    InputType input_min_value = InputLimits::min();
+    InputType input_max_value = InputLimits::max();
+
+    WeightType weight_min_value = WeightLimits::min();
+    WeightType weight_max_value = WeightLimits::max();
+
+    // Halve the weight range for int8 weights: when running on CPU, the MLAS kernel overflows for
+    // uint8 activations combined with int8 weights under the AVX2 and AVX512 extensions;
+    // the reduced weight range prevents that overflow.
+    if constexpr (std::is_same<WeightType, int8_t>::value) {
+      weight_min_value /= 2;
+      weight_max_value /= 2;
+    }
+
+    auto* dq_w_output = builder.MakeIntermediate();
+    auto* weight = builder.MakeInitializer<WeightType>(weights_shape, weight_min_value, weight_max_value);
+    builder.AddDequantizeLinearNode<WeightType>(weight, .03f,
+                                                (weight_min_value + weight_max_value) / 2 + 1,  // zero point: range midpoint + 1
+                                                dq_w_output);
+
+    auto* dq_bias_output = builder.MakeIntermediate();
+    auto* bias = builder.MakeInitializer<BiasType>({weights_shape[0]}, static_cast<BiasType>(0), static_cast<BiasType>(127));
+    builder.AddDequantizeLinearNode<BiasType>(bias, .0012f,
+                                              0,
+                                              dq_bias_output);
+
+    auto* conv_output = builder.MakeIntermediate();
+    auto* dq_output = AddQDQNodePair<InputType>(builder, input_arg, .04f,
+                                                (input_min_value + input_max_value) / 2 + 1);
+    builder.AddNode("Conv", {dq_output, dq_w_output, dq_bias_output}, {conv_output});  // Conv over the dequantized input, weight, bias
+
+    auto* q_output = builder.MakeIntermediate();
+    builder.AddQuantizeLinearNode<OutputType>(conv_output, .039f,
+                                              (OutputLimits::min() + OutputLimits::max()) / 2 + 1,
+                                              q_output);
+
+    builder.AddDequantizeLinearNode<OutputType>(q_output, .039f,  // same scale and zero point as the quantize step above
+                                                (OutputLimits::min() + OutputLimits::max()) / 2 + 1,
+                                                output_arg);
+  };
+}
+
+}  // namespace test
+}  // namespace onnxruntime
\ No newline at end of file
diff --git a/onnxruntime/test/optimizer/qdq_transformer_test.cc b/onnxruntime/test/optimizer/qdq_transformer_test.cc
index 9d74b8324b..10d3bec7df 100644
--- a/onnxruntime/test/optimizer/qdq_transformer_test.cc
+++ b/onnxruntime/test/optimizer/qdq_transformer_test.cc
@@ -21,6 +21,8 @@
 #include "gtest/gtest.h"
 #include "graph_transform_test_builder.h"
 
+#include "qdq_test_utils.h"
+
 #if defined(_MSC_VER)
 #pragma warning(disable : 4127)
 #endif  // #if defined(_MSC_VER)
@@ -32,76 +34,11 @@
 namespace onnxruntime {
 namespace test {
 
-template <typename T>
-typename std::enable_if<IsTypeQuantLinearCompatible<T>::value, NodeArg*>::type
-AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale, T zp) {
-  auto* q_output = builder.MakeIntermediate();
-  auto* dq_output = builder.MakeIntermediate();
-  builder.AddQuantizeLinearNode<T>(q_input, scale, zp, q_output);
-  builder.AddDequantizeLinearNode<T>(q_output, scale, zp, dq_output);
-  return dq_output;
-}
-
-template <typename T>
-typename std::enable_if<IsTypeQuantLinearCompatible<T>::value, NodeArg*>::type
-AddQDQNodePair(ModelTestBuilder& builder, NodeArg* q_input, float scale) {
-  auto* q_output = builder.MakeIntermediate();
-  auto* dq_output = builder.MakeIntermediate();
-  builder.AddQuantizeLinearNode(q_input, scale, q_output);
-  builder.AddDequantizeLinearNode<T>(q_output, scale, dq_output);
-  return dq_output;
-}
-
 #ifndef DISABLE_CONTRIB_OPS
 
 template <typename InputType, typename WeightType, typename BiasType, typename OutputType>
 void QDQTransformerConvTests() {
   auto test_case = [&](const std::vector<int64_t>& input_shape, const std::vector<int64_t>& weights_shape) {
-    auto build_test_case = [&](ModelTestBuilder& builder) {
-      auto* input_arg = builder.MakeInput<float>(input_shape, -1.f, 1.f);
-      auto* output_arg = builder.MakeOutput();
-
-      typedef std::numeric_limits<InputType> InputLimits;
-      typedef std::numeric_limits<WeightType> WeightLimits;
-      typedef std::numeric_limits<OutputType> OutputLimits;
-
-      InputType input_min_value = InputLimits::min();
-      InputType input_max_value = InputLimits::max();
-
-      WeightType weight_min_value = WeightLimits::min();
-      WeightType weight_max_value = WeightLimits::max();
-      if (std::is_same<WeightType, int8_t>::value) {
-        weight_min_value /= 2;
-        weight_max_value /= 2;
-      }
-
-      auto* dq_w_output = builder.MakeIntermediate();
-      auto* weight = builder.MakeInitializer<WeightType>(weights_shape, weight_min_value, weight_max_value);
-      builder.AddDequantizeLinearNode<WeightType>(weight, .03f,
-                                                  (weight_min_value + weight_max_value) / 2 + 1,
-                                                  dq_w_output);
-
-      auto* dq_bias_output = builder.MakeIntermediate();
-      auto* bias = builder.MakeInitializer<BiasType>({weights_shape[0]}, static_cast<BiasType>(0), static_cast<BiasType>(127));
-      builder.AddDequantizeLinearNode<BiasType>(bias, .0012f,
-                                                0,
-                                                dq_bias_output);
-
-      auto* conv_output = builder.MakeIntermediate();
-      auto* dq_output = AddQDQNodePair<InputType>(builder, input_arg, .04f,
-                                                  (input_min_value + input_max_value) / 2 + 1);
-      builder.AddNode("Conv", {dq_output, dq_w_output, dq_bias_output}, {conv_output});
-
-      auto* q_output = builder.MakeIntermediate();
-      builder.AddQuantizeLinearNode<OutputType>(conv_output, .039f,
-                                                (OutputLimits::min() + OutputLimits::max()) / 2 + 1,
-                                                q_output);
-
-      builder.AddDequantizeLinearNode<OutputType>(q_output, .039f,
-                                                  (OutputLimits::min() + OutputLimits::max()) / 2 + 1,
-                                                  output_arg);
-    };
-
     auto check_conv_graph = [&](InferenceSessionWrapper& session) {
       auto op_to_count = CountOpsInGraph(session.GetGraph());
       if constexpr (std::is_same<InputType, OutputType>::value &&
@@ -119,7 +56,7 @@ void QDQTransformerConvTests() {
       }
     };
 
-    TransformerTester(build_test_case,
+    TransformerTester(BuildQDQConvTestCase<InputType, WeightType, BiasType, OutputType>(input_shape, weights_shape),
                       check_conv_graph,
                       TransformerLevel::Level1,
                       TransformerLevel::Level2,
diff --git a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
index 2cf6234f84..c5e2c43780 100644
--- a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
+++ b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
@@ -26,6 +26,10 @@
 #include "gtest/gtest.h"
 #include "gmock/gmock.h"
 
+#if !defined(ORT_MINIMAL_BUILD)
+#include "test/optimizer/qdq_test_utils.h"
+#endif
+
 using namespace std;
 using namespace ONNX_NAMESPACE;
 using namespace ::onnxruntime::logging;
@@ -250,7 +254,31 @@ TEST(NnapiExecutionProviderTest, TestQDQConvModel) {
       << "No nodes should have been taken by the NNAPI EP";
 }
 
-#endif  // !(ORT_MINIMAL_BUILD
+#if defined(__ANDROID__)
+// Builds a uint8 QDQ Conv model in memory, serializes it, and runs it through RunAndVerifyOutputsWithEP with the NNAPI EP.
+TEST(NnapiExecutionProviderTest, TestQDQModel) {
+  onnxruntime::Model model("nnapi_qdq_test_graph", false, DefaultLoggingManager().DefaultLogger());
+  Graph& graph = model.MainGraph();
+  ModelTestBuilder helper(graph);
+  auto build_test_case = BuildQDQConvTestCase<uint8_t, uint8_t, int32_t, uint8_t>(
+      {1, 1, 5, 5} /*input_shape*/, {1, 1, 3, 3} /*weights_shape*/);
+  build_test_case(helper);
+  helper.SetGraphOutputs();
+  ASSERT_STATUS_OK(graph.Resolve());
+
+  // Serialize the model to a string; SerializeToString reports failure through its bool result, so check it.
+  std::string model_data;
+  ASSERT_TRUE(model.ToProto().SerializeToString(&model_data)) << "Failed to serialize the QDQ test model";
+
+  RunAndVerifyOutputsWithEP(model_data, "NnapiExecutionProviderTest.TestQDQModel",
+                            std::make_unique<NnapiExecutionProvider>(0),
+                            helper.feeds_);
+
+  // TODO: add load-only verification for this test later
+}
+#endif  // defined(__ANDROID__)
+
+#endif  // !defined(ORT_MINIMAL_BUILD)
 
 TEST(NnapiExecutionProviderTest, NNAPIFlagsTest) {
   uint32_t nnapi_flags = NNAPI_FLAG_USE_NONE;
diff --git a/onnxruntime/test/util/include/test_utils.h b/onnxruntime/test/util/include/test_utils.h
index 388db559fc..60d2a3e8ca 100644
--- a/onnxruntime/test/util/include/test_utils.h
+++ b/onnxruntime/test/util/include/test_utils.h
@@ -24,5 +24,12 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path,
                                const char* log_id,
                                std::unique_ptr<IExecutionProvider> execution_provider,
                                const NameMLValMap& feeds);
+
+// Overload that takes the serialized model bytes (model_data) instead of a model file path.
+// Used by the NNAPI QDQ model tests.
+void RunAndVerifyOutputsWithEP(const std::string& model_data,
+                               const char* log_id,
+                               std::unique_ptr<IExecutionProvider> execution_provider,
+                               const NameMLValMap& feeds);
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/util/test_utils.cc b/onnxruntime/test/util/test_utils.cc
index 1ccfb359fc..476fed4282 100644
--- a/onnxruntime/test/util/test_utils.cc
+++ b/onnxruntime/test/util/test_utils.cc
@@ -69,6 +69,15 @@ int CountAssignedNodes(const Graph& current_graph, const std::string& ep_type) {
 void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id,
                                std::unique_ptr<IExecutionProvider> execution_provider,
                                const NameMLValMap& feeds) {
+  std::ifstream stream(model_path, std::ios::in | std::ios::binary);  // read the raw model bytes from model_path
+  ASSERT_TRUE(stream.is_open()) << "Failed to open model file: " << model_path;  // otherwise model_data would be empty
+  std::string model_data((std::istreambuf_iterator<char>(stream)), std::istreambuf_iterator<char>());
+  RunAndVerifyOutputsWithEP(model_data, log_id, std::move(execution_provider), feeds);  // delegate to the model_data overload
+}
+
+void RunAndVerifyOutputsWithEP(const std::string& model_data, const char* log_id,
+                               std::unique_ptr<IExecutionProvider> execution_provider,
+                               const NameMLValMap& feeds) {
   SessionOptions so;
   so.session_logid = log_id;
   RunOptions run_options;
@@ -78,7 +87,7 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id,
   // get expected output from CPU EP
   //
   InferenceSessionWrapper session_object{so, GetEnvironment()};
-  ASSERT_STATUS_OK(session_object.Load(model_path));
+  ASSERT_STATUS_OK(session_object.Load(model_data.data(), static_cast<int>(model_data.size())));
   ASSERT_STATUS_OK(session_object.Initialize());
 
   const auto& graph = session_object.GetGraph();
@@ -103,13 +112,13 @@ void RunAndVerifyOutputsWithEP(const ORTCHAR_T* model_path, const char* log_id,
   //
   InferenceSessionWrapper session_object2{so, GetEnvironment()};
   ASSERT_STATUS_OK(session_object2.RegisterExecutionProvider(std::move(execution_provider)));
-  ASSERT_STATUS_OK(session_object2.Load(model_path));
+  ASSERT_STATUS_OK(session_object2.Load(model_data.data(), static_cast<int>(model_data.size())));
   ASSERT_STATUS_OK(session_object2.Initialize());
 
   // make sure that some nodes are assigned to the EP, otherwise this test is pointless...
   const auto& graph2 = session_object2.GetGraph();
   auto ep_nodes = CountAssignedNodes(graph2, provider_type);
-  ASSERT_GT(ep_nodes, 0) << "No nodes were assigned to " << provider_type << " for " << model_path;
+  ASSERT_GT(ep_nodes, 0) << "No nodes were assigned to " << provider_type;
 
   // Run with EP and verify the result
   std::vector<OrtValue> fetches;
@@ -178,7 +187,7 @@ void SparseIndicesChecker(const ONNX_NAMESPACE::TensorProto& indices_proto, gsl:
   ASSERT_THAT(ind_span, testing::ContainerEq(expected_indicies));
 }
 
-#endif // DISABLE_SPARSE_TENSORS
+#endif  // DISABLE_SPARSE_TENSORS
 
 }  // namespace test
 }  // namespace onnxruntime