diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index bde27df94e..ab344d367a 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -3660,8 +3660,8 @@ struct OrtApi {
    *     - "1": Enabled.
    *   "offload_graph_io_quantization": Offload graph input quantization and graph output dequantization to another
    *   execution provider (typically CPU EP).
-   *     - "0": Default. Disabled. QNN EP will handle quantization and dequantization of graph I/O.
-   *     - "1": Enabled.
+   *     - "0": Disabled. QNN EP will handle quantization and dequantization of graph I/O.
+   *     - "1": Enabled. This is the default value. Ignored (treated as "0") if fallback to CPU EP is disabled.
    *
    * SNPE supported keys:
    *   "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",
diff --git a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
index ed193904fe..a7a91d107b 100644
--- a/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
+++ b/onnxruntime/core/providers/qnn/qnn_execution_provider.cc
@@ -377,13 +377,15 @@ QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_optio
     LOGS_DEFAULT(VERBOSE) << "User specified enable_htp_weight_sharing: " << enable_htp_weight_sharing_;
   }
 
-  model_settings_.offload_graph_io_quantization = ParseBoolOption("offload_graph_io_quantization", false,
+  model_settings_.offload_graph_io_quantization = ParseBoolOption("offload_graph_io_quantization", true,
                                                                   provider_options_map);
 
   if (disable_cpu_ep_fallback_ && model_settings_.offload_graph_io_quantization) {
-    LOGS_DEFAULT(WARNING) << "Fallback to CPU EP is disabled, but user configured QNN EP to offload graph I/O "
-                          << "quantization/dequantization to another EP. Session creation will fail if the CPU EP "
-                          << "handles the graph I/O quantization/dequantization.";
+    LOGS_DEFAULT(INFO) << "Fallback to CPU EP is disabled, but user tried to configure QNN EP to offload graph I/O "
+                       << "quantization/dequantization to another EP. These are conflicting options. Fallback to CPU "
+                       << "EP will remain disabled and graph I/O quantization/dequantization will not be offloaded "
+                       << "to another EP.";
+    model_settings_.offload_graph_io_quantization = false;
   }
 
   qnn_backend_manager_ = std::make_unique<qnn::QnnBackendManager>(
diff --git a/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc b/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc
index c514cf16b2..41de81fd47 100644
--- a/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc
+++ b/onnxruntime/test/providers/qnn/argmaxmin_op_test.cc
@@ -70,6 +70,7 @@ static void RunQDQArgMxxOpTest(const std::string& op_type, TestInputDef<float> i
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def}, {}, attrs),   // baseline float32 model
                        BuildQDQArgMxxTestCase<QType>(op_type, input_def, attrs),  // QDQ model
diff --git a/onnxruntime/test/providers/qnn/average_pool_test.cc b/onnxruntime/test/providers/qnn/average_pool_test.cc
index 1a0f9bfcba..45d9a4fd2d 100644
--- a/onnxruntime/test/providers/qnn/average_pool_test.cc
+++ b/onnxruntime/test/providers/qnn/average_pool_test.cc
@@ -31,6 +31,7 @@ static void RunAveragePoolOpTest(const std::string& op_type,
 #else
   provider_options["backend_path"] = "libQnnCpu.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildOpTestCase<float>(op_type, input_defs, {}, attrs),
                   provider_options,
@@ -53,6 +54,7 @@ static void RunQDQAveragePoolOpTest(const std::string& op_type,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, input_defs, {}, attrs),
                        BuildQDQOpTestCase<QuantType>(op_type, input_defs, {}, attrs),
diff --git a/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc b/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc
index 0a39413a4e..7471b44faf 100644
--- a/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/batch_norm_htp_test.cc
@@ -160,6 +160,7 @@ static void RunBatchNormQDQTest(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
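-  // Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs.
+  // Runs model with DQ -> BatchNormalization -> Q and compares the outputs of the CPU and QNN EPs.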
   TestQDQModelAccuracy(BuildBatchNormTestCase(input_def, scale_def, bias_def),
@@ -180,6 +181,7 @@ static void RunBatchNormFP16Test(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestInputDef<MLFloat16> input_fp16_def = ConvertToFP16InputDef(input_def);
   TestInputDef<MLFloat16> scale_fp16_def = ConvertToFP16InputDef(scale_def);
diff --git a/onnxruntime/test/providers/qnn/cast_test.cc b/onnxruntime/test/providers/qnn/cast_test.cc
index 9b83dd281a..713baab0b7 100644
--- a/onnxruntime/test/providers/qnn/cast_test.cc
+++ b/onnxruntime/test/providers/qnn/cast_test.cc
@@ -57,6 +57,7 @@ static void RunCastOpTest(const std::vector<int64_t>& shape, ONNX_NAMESPACE::Ten
 #else
   provider_options["backend_path"] = use_htp ? "libQnnHtp.so" : "libQnnCpu.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   if (use_htp && enable_fp16_precision) {
     provider_options["enable_htp_fp16_precision"] = "1";
diff --git a/onnxruntime/test/providers/qnn/clip_op_test.cc b/onnxruntime/test/providers/qnn/clip_op_test.cc
index cfa77a4621..ae169f7b25 100644
--- a/onnxruntime/test/providers/qnn/clip_op_test.cc
+++ b/onnxruntime/test/providers/qnn/clip_op_test.cc
@@ -117,6 +117,7 @@ static void RunQDQClipTestOnHTP(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   auto f32_model_builder = BuildOpTestCase<float, float>("Clip", {input_def}, {min_max_defs}, {});
   auto qdq_model_builder = BuildQDQOpTestCase<QType, float>("Clip", {input_def}, {min_max_defs}, {},
@@ -205,6 +206,7 @@ TEST_F(QnnHTPBackendTests, Clip_U8_Rank5) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(model_fn,
                   provider_options,
diff --git a/onnxruntime/test/providers/qnn/conv_test.cc b/onnxruntime/test/providers/qnn/conv_test.cc
index cf37fc0033..91677781e8 100644
--- a/onnxruntime/test/providers/qnn/conv_test.cc
+++ b/onnxruntime/test/providers/qnn/conv_test.cc
@@ -93,6 +93,8 @@ static void RunCPUConvOpTest(const std::string& conv_op_type, const TestInputDef
 #else
   provider_options["backend_path"] = "libQnnCpu.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
+
   auto build_fn = BuildF32ConvTestCase(conv_op_type, input_def, weights_def, bias_def, strides, pads,
                                        dilations, group, auto_pad);
   RunQnnModelTest(build_fn,
@@ -317,6 +319,7 @@ static void RunHTPConvOpTest(const std::string& conv_op_type, const TestInputDef
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestQDQModelAccuracy(BuildF32ConvTestCase(conv_op_type, input_def, weights_def, bias_def, strides, pads, dilations,
                                             group, auto_pad, output_activation),
@@ -354,6 +357,7 @@ static void RunHTPConvOpPerChannelTest(const std::string& conv_op_type, const Te
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   auto f32_fn = BuildF32ConvTestCase(conv_op_type, input_def, weights_def, bias_def, strides, pads, dilations,
                                      group, auto_pad, output_activation);
@@ -665,6 +669,7 @@ TEST_F(QnnHTPBackendTests, Test_QDQConvWithDynamicWeightsFromMul) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   auto BuildConvMulGraph = [](ModelTestBuilder& builder) {
     // DQ node for Conv input
diff --git a/onnxruntime/test/providers/qnn/flatten_op_test.cc b/onnxruntime/test/providers/qnn/flatten_op_test.cc
index 637d3257dd..6370d4a4f1 100644
--- a/onnxruntime/test/providers/qnn/flatten_op_test.cc
+++ b/onnxruntime/test/providers/qnn/flatten_op_test.cc
@@ -101,6 +101,7 @@ static void RunQDQFlattenTestOnHTP(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   auto f32_model_builder = BuildOpTestCase<float>("Flatten", {input_def}, {}, attrs);
   auto qdq_model_builder = BuildQDQOpTestCase<QType>("Flatten", {input_def}, {}, attrs, kOnnxDomain, use_contrib_qdq);
@@ -172,6 +173,7 @@ TEST_F(QnnHTPBackendTests, Flatten_QDQ8bit_Rank5) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(model_fn,
                   provider_options,
diff --git a/onnxruntime/test/providers/qnn/gather_elems_op_test.cc b/onnxruntime/test/providers/qnn/gather_elems_op_test.cc
index 81c0887306..68c9867617 100644
--- a/onnxruntime/test/providers/qnn/gather_elems_op_test.cc
+++ b/onnxruntime/test/providers/qnn/gather_elems_op_test.cc
@@ -67,6 +67,7 @@ static void RunCPUGatherElemsOpTest(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnCpu.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildOpTestCase<DataType, IndexType>("GatherElements", {input_def}, {indices_def}, attrs),
                   provider_options,
@@ -91,6 +92,7 @@ static void RunHTPQDQGatherElemsOpTest(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   auto f32_model_builder = BuildOpTestCase<float, IndexType>("GatherElements", {input_def}, {indices_def}, attrs);
   auto qdq_model_builder = BuildQDQGatherElemsTestCase<QuantType, IndexType>(input_def, indices_def, attrs,
@@ -119,6 +121,7 @@ static void RunHTPGatherElemsOpTest(const TestInputDef<DataType>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildOpTestCase<DataType, IndexType>("GatherElements", {input_def}, {indices_def}, attrs),
                   provider_options,
diff --git a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc
index 55177cc7ed..4478d36ebf 100644
--- a/onnxruntime/test/providers/qnn/gather_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/gather_op_htp_test.cc
@@ -63,6 +63,7 @@ static void RunQDQGatherOpTest(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   auto f32_model_builder = BuildOpTestCase<float, IndicesType>("Gather", {input_def}, {indices_def}, attrs);
   auto qdq_model_builder = BuildQDQGatherTestCase<QuantType, IndicesType>(input_def, indices_def, attrs,
@@ -152,4 +153,4 @@ TEST_F(QnnHTPBackendTests, DISABLED_GatherOp_IndicesStaticInt32_Axis1) {
 }  // namespace test
 }  // namespace onnxruntime
 
-#endif
\ No newline at end of file
+#endif
diff --git a/onnxruntime/test/providers/qnn/gemm_op_test.cc b/onnxruntime/test/providers/qnn/gemm_op_test.cc
index 33c868694c..b2aa6280ef 100644
--- a/onnxruntime/test/providers/qnn/gemm_op_test.cc
+++ b/onnxruntime/test/providers/qnn/gemm_op_test.cc
@@ -29,6 +29,7 @@ static void RunGemmTestOnCPU(const std::vector<TestInputDef<DataType>>& input_de
 #else
   provider_options["backend_path"] = "libQnnCpu.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildOpTestCase<float>("Gemm", input_defs, {}, attrs),
                   provider_options,
@@ -246,6 +247,8 @@ static void RunQDQGemmTestOnHTP(const std::vector<TestInputDef<float>>& input_de
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
+
   auto f32_model_builder = BuildOpTestCase<float>("Gemm", input_defs, {}, attrs);
   auto qdq_model_builder = BuildQDQGemmTestCase<InputAQType, InputBQType>(input_defs, attrs, use_contrib_qdq);
   TestQDQModelAccuracy<InputAQType>(f32_model_builder,
diff --git a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc
index 3598ba1ac8..d4f66b72e0 100644
--- a/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/instance_norm_htp_test.cc
@@ -79,6 +79,7 @@ static void RunInstanceNormQDQTest(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   // Runs model with DQ-> InstanceNorm -> Q and compares the outputs of the CPU and QNN EPs.
   TestQDQModelAccuracy(BuildOpTestCase<float>("InstanceNormalization", {input_def, scale_def, bias_def}, {}, attrs),
diff --git a/onnxruntime/test/providers/qnn/layer_norm_test.cc b/onnxruntime/test/providers/qnn/layer_norm_test.cc
index 947ac19be4..b2997c6278 100644
--- a/onnxruntime/test/providers/qnn/layer_norm_test.cc
+++ b/onnxruntime/test/providers/qnn/layer_norm_test.cc
@@ -28,6 +28,7 @@ static void RunLayerNormCpuTest(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnCpu.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildOpTestCase<float>("LayerNormalization", {input_def, scale_def}, {}, attrs),
                   provider_options,
@@ -152,6 +153,7 @@ static void RunLayerNormQDQTest(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestQDQModelAccuracy(BuildOpTestCase<float>("LayerNormalization", {input_def, scale_def}, {}, attrs),
                        BuildQDQLayerNormTestCase<InputQType, ScaleQType>(input_def, scale_def, bias_def, attrs,
diff --git a/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc b/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc
index ece8d91d53..77d96b56d2 100644
--- a/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/leakyrelu_op_htp_test.cc
@@ -28,6 +28,7 @@ static void RunLeakyReluOpQDQTest(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestQDQModelAccuracy(BuildOpTestCase<float>("LeakyRelu", {input_def}, {}, attrs),
                        BuildQDQOpTestCase<QuantType>("LeakyRelu", {input_def}, {}, attrs),
@@ -66,6 +67,7 @@ TEST_F(QnnHTPBackendTests, LeakyReluFP16OpSet16) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   auto input_def = TestInputDef<float>({1, 2, 3}, false, {-40.0f, -20.0f, 1.0f, 10.0f, 30.0f, 40.0f});
   TestInputDef<MLFloat16> input_fp16_def = ConvertToFP16InputDef(input_def);
diff --git a/onnxruntime/test/providers/qnn/logical_comp_ops_test.cc b/onnxruntime/test/providers/qnn/logical_comp_ops_test.cc
index 5910513678..a49fa7c5fc 100644
--- a/onnxruntime/test/providers/qnn/logical_comp_ops_test.cc
+++ b/onnxruntime/test/providers/qnn/logical_comp_ops_test.cc
@@ -73,6 +73,7 @@ static void RunCPULogicalOpTest(const std::string& op_type, const std::vector<in
 #else
   provider_options["backend_path"] = "libQnnCpu.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildLogicalOpTestCase(op_type, shape),
                   provider_options,
@@ -92,6 +93,7 @@ static void RunQDQLogicalOpTest(const std::string& op_type, const std::vector<in
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildQDQLogicalOpTestCase<QuantType>(op_type, shape),
                   provider_options,
@@ -157,6 +159,7 @@ TEST_F(QnnHTPBackendTests, EqualToCast4D) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   // Model building function that creates a QDQ graph with an Equal node followed by
   // a Cast to float32.
diff --git a/onnxruntime/test/providers/qnn/lrn_op_test.cc b/onnxruntime/test/providers/qnn/lrn_op_test.cc
index a99cba66bf..8327849a17 100644
--- a/onnxruntime/test/providers/qnn/lrn_op_test.cc
+++ b/onnxruntime/test/providers/qnn/lrn_op_test.cc
@@ -70,6 +70,7 @@ static void RunCPULRNOpTest(const TestInputDef<float>& input_def, int64_t size,
   provider_options["backend_path"] = "libQnnCpu.so";
   fp32_abs_err = 1.5e-5f;  // On linux we need slightly larger tolerance.
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildLRNTestCase(input_def, size, alpha, beta, bias),
                   provider_options,
@@ -91,6 +92,7 @@ static void RunQDQLRNOpTest(const TestInputDef<float>& input_def, int64_t size,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestQDQModelAccuracy(BuildLRNTestCase(input_def, size, alpha, beta, bias),
                        BuildQDQLRNTestCase<QuantType>(input_def, size, alpha, beta, bias),
diff --git a/onnxruntime/test/providers/qnn/matmul_test.cpp b/onnxruntime/test/providers/qnn/matmul_test.cpp
index 5c6967761b..24a74b9c4c 100644
--- a/onnxruntime/test/providers/qnn/matmul_test.cpp
+++ b/onnxruntime/test/providers/qnn/matmul_test.cpp
@@ -125,6 +125,7 @@ static void RunQDQPerChannelMatMulOpOpTest(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   if (enable_fp16_precision) {
     provider_options["enable_htp_fp16_precision"] = "1";
@@ -178,6 +179,7 @@ static void RunQDQMatMulOpOpTest(const TestInputDef<float>& input1_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestQDQModelAccuracy(BuildMatMulOpTestCase(input1_def, input2_def),
                        BuildMatMulOpQDQTestCase<Input0QType, Input1QType, OutputQType>(input1_def, input2_def,
diff --git a/onnxruntime/test/providers/qnn/max_min_op_test.cc b/onnxruntime/test/providers/qnn/max_min_op_test.cc
index 3deff121f3..4db1fdcec4 100644
--- a/onnxruntime/test/providers/qnn/max_min_op_test.cc
+++ b/onnxruntime/test/providers/qnn/max_min_op_test.cc
@@ -26,6 +26,7 @@ static void RunCPUMinOrMaxOpTest(const std::string& op_type,
 #else
   provider_options["backend_path"] = "libQnnCpu.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildOpTestCase<float>(op_type, input_defs, {}, {}, kOnnxDomain),
                   provider_options,
@@ -47,6 +48,7 @@ static void RunQDQMinOrMaxOpTest(const std::string& op_type,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, input_defs, {}, {}, kOnnxDomain),     // baseline float32 model
                        BuildQDQOpTestCase<QType>(op_type, input_defs, {}, {}, kOnnxDomain),  // QDQ model
diff --git a/onnxruntime/test/providers/qnn/pad_op_test.cpp b/onnxruntime/test/providers/qnn/pad_op_test.cpp
index a6b8664c6c..17629b9218 100644
--- a/onnxruntime/test/providers/qnn/pad_op_test.cpp
+++ b/onnxruntime/test/providers/qnn/pad_op_test.cpp
@@ -116,6 +116,7 @@ static void RunPadOpTest(const TestInputDef<float>& data_def,
     provider_options["backend_path"] = "libQnnCpu.so";
 #endif
   }
+  provider_options["offload_graph_io_quantization"] = "0";
 
   if (enable_fp16_precision) {
     provider_options["enable_htp_fp16_precision"] = "1";
@@ -144,6 +145,7 @@ static void RunQDQPadOpTest(const TestInputDef<float>& data_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestQDQModelAccuracy(BuildPadTestCase(data_def, pads_def, constant_value_def, attrs),
                        BuildPadQDQTestCase<QuantType>(data_def, pads_def, constant_value_def, attrs,
diff --git a/onnxruntime/test/providers/qnn/pool_op_test.cpp b/onnxruntime/test/providers/qnn/pool_op_test.cpp
index 5dd3a6aaa3..8905a64ab4 100644
--- a/onnxruntime/test/providers/qnn/pool_op_test.cpp
+++ b/onnxruntime/test/providers/qnn/pool_op_test.cpp
@@ -60,6 +60,7 @@ static void RunPoolOpTest(const std::string& op_type,
 #else
   provider_options["backend_path"] = "libQnnCpu.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildOpTestCase<float>(op_type, {input_def}, {}, attrs),
                   provider_options,
@@ -83,6 +84,7 @@ static void RunQDQPoolOpTest(const std::string& op_type,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, {input_def}, {}, attrs),
                        BuildPoolQDQTestCase<QuantType>(op_type, input_def, attrs, use_contrib_qdq_ops),
diff --git a/onnxruntime/test/providers/qnn/qnn_basic_test.cc b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
index e8282dbad9..8335bf0b15 100644
--- a/onnxruntime/test/providers/qnn/qnn_basic_test.cc
+++ b/onnxruntime/test/providers/qnn/qnn_basic_test.cc
@@ -117,6 +117,7 @@ TEST(QnnEP, TestDisableCPUFallback_ModelNotFullySupported) {
 #else
     options["backend_path"] = "libQnnCpu.so";
 #endif
+    options["offload_graph_io_quantization"] = "0";
 
     so.AppendExecutionProvider("QNN", options);
 
@@ -148,6 +149,7 @@ TEST(QnnEP, TestDisableCPUFallback_TryingToRunOnQnnCPU) {
 #else
   options["backend_path"] = "libQnnCpu.so";
 #endif
+  options["offload_graph_io_quantization"] = "0";
 
   auto input_defs = {TestInputDef<float>({1, 2, 2, 2}, false, -10.0f, 10.0f),
                      TestInputDef<float>({1, 2, 2, 2}, false, -10.0f, 10.0f)};
@@ -196,6 +198,7 @@ TEST(QnnEP, TestDisableCPUFallback_ConflictingConfig) {
 #else
     options["backend_path"] = "libQnnCpu.so";
 #endif
+    options["offload_graph_io_quantization"] = "0";
 
     so.AppendExecutionProvider("QNN", options);
 
@@ -226,6 +229,7 @@ TEST_F(QnnHTPBackendTests, TestConvWithExternalData) {
 #else
   options["backend_path"] = "libQnnHtp.so";
 #endif
+  options["offload_graph_io_quantization"] = "0";
 
   so.AppendExecutionProvider("QNN", options);
 
@@ -301,6 +305,7 @@ static void RunNHWCResizeModel(const ORTCHAR_T* ort_model_path, bool use_htp, bo
   so.SetGraphOptimizationLevel(ORT_ENABLE_ALL);
 
   onnxruntime::ProviderOptions options;
+  options["offload_graph_io_quantization"] = "0";
 
 #if defined(_WIN32)
   options["backend_path"] = use_htp ? "QnnHtp.dll" : "QnnCpu.dll";
@@ -591,6 +596,7 @@ TEST_F(QnnHTPBackendTests, MultithreadSessionRun) {
 #else
   options["backend_path"] = "libQnnHtp.so";
 #endif
+  options["offload_graph_io_quantization"] = "0";
 
   auto qnn_ep = QnnExecutionProviderWithOptions(options, &session_opts);
   EXPECT_TRUE(session_obj.RegisterExecutionProvider(std::move(qnn_ep)).IsOK());
@@ -640,6 +646,7 @@ TEST_F(QnnHTPBackendTests, MultithreadHtpPowerCfgSessionRunOption) {
 #else
   options["backend_path"] = "libQnnHtp.so";
 #endif
+  options["offload_graph_io_quantization"] = "0";
 
   auto qnn_ep = QnnExecutionProviderWithOptions(options, &session_opts);
   EXPECT_TRUE(session_obj.RegisterExecutionProvider(std::move(qnn_ep)).IsOK());
@@ -705,6 +712,7 @@ TEST_F(QnnHTPBackendTests, MultithreadDefaultHtpPowerCfgFromEpOption) {
 #else
   options["backend_path"] = "libQnnHtp.so";
 #endif
+  options["offload_graph_io_quantization"] = "0";
   options["htp_performance_mode"] = "burst";
 
   auto qnn_ep = QnnExecutionProviderWithOptions(options, &session_opts);
@@ -756,6 +764,7 @@ TEST_F(QnnHTPBackendTests, MultithreadHtpPowerCfgDefaultAndRunOption) {
 #else
   options["backend_path"] = "libQnnHtp.so";
 #endif
+  options["offload_graph_io_quantization"] = "0";
   options["htp_performance_mode"] = "burst";
 
   auto qnn_ep = QnnExecutionProviderWithOptions(options, &session_opts);
@@ -920,6 +929,7 @@ TEST_F(QnnHTPBackendTests, ProfilingTest) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
   provider_options["enable_htp_fp16_precision"] = "1";
   provider_options["profiling_level"] = "detailed";
   provider_options["profiling_file_path"] = "detailed_profile.csv";
@@ -940,6 +950,7 @@ TEST_F(QnnHTPBackendTests, CastAddHTPAccuracyTest) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildCastAddTestCase(),
                   provider_options,
@@ -1010,6 +1021,7 @@ TEST_F(QnnHTPBackendTests, EPRejectsDynamicShapesF32) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
   provider_options["enable_htp_fp16_precision"] = "1";  // QNN EP will use fp16 precision.
                                                         // CPU EP will use fp32, so we can relax accuracy requirements.
 
diff --git a/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc b/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc
index a3f0ed55b8..dda03829dc 100644
--- a/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc
+++ b/onnxruntime/test/providers/qnn/qnn_ep_context_test.cc
@@ -83,6 +83,7 @@ void QnnContextBinaryMultiPartitionTestBody(bool single_ep_node = true) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   const std::unordered_map<std::string, int> domain_to_version = {{"", 13}, {kMSDomain, 1}};
 
@@ -225,6 +226,7 @@ TEST_F(QnnHTPBackendTests, QnnContextBinaryGeneration2InputTypes) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   const std::unordered_map<std::string, int> domain_to_version = {{"", 13}, {kMSDomain, 1}};
 
@@ -275,6 +277,7 @@ TEST_F(QnnHTPBackendTests, QnnContextGeneration2InputsOrderIssue) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   // Add kMSDomain to cover contrib op like Gelu
   const std::unordered_map<std::string, int> domain_to_version = {{"", 13}, {kMSDomain, 1}};
@@ -311,6 +314,7 @@ TEST_F(QnnHTPBackendTests, QnnContextGenerationNodeNamePrefix) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
   std::string node_name_prefix = "node_name_prefix_test";
 
   // Add kMSDomain to cover contrib op like Gelu
@@ -353,6 +357,7 @@ TEST_F(QnnHTPBackendTests, QnnContextBinaryCacheEmbedModeTest) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
   const std::string context_binary_file = "./qnn_context_binary_test.onnx";
   std::remove(context_binary_file.c_str());
 
@@ -401,6 +406,7 @@ TEST_F(QnnHTPBackendTests, QnnContextBinaryCacheNonEmbedModeTest) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
   const std::string context_binary_file = "./testdata/qnn_context_cache_non_embed.onnx";
   std::string qnn_ctx_bin = "./testdata/qnn_context_cache_non_embed.onnx_QNNExecutionProvider_QNN_8283143575221199085_1_0.bin";
 
@@ -482,6 +488,7 @@ TEST_F(QnnHTPBackendTests, QnnContextBinaryCache_InvalidGraph) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
   const std::string context_binary_file = "./qnn_context_cache_non_embed.onnx";
   std::filesystem::path context_bin = "qnn_context_cache_non_embed.onnx_QNNExecutionProvider_QNN_8283143575221199085_1_0.bin";
   std::remove(context_binary_file.c_str());
@@ -579,6 +586,7 @@ TEST_F(QnnHTPBackendTests, QnnContextBinaryRelativePathTest) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(QnnExecutionProviderWithOptions(provider_options)));
   ASSERT_STATUS_OK(session_object.Load(model_data.data(), static_cast<int>(model_data.size())));
@@ -609,6 +617,7 @@ TEST_F(QnnHTPBackendTests, QnnContextBinaryAbsolutePathTest) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(QnnExecutionProviderWithOptions(provider_options)));
   ASSERT_STATUS_OK(session_object.Load(model_data.data(), static_cast<int>(model_data.size())));
@@ -634,6 +643,7 @@ TEST_F(QnnHTPBackendTests, QnnContextBinaryFileNotExistTest) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(QnnExecutionProviderWithOptions(provider_options)));
   ASSERT_STATUS_OK(session_object.Load(model_data.data(), static_cast<int>(model_data.size())));
@@ -659,6 +669,7 @@ TEST_F(QnnHTPBackendTests, QnnContextBinaryFileEmptyStringTest) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(QnnExecutionProviderWithOptions(provider_options)));
   ASSERT_STATUS_OK(session_object.Load(model_data.data(), static_cast<int>(model_data.size())));
@@ -676,6 +687,7 @@ TEST_F(QnnHTPBackendTests, QnnContextBinary2InputsTest) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
   const std::string context_binary_file = "./qnn_context_binary_2inputs_test.onnx";
   std::remove(context_binary_file.c_str());
 
@@ -727,6 +739,7 @@ TEST_F(QnnHTPBackendTests, QnnContextBinaryCache_SingleNodeNameNotMatchGraphName
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
   const std::string context_binary_file = "./qnn_context_cache_non_embed.onnx";
   std::filesystem::path context_bin = "qnn_context_cache_non_embed.onnx_QNNExecutionProvider_QNN_8283143575221199085_1_0.bin";
   std::remove(context_binary_file.c_str());
@@ -804,6 +817,7 @@ TEST_F(QnnHTPBackendTests, QnnMultiContextEmbeded) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   Ort::SessionOptions so;
   so.AppendExecutionProvider("QNN", provider_options);
@@ -819,6 +833,7 @@ TEST_F(QnnHTPBackendTests, QnnMultiContextExternal) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   Ort::SessionOptions so;
   so.AppendExecutionProvider("QNN", provider_options);
@@ -950,6 +965,7 @@ TEST_F(QnnHTPBackendTests, QnnContextShareAcrossSessions1) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   // Create QDQ models
   std::vector<std::string> onnx_model_paths{"./weight_share1.onnx", "./weight_share2.onnx"};
@@ -1047,6 +1063,7 @@ TEST_F(QnnHTPBackendTests, QnnContextShareAcrossSessions2) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   // Create QDQ models
   std::vector<std::string> onnx_model_paths{"./weight_share21.onnx", "./weight_share22.onnx"};
diff --git a/onnxruntime/test/providers/qnn/qnn_test_utils.cc b/onnxruntime/test/providers/qnn/qnn_test_utils.cc
index 79e7d39e85..3f6efc453c 100644
--- a/onnxruntime/test/providers/qnn/qnn_test_utils.cc
+++ b/onnxruntime/test/providers/qnn/qnn_test_utils.cc
@@ -278,7 +278,7 @@ static BackendSupport GetHTPSupport(const onnxruntime::logging::Logger& logger)
   MockKernelLookup kernel_lookup;
   onnxruntime::GraphViewer graph_viewer(graph);
   std::unique_ptr<onnxruntime::IExecutionProvider> qnn_ep = QnnExecutionProviderWithOptions(
-      {{"backend_path", "QnnHtp.dll"}});
+      {{"backend_path", "QnnHtp.dll"}, {"offload_graph_io_quantization", "0"}});
 
   qnn_ep->SetLogger(&logger);
   auto result = qnn_ep->GetCapability(graph_viewer, kernel_lookup);
@@ -341,7 +341,7 @@ static BackendSupport GetCPUSupport(const onnxruntime::logging::Logger& logger)
   MockKernelLookup kernel_lookup;
   onnxruntime::GraphViewer graph_viewer(graph);
   std::unique_ptr<onnxruntime::IExecutionProvider> qnn_ep = QnnExecutionProviderWithOptions(
-      {{"backend_path", "QnnCpu.dll"}});
+      {{"backend_path", "QnnCpu.dll"}, {"offload_graph_io_quantization", "0"}});
 
   qnn_ep->SetLogger(&logger);
   auto result = qnn_ep->GetCapability(graph_viewer, kernel_lookup);
diff --git a/onnxruntime/test/providers/qnn/reduce_op_test.cc b/onnxruntime/test/providers/qnn/reduce_op_test.cc
index 13173d9a87..69df89ebcf 100644
--- a/onnxruntime/test/providers/qnn/reduce_op_test.cc
+++ b/onnxruntime/test/providers/qnn/reduce_op_test.cc
@@ -82,6 +82,7 @@ static void RunReduceTest(const std::string& op_type,
                           float fp32_abs_err = 1e-5f,
                           bool enable_fp16 = false) {
   ProviderOptions provider_options;
+  provider_options["offload_graph_io_quantization"] = "0";
   if (enable_fp16) {
 #if defined(_WIN32)
     provider_options["backend_path"] = "QnnHtp.dll";
@@ -401,6 +402,7 @@ static void RunReduceOpQDQTest(const std::string& op_type,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   constexpr bool noop_with_empty_axes = false;
   const bool axes_as_input = ReduceOpHasAxesInput(op_type, opset);  // Later opsets have "axes" as an input.
diff --git a/onnxruntime/test/providers/qnn/reshape_expand_op_test.cc b/onnxruntime/test/providers/qnn/reshape_expand_op_test.cc
index 3964edc114..21abc66e67 100644
--- a/onnxruntime/test/providers/qnn/reshape_expand_op_test.cc
+++ b/onnxruntime/test/providers/qnn/reshape_expand_op_test.cc
@@ -30,6 +30,7 @@ static void RunReshapeExpandTestOnCPU(const std::string& op_type,
 #else
   provider_options["backend_path"] = "libQnnCpu.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildOpTestCase<DataType, int64_t>(op_type, {input_def}, {shape_def}, attrs),
                   provider_options,
@@ -161,6 +162,7 @@ static void RunReshapeExpandTestOnHTP(const std::string& op_type,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildOpTestCase<DataType, int64_t>(op_type, {input_def}, {shape_def}, attrs),
                   provider_options,
@@ -185,6 +187,7 @@ static void RunQDQReshapeExpandTestOnHTP(const std::string& op_type,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   auto f32_model_builder = BuildOpTestCase<float, int64_t>(op_type, {input_def}, {shape_def}, attrs);
   auto qdq_model_builder = BuildQDQReshapeExpandTestCase<QType>(op_type, input_def, shape_def, attrs, use_contrib_qdq);
diff --git a/onnxruntime/test/providers/qnn/resize_test.cc b/onnxruntime/test/providers/qnn/resize_test.cc
index 15612e3267..15fcd039a0 100644
--- a/onnxruntime/test/providers/qnn/resize_test.cc
+++ b/onnxruntime/test/providers/qnn/resize_test.cc
@@ -127,6 +127,7 @@ static void RunCPUResizeOpTest(const TestInputDef<float>& input_def, const std::
 #else
   provider_options["backend_path"] = "libQnnCpu.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(GetResizeModelBuilder(input_def, sizes_data, mode, coordinate_transformation_mode, nearest_mode),
                   provider_options,
@@ -145,6 +146,7 @@ static void RunCPUResizeOpTestWithScales(const TestInputDef<float>& input_def, c
 #else
   provider_options["backend_path"] = "libQnnCpu.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(GetResizeModelBuilderWithScales(input_def, scales_data, mode, coordinate_transformation_mode, nearest_mode),
                   provider_options,
@@ -166,6 +168,7 @@ static void RunQDQResizeOpTest(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestQDQModelAccuracy(GetResizeModelBuilder(input_def, sizes_data, mode, coordinate_transformation_mode, nearest_mode),
                        GetQDQResizeModelBuilder<QuantType>(input_def, sizes_data, mode, coordinate_transformation_mode,
diff --git a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
index 7541d94bac..c0e567b07d 100644
--- a/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/simple_op_htp_test.cc
@@ -32,6 +32,7 @@ static void RunOpTestOnCPU(const std::string& op_type,
 #else
   provider_options["backend_path"] = "libQnnCpu.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(BuildOpTestCase<InputType>(op_type, input_defs, {}, attrs, op_domain),
                   provider_options,
@@ -129,6 +130,7 @@ static void RunQDQOpTest(const std::string& op_type,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestQDQModelAccuracy(BuildOpTestCase<float>(op_type, input_defs, {}, attrs, op_domain),
                        BuildQDQOpTestCase<InputQType>(op_type, input_defs, {}, attrs, op_domain, use_contrib_qdq),
@@ -780,6 +782,7 @@ TEST_F(QnnHTPBackendTests, QuantAccuracyTest) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   // Note: a graph input -> Q -> DQ -> is optimized by Qnn to have a perfectly accurate output.
   // ORT's CPU EP, on the otherhand, actually quantizes and dequantizes the input, which leads to different outputs.
@@ -1206,6 +1209,7 @@ TEST_F(QnnHTPBackendTests, Add_U8_U16_Convert) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   TestQDQModelAccuracy(BuildOpTestCase<float>("Add", {input0_def, input1_def}, {}, {}, kOnnxDomain),
                        BuildQDQConvertAddTestCase(input0_def, input1_def),
@@ -1271,6 +1275,7 @@ TEST_F(QnnHTPBackendTests, DQ_Q_ConvertFusion_SameType) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   QuantParams<uint8_t> out_qparams_u8 = {1.0f, 128};
   QuantParams<uint16_t> out_qparams_u16 = {1.0f, 32768};
diff --git a/onnxruntime/test/providers/qnn/slice_htp_test.cc b/onnxruntime/test/providers/qnn/slice_htp_test.cc
index 07c97d2d7b..dc16192188 100644
--- a/onnxruntime/test/providers/qnn/slice_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/slice_htp_test.cc
@@ -84,6 +84,7 @@ static void RunSliceQDQTest(const TestInputDef<float>& data_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   const std::vector<TestInputDef<float>> f32_inputs = {data_def};
   const std::vector<TestInputDef<int64_t>> int64_inputs = {starts_def, ends_def, axes_def, steps_def};
diff --git a/onnxruntime/test/providers/qnn/split_op_test.cc b/onnxruntime/test/providers/qnn/split_op_test.cc
index 6dc721edb4..2b8a913ba4 100644
--- a/onnxruntime/test/providers/qnn/split_op_test.cc
+++ b/onnxruntime/test/providers/qnn/split_op_test.cc
@@ -276,6 +276,7 @@ static void RunQDQSplitOpTestOnHTP(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   const bool split_is_input = opset >= 13;
   auto f32_model_builder = BuildSplitTestCase<float>(input_def, split, split_is_input, axis, num_outputs);
diff --git a/onnxruntime/test/providers/qnn/squeeze_unsqueeze_op_test.cc b/onnxruntime/test/providers/qnn/squeeze_unsqueeze_op_test.cc
index 33d2f64c03..249a5da733 100644
--- a/onnxruntime/test/providers/qnn/squeeze_unsqueeze_op_test.cc
+++ b/onnxruntime/test/providers/qnn/squeeze_unsqueeze_op_test.cc
@@ -161,6 +161,7 @@ static void RunQDQSqueezeTestOnHTP(const std::string& op_type,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   auto f32_model_builder = BuildOpTestCase<float, int64_t>(op_type, {input_def}, {axes_def}, {});
   auto qdq_model_builder = BuildQDQSqueezeTestCase<QType>(op_type, input_def, axes_def, use_contrib_qdq);
@@ -219,6 +220,7 @@ TEST_F(QnnHTPBackendTests, Squeeze_Rank5_Rank2_f32) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(model_fn,
                   provider_options,
@@ -275,6 +277,7 @@ TEST_F(QnnHTPBackendTests, Unsqueeze_Rank3_Rank5_f32) {
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   RunQnnModelTest(model_fn,
                   provider_options,
diff --git a/onnxruntime/test/providers/qnn/tile_op_test.cc b/onnxruntime/test/providers/qnn/tile_op_test.cc
index 2b35c730ee..20fb280117 100644
--- a/onnxruntime/test/providers/qnn/tile_op_test.cc
+++ b/onnxruntime/test/providers/qnn/tile_op_test.cc
@@ -98,6 +98,7 @@ static void RunQDQTileTestOnHTP(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   auto f32_model_builder = BuildOpTestCase<float, int64_t>("Tile", {input_def}, {repeats_def}, {});
   auto qdq_model_builder = BuildQDQTileTestCase<QType>(input_def, repeats_def, use_contrib_qdq);
diff --git a/onnxruntime/test/providers/qnn/topk_op_test.cc b/onnxruntime/test/providers/qnn/topk_op_test.cc
index 5a9351b936..aee86428b1 100644
--- a/onnxruntime/test/providers/qnn/topk_op_test.cc
+++ b/onnxruntime/test/providers/qnn/topk_op_test.cc
@@ -154,6 +154,7 @@ static void RunQDQTopKTestOnHTP(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   auto f32_model_builder = BuildTopKTestCase<float>(input_def, k_def, attrs);
   auto qdq_model_builder = BuildQDQTopKTestCase<QType>(input_def, k_def, attrs, use_contrib_qdq);
diff --git a/onnxruntime/test/providers/qnn/transpose_htp_test.cc b/onnxruntime/test/providers/qnn/transpose_htp_test.cc
index 63746e22d2..b7bec34f7d 100644
--- a/onnxruntime/test/providers/qnn/transpose_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/transpose_htp_test.cc
@@ -70,6 +70,7 @@ static void RunTransposeQDQTest(const TestInputDef<float>& input_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   // Runs model with DQ-> Transpose -> Q and compares the outputs of the CPU and QNN EPs.
   TestQDQModelAccuracy(BuildTransposeTestCase<float>(input_def, attrs),
diff --git a/onnxruntime/test/providers/qnn/where_htp_test.cc b/onnxruntime/test/providers/qnn/where_htp_test.cc
index ec525ef4eb..e1b0604b31 100644
--- a/onnxruntime/test/providers/qnn/where_htp_test.cc
+++ b/onnxruntime/test/providers/qnn/where_htp_test.cc
@@ -79,6 +79,7 @@ static void RunWhereQDQTest(const TestInputDef<bool>& condition_def,
 #else
   provider_options["backend_path"] = "libQnnHtp.so";
 #endif
+  provider_options["offload_graph_io_quantization"] = "0";
 
   // Runs model with DQ-> Where -> Q and compares the outputs of the CPU and QNN EPs.
   TestQDQModelAccuracy(BuildWhereTestCase(condition_def, x_def, y_def),