diff --git a/onnxruntime/core/framework/node_unit.cc b/onnxruntime/core/framework/node_unit.cc
index 174942b903..4e2f22dea1 100644
--- a/onnxruntime/core/framework/node_unit.cc
+++ b/onnxruntime/core/framework/node_unit.cc
@@ -283,6 +283,7 @@ ProviderType NodeUnit::GetExecutionProviderType() const noexcept { return target
 void NodeUnit::InitForSingleNode() {
   const auto& input_defs = target_node_.InputDefs();
   const auto& output_defs = target_node_.OutputDefs();
+  const auto& node_attrs = target_node_.GetAttributes();
   auto qlinear_type = GetQLinearOpType(target_node_);
   if (qlinear_type == QLinearOpType::Unknown || IsVariadicQLinearOp(qlinear_type)) {  // TODO, add variadic support
     // Not a Qlinear op, add all inputs / outputs
@@ -321,19 +322,35 @@ void NodeUnit::InitForSingleNode() {
     // DequantizeLinear has 3 inputs
     // x, x_scale, x_zp
     // output is not quantized
-    inputs_.push_back(NodeUnitIODef{*input_defs[0], NodeUnitIODef::QuantParam{*input_defs[1], input_defs.size() == 3
-                                                                                                  ? input_defs[2]
-                                                                                                  : nullptr}});
+
+    // Get the DQ axis attribute if available.
+    std::optional<int64_t> axis;
+    if (auto entry = node_attrs.find("axis"); entry != node_attrs.end()) {
+      axis = entry->second.i();
+    }
+
+    inputs_.push_back(NodeUnitIODef{*input_defs[0],
+                                    NodeUnitIODef::QuantParam{*input_defs[1],
+                                                              input_defs.size() == 3 ? input_defs[2] : nullptr,
+                                                              axis}});
     outputs_.push_back(NodeUnitIODef{*output_defs[0], std::nullopt});
 
   } else if (qlinear_type == QLinearOpType::QuantizeLinear) {
     // QuantizeLinear the input is not quantized and has 3 inputs
     // x, y_scale, y_zp (optional)
     // The output is quantized
+
+    // Get the Q axis attribute if available.
+    std::optional<int64_t> axis;
+    if (auto entry = node_attrs.find("axis"); entry != node_attrs.end()) {
+      axis = entry->second.i();
+    }
+
     inputs_.push_back(NodeUnitIODef{*input_defs[0], std::nullopt});
-    outputs_.push_back(NodeUnitIODef{*output_defs[0], NodeUnitIODef::QuantParam{*input_defs[1], input_defs.size() == 3
-                                                                                                    ? input_defs[2]
-                                                                                                    : nullptr}});
+    outputs_.push_back(NodeUnitIODef{*output_defs[0],
+                                     NodeUnitIODef::QuantParam{*input_defs[1],
+                                                               input_defs.size() == 3 ? input_defs[2] : nullptr,
+                                                               axis}});
   } else {
     ORT_THROW("The QLinear op [", static_cast<uint8_t>(qlinear_type), "] is not supported");
   }
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc
index 4eeca98454..1713f201c9 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/conv_op_builder.cc
@@ -120,7 +120,8 @@ Status ConvOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
   if (is_npu_backend) {
     const auto& input_1 = inputs[1];  // weight
     bool is_per_axis_quant = false;
-    ORT_RETURN_IF_ERROR(qnn_model_wrapper.IsPerChannelQuantized(input_1, is_per_axis_quant));
+    int64_t quant_axis = 0;
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.IsPerChannelQuantized(input_1, is_per_axis_quant, quant_axis));
 
     if (is_per_axis_quant) {
       int32_t elem_data_type = 0;
@@ -129,6 +130,13 @@ Status ConvOpBuilder::IsOpSupported(QnnModelWrapper& qnn_model_wrapper,
       const bool is_signed_type = (elem_data_type == ONNX_NAMESPACE::TensorProto_DataType_INT8) ||
                                   (elem_data_type == ONNX_NAMESPACE::TensorProto_DataType_INT16);
       ORT_RETURN_IF_NOT(is_signed_type, "Conv weights must be of a signed quantized type if quantized per-channel");
+
+      // Require the expected weight quantization axis. NOTE(review): negative axis values are not normalized first.
+      if (conv_type == OnnxConvType::kConvTranspose) {
+        ORT_RETURN_IF_NOT(quant_axis == 1, "ConvTranspose's input[1] must use axis == 1 for per-channel quantization");
+      } else {
+        ORT_RETURN_IF_NOT(quant_axis == 0, "Conv's input[1] must use axis == 0 for per-channel quantization");
+      }
     }
   }
 
diff --git a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
index 7e2d1ef05b..285781aaa3 100644
--- a/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
+++ b/onnxruntime/core/providers/qnn/builder/opbuilder/simple_op_builder.cc
@@ -41,7 +41,7 @@ class SimpleOpBuilder : public BaseOpBuilder {
                                   QnnQuantParamsWrapper& quant_param) const override ORT_MUST_USE_RESULT;
 
  private:
-  Status ExplicitOpCheck(const NodeUnit& node_unit) const;
+  Status ExplicitOpCheck(QnnModelWrapper& qnn_model_wrapper, const NodeUnit& node_unit) const;
   Status ProcessSigmoidOrTanhOutput(QnnModelWrapper& qnn_model_wrapper,
                                     const NodeUnit& node_unit,
                                     std::vector<std::string>&& input_names,
@@ -138,7 +138,8 @@ Status SimpleOpBuilder::ProcessInputs(QnnModelWrapper& qnn_model_wrapper,
   return Status::OK();
 }
 
-Status SimpleOpBuilder::ExplicitOpCheck(const NodeUnit& node_unit) const {
+Status SimpleOpBuilder::ExplicitOpCheck(QnnModelWrapper& qnn_model_wrapper,
+                                        const NodeUnit& node_unit) const {
   const std::string& op_type = node_unit.OpType();
 
   if (op_type == "GridSample") {
@@ -158,6 +159,20 @@ Status SimpleOpBuilder::ExplicitOpCheck(const NodeUnit& node_unit) const {
                       "QNN EP only supports Min and Max operators with exactly 2 inputs.");
   }
 
+  if (op_type == "DequantizeLinear") {
+    bool is_per_chan_quant = false;
+    int64_t quant_axis = 0;
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.IsPerChannelQuantized(node_unit.Inputs()[0], is_per_chan_quant, quant_axis));
+    ORT_RETURN_IF(is_per_chan_quant, "QNN EP does not support a standalone DQ op with per-channel quantization");
+  }
+
+  if (op_type == "QuantizeLinear") {
+    bool is_per_chan_quant = false;
+    int64_t quant_axis = 0;
+    ORT_RETURN_IF_ERROR(qnn_model_wrapper.IsPerChannelQuantized(node_unit.Outputs()[0], is_per_chan_quant, quant_axis));
+    ORT_RETURN_IF(is_per_chan_quant, "QNN EP does not support a standalone Q op with per-channel quantization");
+  }
+
   return Status::OK();
 }
 
@@ -475,7 +490,7 @@ Status SimpleOpBuilder::ProcessAttributesAndOutputs(QnnModelWrapper& qnn_model_w
   const std::string& op_type = node_unit.OpType();
 
   if (do_op_validation) {
-    ORT_RETURN_IF_ERROR(ExplicitOpCheck(node_unit));
+    ORT_RETURN_IF_ERROR(ExplicitOpCheck(qnn_model_wrapper, node_unit));
     // Skip the op validation for DepthToSpace & SpaceToDepth if it's not NHWC data layout
     if (node_unit.Domain() != kMSInternalNHWCDomain && (op_type == "DepthToSpace" || op_type == "SpaceToDepth" || op_type == "GridSample")) {
       return Status::OK();
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc
index 6f9ac20285..3a8a8af17b 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc
+++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.cc
@@ -412,9 +412,10 @@ Status QnnModelWrapper::UnpackScales(const std::string& initializer_name, std::v
 
 // Checks if a tensor in the ONNX graph is per-channel quantized.
 Status QnnModelWrapper::IsPerChannelQuantized(const onnxruntime::NodeUnitIODef& io_def,
-                                              /*out*/ bool& is_per_axis) const {
+                                              /*out*/ bool& is_per_channel,
+                                              /*out*/ int64_t& axis) const {
   if (!io_def.quant_param) {
-    is_per_axis = false;
+    is_per_channel = false;
     return Status::OK();
   }
 
@@ -432,7 +433,12 @@ Status QnnModelWrapper::IsPerChannelQuantized(const onnxruntime::NodeUnitIODef&
   const bool is_scalar_or_1_elem_vector = scale_shape.NumDimensions() == 0 ||
                                           (scale_shape.NumDimensions() == 1 && scale_shape.Size() == 1);
 
-  is_per_axis = !is_scalar_or_1_elem_vector;
+  is_per_channel = !is_scalar_or_1_elem_vector;
+
+  if (is_per_channel) {
+    axis = io_def.quant_param->axis.value_or(1);  // 1 is default axis for Q/DQ ops.
+  }
+
   return Status::OK();
 }
 
diff --git a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h
index 3fd268d1b3..0705a1d1b8 100644
--- a/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h
+++ b/onnxruntime/core/providers/qnn/builder/qnn_model_wrapper.h
@@ -218,8 +218,10 @@ class QnnModelWrapper {
   // Unpack zero-points from initializer and convert to int32_t (1 zero-point for per-tensor, > 1 for per-channel).
   Status UnpackZeroPoints(const std::string& initializer_name, std::vector<int32_t>& zero_points) const;
 
-  // Checks if a tensor in the ONNX graph is per-axis quantized.
-  Status IsPerChannelQuantized(const onnxruntime::NodeUnitIODef& io_def, /*out*/ bool& is_per_axis) const;
+  // Checks if a tensor in the ONNX graph is per-channel quantized. `axis` is only set if so (1 when not specified).
+  Status IsPerChannelQuantized(const onnxruntime::NodeUnitIODef& io_def,
+                               /*out*/ bool& is_per_channel,
+                               /*out*/ int64_t& axis) const;
 
  private:
   bool CreateQnnInputOutputTensors(const std::string& qnn_node_name,
diff --git a/onnxruntime/test/providers/qnn/conv_test.cc b/onnxruntime/test/providers/qnn/conv_test.cc
index b88578a915..5177a629ce 100644
--- a/onnxruntime/test/providers/qnn/conv_test.cc
+++ b/onnxruntime/test/providers/qnn/conv_test.cc
@@ -154,6 +154,7 @@ static GetTestQDQModelFn<ActivationQType> BuildQDQPerChannelConvTestCase(const s
                                                                          const TestInputDef<float>& input_def,
                                                                          const TestInputDef<float>& weights_def,
                                                                          const TestInputDef<float>& bias_def,
+                                                                         int64_t weight_quant_axis,
                                                                          const std::vector<int64_t>& strides,
                                                                          const std::vector<int64_t>& pads,
                                                                          const std::vector<int64_t>& dilations,
@@ -161,8 +162,9 @@ static GetTestQDQModelFn<ActivationQType> BuildQDQPerChannelConvTestCase(const s
                                                                          const std::string& auto_pad = "NOTSET",
                                                                          bool use_contrib_qdq = false) {
   return [conv_op_type, input_def, weights_def, bias_def, strides, pads,
-          dilations, group, auto_pad, use_contrib_qdq](ModelTestBuilder& builder,
-                                                       std::vector<QuantParams<ActivationQType>>& output_qparams) {
+          dilations, group, auto_pad, use_contrib_qdq,
+          weight_quant_axis](ModelTestBuilder& builder,
+                             std::vector<QuantParams<ActivationQType>>& output_qparams) {
     std::vector<NodeArg*> conv_inputs;
 
     // input -> Q/DQ ->
@@ -174,7 +176,6 @@ static GetTestQDQModelFn<ActivationQType> BuildQDQPerChannelConvTestCase(const s
 
     // Quantized(weights) -> DQ ->
     ORT_ENFORCE(weights_def.IsInitializer() && weights_def.IsRawData());
-    int64_t weight_quant_axis = conv_op_type == "Conv" ? 0 : 1;  // 0 for Conv, 1 for ConvTranspose
     std::vector<float> weight_scales;
     std::vector<WeightQType> weight_zero_points;
     GetTestInputQuantParamsPerChannel<WeightQType>(weights_def, weight_scales, weight_zero_points,
@@ -283,6 +284,7 @@ template <typename ActivationQType, typename WeightQType>
 static void RunHTPConvOpPerChannelTest(const std::string& conv_op_type, const TestInputDef<float>& input_def,
                                        const TestInputDef<float>& weights_def,
                                        const TestInputDef<float>& bias_def,
+                                       int64_t weight_quant_axis,
                                        const std::vector<int64_t>& strides,
                                        const std::vector<int64_t>& pads,
                                        const std::vector<int64_t>& dilations,
@@ -303,8 +305,9 @@ static void RunHTPConvOpPerChannelTest(const std::string& conv_op_type, const Te
   auto f32_fn = BuildF32ConvTestCase(conv_op_type, input_def, weights_def, bias_def, strides, pads, dilations,
                                      group, auto_pad);
   auto qdq_fn = BuildQDQPerChannelConvTestCase<ActivationQType, WeightQType>(conv_op_type, input_def, weights_def,
-                                                                             bias_def, strides, pads, dilations,
-                                                                             group, auto_pad, use_contrib_qdq);
+                                                                             bias_def, weight_quant_axis, strides,
+                                                                             pads, dilations, group, auto_pad,
+                                                                             use_contrib_qdq);
   TestQDQModelAccuracy(f32_fn, qdq_fn, provider_options, opset, expected_ep_assignment, tolerance);
 }
 
@@ -713,6 +716,7 @@ TEST_F(QnnHTPBackendTests, ConvU8S8S32_PerChannel) {
                                               input_def,
                                               weight_def,
                                               bias_def,
+                                              0,             // weight quant axis
                                               {1, 1},        // Strides
                                               {0, 0, 0, 0},  // Pads
                                               {1, 1},        // Dilations
@@ -723,6 +727,34 @@ TEST_F(QnnHTPBackendTests, ConvU8S8S32_PerChannel) {
                                               13);    // opset
 }
 
+// Test that per-channel QDQ Conv is rejected (no nodes assigned to QNN EP) when the weight quant axis is not 0.
+TEST_F(QnnHTPBackendTests, Conv_PerChannel_UnsupportedAxis) {
+  std::vector<int64_t> input_shape = {1, 2, 4, 4};
+  std::vector<int64_t> weight_shape = {3, 2, 3, 3};
+  std::vector<int64_t> bias_shape = {3};
+
+  TestInputDef<float> input_def(input_shape, false,
+                                GetFloatDataInRange(-10.0f, 10.0f, TensorShape(input_shape).Size()));
+  TestInputDef<float> weight_def(weight_shape, true,
+                                 GetFloatDataInRange(-1.0f, 5.0f, TensorShape(weight_shape).Size()));
+  TestInputDef<float> bias_def(bias_shape, true,
+                               GetFloatDataInRange(-1.0f, 1.0f, TensorShape(bias_shape).Size()));
+
+  RunHTPConvOpPerChannelTest<uint8_t, int8_t>("Conv",
+                                              input_def,
+                                              weight_def,
+                                              bias_def,
+                                              2,             // weight quant axis (Conv is only accepted with 0)
+                                              {1, 1},        // Strides
+                                              {0, 0, 0, 0},  // Pads
+                                              {1, 1},        // Dilations
+                                              1,             // default group
+                                              "NOTSET",
+                                              ExpectedEPNodeAssignment::None,
+                                              false,  // use_qdq_contrib_ops
+                                              13);    // opset
+}
+
 // Test per-channel QDQ Conv. in0: u8, in1 (weight): s8, in2 (bias): s32, out: u8
 // \QNN\HTP\HTP\src\hexagon\prepare\graph_prepare.cc:203:ERROR:could not create op: q::QNN_Conv3d_w_scale
 // \QNN\HTP\HTP\src\hexagon\prepare\graph_prepare.cc:1187:ERROR:Op 0x1a preparation failed with err:-1
@@ -748,6 +780,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_Conv3D_U8S8S32_PerChannel) {
                                               input_def,
                                               weight_def,
                                               bias_def,
+                                              0,                   // weight quant axis
                                               {1, 1, 1},           // Strides
                                               {0, 0, 0, 0, 0, 0},  // Pads
                                               {1, 1, 1},           // Dilations
@@ -776,6 +809,7 @@ TEST_F(QnnHTPBackendTests, ConvDepthwiseU8S8S32_PerChannel) {
                                               input_def,
                                               weight_def,
                                               bias_def,
+                                              0,             // weight quant axis
                                               {1, 1},        // Strides
                                               {0, 0, 0, 0},  // Pads
                                               {1, 1},        // Dilations
@@ -811,6 +845,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_Conv3D_U8S8S32_PerChannel2) {
                                               input_def,
                                               weight_def,
                                               bias_def,
+                                              0,                   // weight quant axis
                                               {1, 1, 1},           // Strides
                                               {0, 0, 0, 0, 0, 0},  // Pads
                                               {1, 1, 1},           // Dilations
@@ -838,6 +873,7 @@ TEST_F(QnnHTPBackendTests, ConvTransposeU8S8S32_PerChannel) {
                                               input_def,
                                               weight_def,
                                               bias_def,
+                                              1,             // weight quant axis
                                               {1, 1},        // Strides
                                               {0, 0, 0, 0},  // Pads
                                               {1, 1},        // Dilations
@@ -848,6 +884,34 @@ TEST_F(QnnHTPBackendTests, ConvTransposeU8S8S32_PerChannel) {
                                               13);    // opset
 }
 
+// Test that per-channel QDQ ConvTranspose is rejected (no nodes assigned to QNN EP) when the weight quant axis is not 1.
+TEST_F(QnnHTPBackendTests, ConvTranspose_PerChannel_UnsupportedAxis) {
+  std::vector<int64_t> input_shape = {1, 2, 4, 4};
+  std::vector<int64_t> weight_shape = {2, 3, 3, 3};
+  std::vector<int64_t> bias_shape = {3};
+
+  TestInputDef<float> input_def(input_shape, false,
+                                GetFloatDataInRange(-10.0f, 10.0f, TensorShape(input_shape).Size()));
+  TestInputDef<float> weight_def(weight_shape, true,
+                                 GetFloatDataInRange(-1.0f, 5.0f, TensorShape(weight_shape).Size()));
+  TestInputDef<float> bias_def(bias_shape, true,
+                               GetFloatDataInRange(-1.0f, 1.0f, TensorShape(bias_shape).Size()));
+
+  RunHTPConvOpPerChannelTest<uint8_t, int8_t>("ConvTranspose",
+                                              input_def,
+                                              weight_def,
+                                              bias_def,
+                                              2,             // weight quant axis (ConvTranspose is only accepted with 1)
+                                              {1, 1},        // Strides
+                                              {0, 0, 0, 0},  // Pads
+                                              {1, 1},        // Dilations
+                                              1,             // default group
+                                              "NOTSET",
+                                              ExpectedEPNodeAssignment::None,
+                                              false,  // use_qdq_contrib_ops
+                                              13);    // opset
+}
+
 // ConvTranspose3D per-channel
 // Disable it for 2.21 since it failed, re-enabled it for 2.22
 TEST_F(QnnHTPBackendTests, DISABLED_ConvTranspose3D_U8S8S32_PerChannel) {
@@ -866,6 +930,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_ConvTranspose3D_U8S8S32_PerChannel) {
                                               input_def,
                                               weight_def,
                                               bias_def,
+                                              1,                   // weight quant axis
                                               {1, 1, 1},           // Strides
                                               {0, 0, 0, 0, 0, 0},  // Pads
                                               {1, 1, 1},           // Dilations
@@ -893,6 +958,7 @@ TEST_F(QnnHTPBackendTests, ConvU16S8S32_PerChannel) {
                                                input_def,
                                                weight_def,
                                                bias_def,
+                                               0,             // weight quant axis
                                                {1, 1},        // Strides
                                                {0, 0, 0, 0},  // Pads
                                                {1, 1},        // Dilations
@@ -928,6 +994,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_Conv3D_U16S8S32_PerChannel) {
                                                input_def,
                                                weight_def,
                                                bias_def,
+                                               0,                   // weight quant axis
                                                {1, 1, 1},           // Strides
                                                {0, 0, 0, 0, 0, 0},  // Pads
                                                {1, 1, 1},           // Dilations
@@ -955,6 +1022,7 @@ TEST_F(QnnHTPBackendTests, ConvTransposeU16S8S32_PerChannel) {
                                                input_def,
                                                weight_def,
                                                bias_def,
+                                               1,             // weight quant axis
                                                {1, 1},        // Strides
                                                {0, 0, 0, 0},  // Pads
                                                {1, 1},        // Dilations
@@ -982,6 +1050,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_ConvTranspose3D_U16S8S32_PerChannel) {
                                                input_def,
                                                weight_def,
                                                bias_def,
+                                               1,                   // weight quant axis
                                                {1, 1, 1},           // Strides
                                                {0, 0, 0, 0, 0, 0},  // Pads
                                                {1, 1, 1},           // Dilations
@@ -1010,6 +1079,7 @@ TEST_F(QnnHTPBackendTests, ConvDepthwiseU16S8S32_PerChannel) {
                                                input_def,
                                                weight_def,
                                                bias_def,
+                                               0,             // weight quant axis
                                                {1, 1},        // Strides
                                                {0, 0, 0, 0},  // Pads
                                                {1, 1},        // Dilations
@@ -1045,6 +1115,7 @@ TEST_F(QnnHTPBackendTests, DISABLED_Conv3D_U16S8S32_PerChannel2) {
                                                input_def,
                                                weight_def,
                                                bias_def,
+                                               0,                   // weight quant axis
                                                {1, 1, 1},           // Strides
                                                {0, 0, 0, 0, 0, 0},  // Pads
                                                {1, 1, 1},           // Dilations