diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
index 58b0a269d4..ce1e8c359e 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.cc
@@ -62,6 +62,8 @@ QLinearOpType GetQLinearOpType(const onnxruntime::Node& node) {
     return QLinearOpType::QLinearMatMul;
   else if (op_type == "QLinearAdd")
     return QLinearOpType::QLinearAdd;
+  else if (op_type == "QLinearSigmoid")
+    return QLinearOpType::QLinearSigmoid;
 
   return QLinearOpType::Unknown;
 }
@@ -232,8 +234,10 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co
 
         std::unique_ptr<uint8_t[]> unpacked_tensor;
         size_t tensor_byte_size;
-        auto status = onnxruntime::utils::UnpackInitializerData(zero_tensor, node.ModelPath(),
-                                                                unpacked_tensor, tensor_byte_size);
+        auto status = onnxruntime::utils::UnpackInitializerData(
+            zero_tensor,
+            node.ModelPath(),
+            unpacked_tensor, tensor_byte_size);
         if (!status.IsOK()) {
           LOGS_DEFAULT(ERROR) << "QLinearConv erro when unpack zero tensor:" << status.ErrorMessage();
           return false;
@@ -264,6 +268,24 @@ bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, co
   return true;
 }
 
+float GetQuantizationScale(const InitializedTensorSet& initializers, const Node& node, size_t idx) {
+  const auto& scale_tensor = *initializers.at(node.InputDefs()[idx]->Name());
+  return GetTensorFloatData(scale_tensor)[0];
+}
+
+common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers,
+                                        const Node& node, size_t idx, int32_t& zero_point) {
+  std::unique_ptr<uint8_t[]> unpacked_tensor;
+  size_t tensor_byte_size;
+  const auto& zero_point_tensor = *initializers.at(node.InputDefs()[idx]->Name());
+  ORT_RETURN_IF_ERROR(
+      onnxruntime::utils::UnpackInitializerData(zero_point_tensor, node.ModelPath(),
+                                                unpacked_tensor, tensor_byte_size));
+  // Onnx quantization uses uint8 [int8 not yet supported], need to cast to int32_t used by NNAPI
+  zero_point = static_cast<int32_t>(unpacked_tensor.get()[0]);
+  return Status::OK();
+}
+
 #define GET_TENSOR_DATA(FUNC_NAME, ELEMENT_TYPE, DATA)                                  \
   const ELEMENT_TYPE* GetTensor##FUNC_NAME(const ONNX_NAMESPACE::TensorProto& tensor) { \
     return tensor.DATA().empty()                                                        \
@@ -348,13 +370,13 @@ void GetFlattenOutputShape(const Node& node, const Shape& input_shape, int32_t&
   dim_2 = std::accumulate(input_shape.cbegin() + axis, input_shape.cend(), 1, std::multiplies<int32_t>());
 }
 
-bool IsValidSupportedNodesVec(const std::vector<size_t>& supported_node_vec, const GraphViewer& graph_viewer) {
-  if (supported_node_vec.empty())
+bool IsValidSupportedNodesGroup(const std::vector<size_t>& supported_node_group, const GraphViewer& graph_viewer) {
+  if (supported_node_group.empty())
     return false;
 
-  if (supported_node_vec.size() == 1) {
+  if (supported_node_group.size() == 1) {
     const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
-    const auto* node(graph_viewer.GetNode(node_indices[supported_node_vec[0]]));
+    const auto* node(graph_viewer.GetNode(node_indices[supported_node_group[0]]));
     const auto& op = node->OpType();
     // It is not worth it to perform a single Reshape/Flatten/Identity operator
     // which is only copying the data in NNAPI
@@ -368,49 +390,116 @@ bool IsValidSupportedNodesVec(const std::vector<size_t>& supported_node_vec, con
   return true;
 }
 
+bool IsInternalQuantizedNode(const Node& node) {
+  // These operators can use uint8 input without specific QLinear version of it
+  // However, the mode has to be internal to the graph/partition (they cannot consume graph inputs)
+  static const std::unordered_set<std::string> internal_quantized_op_types =
+      {
+          "Transpose",
+          "Resize",
+          "Concat",
+          "MaxPool",
+      };
+
+  if (!Contains(internal_quantized_op_types, node.OpType()))
+    return false;
+
+  int32_t input_type;
+  ORT_ENFORCE(GetType(*node.InputDefs()[0], input_type));
+
+  return input_type == ONNX_NAMESPACE::TensorProto_DataType_UINT8;
+}
+
+// We support some operators running using uint8 internally
+// These nodes cannot use a graph input as input since onnx graph input does not carry scale/zero point info
+bool IsInternalQuantizationSupported(const Node& node, const std::unordered_set<std::string>& node_outputs_in_group) {
+  const auto& op_type = node.OpType();
+
+  // The node's input(s) have to be an output of node(s) within the group
+  // If not, then this node is using graph/partition input(s) as input(s)
+  const auto& input_defs = node.InputDefs();
+
+  // We only need to check input0 for all operators except "Concat"
+  bool check_all_inputs = op_type == "Concat";
+
+  for (size_t i = 0; i < (check_all_inputs ? input_defs.size() : 1); i++) {
+    if (!Contains(node_outputs_in_group, input_defs[i]->Name())) {
+      LOGS_DEFAULT(VERBOSE) << "Node [" << node.Name() << "] type: [" << op_type
+                            << "] has input [" << input_defs[i]->Name()
+                            << "] does not support using graph input(quantized) as node input";
+      return false;
+    }
+  }
+
+  return true;
+}
+
 bool IsNodeSupported(const Node& node, const GraphViewer& graph_viewer, const OpSupportCheckParams& params) {
   const auto& op_support_checkers = GetOpSupportCheckers();
-  if (Contains(op_support_checkers, node.OpType())) {
-    const auto* op_support_checker = op_support_checkers.at(node.OpType());
-    return op_support_checker->IsOpSupported(graph_viewer.GetAllInitializedTensors(), node, params);
-  } else {
+  if (!Contains(op_support_checkers, node.OpType()))
     return false;
-  }
+
+  const auto* op_support_checker = op_support_checkers.at(node.OpType());
+  return op_support_checker->IsOpSupported(graph_viewer.GetAllInitializedTensors(), node, params);
+}
+
+bool IsNodeSupportedInternal(const Node& node, const GraphViewer& graph_viewer,
+                             const OpSupportCheckParams& params,
+                             const std::unordered_set<std::string>& node_outputs_in_group) {
+  if (!IsNodeSupported(node, graph_viewer, params))
+    return false;
+
+  // We also want to check if the node is supported as an internal quantized node
+  if (IsInternalQuantizedNode(node))
+    return IsInternalQuantizationSupported(node, node_outputs_in_group);
+  else  // This is not a internal quantized node, it is supported
+    return true;
 }
 
 std::vector<std::vector<size_t>> GetSupportedNodes(const GraphViewer& graph_viewer, const OpSupportCheckParams& params) {
-  std::vector<std::vector<size_t>> supported_node_vecs;
+  std::vector<std::vector<size_t>> supported_node_groups;
   if (params.android_sdk_ver < ORT_NNAPI_MIN_API_LEVEL) {
     LOGS_DEFAULT(WARNING) << "All ops will fallback to CPU EP, because Android API level [" << params.android_sdk_ver
                           << "] is lower than minimal supported API level [" << ORT_NNAPI_MIN_API_LEVEL
                           << "] of this build for NNAPI";
-    return supported_node_vecs;
+    return supported_node_groups;
   }
 
-  std::vector<size_t> supported_node_vec;
+  // This holds the supported node's topological index
+  std::vector<size_t> supported_node_group;
+  // This holds the NodeIndex of the nodes in the above group
+  std::unordered_set<std::string> node_outputs_in_group;
   const auto& node_indices = graph_viewer.GetNodesInTopologicalOrder();
   for (size_t i = 0; i < node_indices.size(); i++) {
     const auto* node(graph_viewer.GetNode(node_indices[i]));
-    bool supported = IsNodeSupported(*node, graph_viewer, params);
+    bool supported = IsNodeSupportedInternal(*node, graph_viewer, params, node_outputs_in_group);
     LOGS_DEFAULT(VERBOSE) << "Operator type: [" << node->OpType()
                           << "] index: [" << i
                           << "] name: [" << node->Name()
                           << "] supported: [" << supported
                           << "]";
     if (supported) {
-      supported_node_vec.push_back(i);
-    } else {
-      if (IsValidSupportedNodesVec(supported_node_vec, graph_viewer)) {
-        supported_node_vecs.push_back(supported_node_vec);
-        supported_node_vec.clear();
+      supported_node_group.push_back(i);
+
+      // We want to put all the output names of nodes in the current group for easy query
+      // See IsInternalQuantizationSupported()
+      for (const auto* output : node->OutputDefs()) {
+        node_outputs_in_group.insert(output->Name());
       }
+    } else {
+      if (IsValidSupportedNodesGroup(supported_node_group, graph_viewer)) {
+        supported_node_groups.push_back(supported_node_group);
+      }
+
+      supported_node_group.clear();
+      node_outputs_in_group.clear();
     }
   }
 
-  if (IsValidSupportedNodesVec(supported_node_vec, graph_viewer))
-    supported_node_vecs.push_back(supported_node_vec);
+  if (IsValidSupportedNodesGroup(supported_node_group, graph_viewer))
+    supported_node_groups.push_back(supported_node_group);
 
-  return supported_node_vecs;
+  return supported_node_groups;
 }
 
 std::string Shape2String(const std::vector<uint32_t>& shape) {
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
index cad22541ce..bc9229017b 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/helper.h
@@ -76,6 +76,7 @@ enum class QLinearOpType : uint8_t {
   QLinearConv,
   QLinearMatMul,
   QLinearAdd,
+  QLinearSigmoid,
   // Not yet supported
   // QLinearAveragePool,
   // QLinearMul,
@@ -107,6 +108,11 @@ bool HasValidQuantizationScales(const InitializedTensorSet& initializers, const
 bool HasValidQuantizationZeroPoints(const InitializedTensorSet& initializers, const Node& node,
                                     const std::vector<size_t>& indices);
 
+float GetQuantizationScale(const InitializedTensorSet& initializers, const Node& node, size_t idx);
+
+common::Status GetQuantizationZeroPoint(const InitializedTensorSet& initializers,
+                                        const Node& node, size_t idx, int32_t& zero_point) ORT_MUST_USE_RESULT;
+
 // Get initialize tensort float/int32/int64 data without unpacking
 // TODO, move to ort framework
 const float* GetTensorFloatData(const ONNX_NAMESPACE::TensorProto& tensor);
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
index c9c4eb72ce..ead5a7eb1b 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/model_builder.cc
@@ -143,7 +143,13 @@ std::unordered_map<std::string, vector<const Node*>> GetAllQuantizedOpInputs(con
   for (const auto& node_idx : node_indices) {
     const auto* node(graph_viewer.GetNode(node_idx));
     auto qlinear_op_type = GetQLinearOpType(*node);
-    if (qlinear_op_type == QLinearOpType::DequantizeLinear || IsQLinearBinaryOp(qlinear_op_type)) {
+
+    // Not a qlinear op
+    if (qlinear_op_type == QLinearOpType::Unknown)
+      continue;
+
+    // All qlinear ops EXCEPT QuantizeLinear has quantized input
+    if (qlinear_op_type != QLinearOpType::QuantizeLinear) {
       const auto& input_name = node->InputDefs()[0]->Name();
       if (Contains(all_quantized_op_inputs, input_name))
         all_quantized_op_inputs.at(input_name).push_back(node);
@@ -293,7 +299,7 @@ Status ModelBuilder::RegisterModelInputs() {
           if (!Contains(all_quantized_op_inputs, input_name)) {
             // We current do not support uint8 input if it is not a quantized input
             return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                                   "The input of graph doesn't have valid type, name: ", input_name,
+                                   "The input of graph has unsupported quantized type, name: ", input_name,
                                    " type: ", type_proto->tensor_type().elem_type());
           }
 
@@ -305,7 +311,7 @@ Status ModelBuilder::RegisterModelInputs() {
         default: {
           // TODO: support other type
           return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT,
-                                 "The input of graph doesn't have valid type, name: ", input_name,
+                                 "The input of graph has unsupported type, name: ", input_name,
                                  " type: ", type_proto->tensor_type().elem_type());
         }
       }
@@ -369,6 +375,7 @@ void ModelBuilder::RegisterModelShaper() {
 Status ModelBuilder::AddNewOperand(const std::string& name,
                                    const OperandType& operand_type,
                                    bool is_nhwc, uint32_t& index) {
+  LOGS_DEFAULT(VERBOSE) << "operand name: " << name;
   ORT_RETURN_IF_ERROR(AddNewNNAPIOperand(operand_type, index));
   RegisterOperand(name, index, operand_type, is_nhwc);
   return Status::OK();
@@ -535,6 +542,12 @@ Status ModelBuilder::Compile(std::unique_ptr<Model>& model) {
 
 int32_t ModelBuilder::FindActivation(const Node& node, const NodeArg& output) {
   int32_t fuse_code = ANEURALNETWORKS_FUSED_NONE;
+
+  // We do not support activation fusion for quantized operators for now
+  auto qlinear_op_type = GetQLinearOpType(node);
+  if (qlinear_op_type != QLinearOpType::Unknown)
+    return fuse_code;
+
   for (auto it = node.OutputEdgesBegin(), end = node.OutputEdgesEnd(); it != end; ++it) {
     const auto& dst_node = it->GetNode();
     const auto* dst_input = dst_node.InputDefs()[it->GetDstArgIndex()];
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
index 1c80a9f962..111d3ed68e 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_builder.cc
@@ -495,24 +495,6 @@ static Status HandleAutoPad(const Shape& input_shape,
   return Status::OK();
 }
 
-static float GetQuantizationScale(const ModelBuilder& model_builder, const Node& node, size_t idx) {
-  const auto& scale_tensor = *model_builder.GetInitializerTensors().at(node.InputDefs()[idx]->Name());
-  return GetTensorFloatData(scale_tensor)[0];
-}
-
-static Status GetQuantizationZeroPoint(const ModelBuilder& model_builder, const Node& node, size_t idx, int32_t& zero_point)
-    ORT_MUST_USE_RESULT;
-static Status GetQuantizationZeroPoint(const ModelBuilder& model_builder, const Node& node, size_t idx, int32_t& zero_point) {
-  std::unique_ptr<uint8_t[]> unpacked_tensor;
-  size_t tensor_byte_size;
-  const auto& zero_point_tensor = *model_builder.GetInitializerTensors().at(node.InputDefs()[idx]->Name());
-  ORT_RETURN_IF_ERROR(
-      onnxruntime::utils::UnpackInitializerData(zero_point_tensor, model_builder.GetGraphViewer().ModelPath(),
-                                                unpacked_tensor, tensor_byte_size));
-  zero_point = static_cast<int32_t>(unpacked_tensor.get()[0]);
-  return Status::OK();
-}
-
 // Get scales and zero points for the qlinear binary ops (which has 2 input and 1 output)
 // QLinearConv, QLinearMatmul, QLinearAdd
 // a, b are inputs, and y is output
@@ -524,13 +506,14 @@ static Status GetBinaryOpQuantizationScaleAndZeroPoint(
     const ModelBuilder& model_builder, const Node& node,
     float& a_scale, float& b_scale, float& y_scale,
     int32_t& a_zero_point, int32_t& b_zero_point, int32_t& y_zero_point) {
-  a_scale = GetQuantizationScale(model_builder, node, 1);
-  b_scale = GetQuantizationScale(model_builder, node, 4);
-  y_scale = GetQuantizationScale(model_builder, node, 6);
+  const auto& initializers = model_builder.GetInitializerTensors();
+  a_scale = GetQuantizationScale(initializers, node, 1);
+  b_scale = GetQuantizationScale(initializers, node, 4);
+  y_scale = GetQuantizationScale(initializers, node, 6);
 
-  ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder, node, 2, a_zero_point));
-  ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder, node, 5, b_zero_point));
-  ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder, node, 7, y_zero_point));
+  ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(initializers, node, 2, a_zero_point));
+  ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(initializers, node, 5, b_zero_point));
+  ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(initializers, node, 7, y_zero_point));
 
   return Status::OK();
 }
@@ -660,7 +643,8 @@ Status GetQuantizedInputScaleAndZeroPoint(const ModelBuilder& model_builder,
          qlinear_op_type != QLinearOpType::QuantizeLinear);
 
   size_t scale_idx, zero_point_idx;
-  if (qlinear_op_type == QLinearOpType::DequantizeLinear) {
+  if (qlinear_op_type == QLinearOpType::DequantizeLinear ||
+      qlinear_op_type == QLinearOpType::QLinearSigmoid) {
     scale_idx = 1;
     zero_point_idx = 2;
   } else if (IsQLinearBinaryOp(qlinear_op_type)) {
@@ -679,10 +663,10 @@ Status GetQuantizedInputScaleAndZeroPoint(const ModelBuilder& model_builder,
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "Unsupported op: ", op_type);
   }
 
-  scale = GetQuantizationScale(model_builder, node, scale_idx);
+  scale = GetQuantizationScale(model_builder.GetInitializerTensors(), node, scale_idx);
   zero_point = 0;
   if (node.InputDefs().size() > 2) {
-    ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder, node, zero_point_idx, zero_point));
+    ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder.GetInitializerTensors(), node, zero_point_idx, zero_point));
   }
 
   return Status::OK();
@@ -811,7 +795,7 @@ Status BinaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
                                                                  a_zero_point, b_zero_point, y_zero_point));
   }
 
-  // Verify if the scale and zero point matchs from onnx input and nnapi input
+  // Verify if the scale and zero point matchs from onnx input and nnapi input match
   if (op_is_qlinear) {
     ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input1, a_scale, a_zero_point));
     ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input2, b_scale, b_zero_point));
@@ -1260,7 +1244,8 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
                                   onnx_pads, onnx_strides, kernel_shape,
                                   use_nchw,
                                   output));
-  const OperandType output_operand_type(operand_types.at(input).type, shaper[output]);
+  OperandType output_operand_type = operand_types.at(input);
+  output_operand_type.SetDimensions(shaper[output]);
   ORT_RETURN_IF_ERROR(model_builder.AddOperation(op_code, input_indices,
                                                  {output}, {output_operand_type}, {output_is_nhwc}));
   return Status::OK();
@@ -1802,12 +1787,29 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
 
 class UnaryOpBuilder : public BaseOpBuilder {
  public:
+  void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override;
   static void CreateSharedOpBuilder(const std::string& op_type, OpBuilderRegistrations& op_registrations);
 
  private:
   Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node) const override ORT_MUST_USE_RESULT;
 };
 
+void UnaryOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
+  const auto& op = node.OpType();
+  if (op != "QLinearSigmoid")
+    return;
+
+  const auto input_defs = node.InputDefs();
+
+  // skip input/output scales and zeropoints
+  model_builder.AddInitializerToSkip(input_defs[1]->Name());  // X_scale
+  model_builder.AddInitializerToSkip(input_defs[2]->Name());  // X_zero_point
+  model_builder.AddInitializerToSkip(input_defs[3]->Name());  // Y_scale
+
+  if (input_defs.size() == 5)                                   // has Y_zero_point input
+    model_builder.AddInitializerToSkip(input_defs[4]->Name());  // Y_zero_point
+}
+
 /* static */ void UnaryOpBuilder::CreateSharedOpBuilder(
     const std::string& op_type, OpBuilderRegistrations& op_registrations) {
   CreateSharedOpBuilderImpl<UnaryOpBuilder>(
@@ -1822,6 +1824,7 @@ class UnaryOpBuilder : public BaseOpBuilder {
           "Sin",
           "Sqrt",
           "Tanh",
+          "QLinearSigmoid",
       });
 }
 
@@ -1836,7 +1839,7 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   bool output_is_nhwc = model_builder.IsOperandNHWC(input);
 
   ORT_RETURN_IF_ERROR(shaper.Identity(input, output));
-  const OperandType output_operand_type(operand_types.at(input).type, shaper[output]);
+  bool is_qlinear_sigmoid = op_type == "QLinearSigmoid";
 
   int32_t op_code;
   if (op_type == "Abs")
@@ -1847,7 +1850,7 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
     op_code = ANEURALNETWORKS_FLOOR;
   else if (op_type == "Log")
     op_code = ANEURALNETWORKS_LOG;
-  else if (op_type == "Sigmoid")
+  else if (op_type == "Sigmoid" || is_qlinear_sigmoid)
     op_code = ANEURALNETWORKS_LOGISTIC;
   else if (op_type == "Neg")
     op_code = ANEURALNETWORKS_NEG;
@@ -1860,8 +1863,26 @@ Status UnaryOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   else {
     return ORT_MAKE_STATUS(ONNXRUNTIME, INVALID_ARGUMENT, "UnaryOpBuilder, unknown op: ", op_type);
   }
+
+  float y_scale = 0.0f;
+  int32_t y_zero_point = 0;
+  if (is_qlinear_sigmoid) {
+    const auto& initializers = model_builder.GetInitializerTensors();
+    float x_scale = GetQuantizationScale(initializers, node, 1);
+    int32_t x_zero_point = 0;
+    ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(initializers, node, 2, x_zero_point));
+
+    // Verify if the scale and zero point values from onnx input and nnapi input match
+    ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input, x_scale, x_zero_point));
+
+    // We already verified this in  UnaryOpSupportChecker::IsOpSupportedImpl
+    y_scale = 1.f / 256;
+    y_zero_point = 0;
+  }
+
   std::vector<uint32_t> input_indices;
   input_indices.push_back(operand_indices.at(input));
+  const OperandType output_operand_type(operand_types.at(input).type, shaper[output], y_scale, y_zero_point);
   ORT_RETURN_IF_ERROR(model_builder.AddOperation(op_code, input_indices,
                                                  {output}, {output_operand_type}, {output_is_nhwc}));
   return Status::OK();
@@ -1888,6 +1909,24 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
   bool output_is_nhwc = false;
   const auto node_input_size = node.InputDefs().size();
 
+  // First if the inputs are uint8, we need verify all the inputs have same scale and zero points
+  if (operand_types.at(input0).type == android::nn::wrapper::Type::TENSOR_QUANT8_ASYMM) {
+    auto scale = operand_types.at(input0).operandType.scale;
+    auto zero_point = operand_types.at(input0).operandType.zeroPoint;
+
+    // Compare scale and zp of input0 to input1~n
+    for (size_t i = 1; i < node_input_size; i++) {
+      const auto& type = operand_types.at(node.InputDefs()[i]->Name());
+      ORT_RETURN_IF_NOT(scale == type.operandType.scale,
+                        "Input[", i, "]'s scale: ", type.operandType.scale,
+                        " is different than input[0]'s scale: ", scale);
+
+      ORT_RETURN_IF_NOT(zero_point == type.operandType.zeroPoint,
+                        "Input[", i, "]'s zero_point: ", type.operandType.zeroPoint,
+                        " is different than input[0]'s zero_point: ", zero_point);
+    }
+  }
+
   // First we want to see if all the input are same layout
   for (size_t i = 0; i < node_input_size - 1; i++) {
     all_input_have_same_layout =
@@ -1934,7 +1973,8 @@ Status ConcatOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
 
   const auto& output = node.OutputDefs()[0]->Name();
   ORT_RETURN_IF_ERROR(shaper.Concat(inputs, axis, output));
-  const OperandType output_operand_type(operand_types.at(input0).type, shaper[output]);
+  OperandType output_operand_type = operand_types.at(input0);
+  output_operand_type.SetDimensions(shaper[output]);
   ORT_RETURN_IF_ERROR(model_builder.AddOperation(ANEURALNETWORKS_CONCATENATION, input_indices,
                                                  {output}, {output_operand_type}, {output_is_nhwc}));
   return Status::OK();
@@ -2023,12 +2063,12 @@ Status QuantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builde
   const auto& output = node.OutputDefs()[0]->Name();
   bool output_is_nhwc = model_builder.IsOperandNHWC(input);
 
-  float scale = GetQuantizationScale(model_builder, node, 1);
+  float scale = GetQuantizationScale(model_builder.GetInitializerTensors(), node, 1);
   int32_t zero_point = 0;
   Type output_type = Type::TENSOR_QUANT8_ASYMM;
 
   if (input_defs.size() == 3) {  // Get zero point
-    ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder, node, 2, zero_point));
+    ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder.GetInitializerTensors(), node, 2, zero_point));
   }
 
   ORT_RETURN_IF_ERROR(shaper.Identity(input, output));
@@ -2070,10 +2110,10 @@ Status DequantizeLinearOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_buil
   const auto& output = node.OutputDefs()[0]->Name();
   bool output_is_nhwc = model_builder.IsOperandNHWC(input);
 
-  float scale = GetQuantizationScale(model_builder, node, 1);
+  float scale = GetQuantizationScale(model_builder.GetInitializerTensors(), node, 1);
   int32_t zero_point = 0;
   if (input_defs.size() == 3) {  // Get zero point
-    ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder, node, 2, zero_point));
+    ORT_RETURN_IF_ERROR(GetQuantizationZeroPoint(model_builder.GetInitializerTensors(), node, 2, zero_point));
   }
 
   ORT_RETURN_IF_ERROR(IsValidInputQuantizedType(model_builder, input, scale, zero_point));
@@ -2296,7 +2336,8 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const
     }
   }
 
-  const OperandType output_operand_type(operand_types.at(input).type, output_shape);
+  OperandType output_operand_type = operand_types.at(input);
+  output_operand_type.SetDimensions(output_shape);
   ORT_RETURN_IF_ERROR(model_builder.AddOperation(operationCode, input_indices,
                                                  {output}, {output_operand_type}, {output_is_nhwc}));
 
@@ -2468,6 +2509,7 @@ static OpBuilderRegistrations CreateOpBuilderRegistrations() {
     NNAPI_EP_ADD_SHARED_OP_BUILDER("Sin", UnaryOpBuilder);
     NNAPI_EP_ADD_SHARED_OP_BUILDER("Sqrt", UnaryOpBuilder);
     NNAPI_EP_ADD_SHARED_OP_BUILDER("Tanh", UnaryOpBuilder);
+    NNAPI_EP_ADD_SHARED_OP_BUILDER("QLinearSigmoid", UnaryOpBuilder);
   }
 
   NNAPI_EP_ADD_SINGLE_OP_BUILDER("Concat", ConcatOpBuilder);
diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
index 8c3e000315..2caa7ab323 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/builders/op_support_checker.cc
@@ -321,6 +321,8 @@ class TransposeOpSupportChecker : public BaseOpSupportChecker {
   int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
     return 28;
   }
+
+  bool HasSupportedInputsImpl(const Node& node) const override;
 };
 
 bool TransposeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const Node& node,
@@ -339,6 +341,22 @@ bool TransposeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /*
   return true;
 }
 
+bool TransposeOpSupportChecker::HasSupportedInputsImpl(const Node& node) const {
+  int32_t input_type;
+  if (!GetType(*node.InputDefs()[0], input_type))
+    return false;
+
+  if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT &&
+      input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
+    LOGS_DEFAULT(VERBOSE) << "[" << node.OpType()
+                          << "] Input type: [" << input_type
+                          << "] is not supported for now";
+    return false;
+  }
+
+  return true;
+}
+
 #pragma endregion
 
 #pragma region op_reshape
@@ -465,6 +483,8 @@ class PoolOpSupportChecker : public BaseOpSupportChecker {
   int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& params) const override {
     return params.use_nchw ? 29 : 28;
   }
+
+  bool HasSupportedInputsImpl(const Node& node) const override;
 };
 
 /* static */ void PoolOpSupportChecker::CreateSharedOpSupportChecker(
@@ -537,6 +557,25 @@ bool PoolOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* init
   return true;
 }
 
+bool PoolOpSupportChecker::HasSupportedInputsImpl(const Node& node) const {
+  if (node.OpType() != "MaxPool")
+    return BaseOpSupportChecker::HasSupportedInputsImpl(node);
+
+  int32_t input_type;
+  if (!GetType(*node.InputDefs()[0], input_type))
+    return false;
+
+  if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT &&
+      input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
+    LOGS_DEFAULT(VERBOSE) << "[" << node.OpType()
+                          << "] Input type: [" << input_type
+                          << "] is not supported for now";
+    return false;
+  }
+
+  return true;
+}
+
 #pragma endregion op_pool
 
 #pragma region op_conv
@@ -917,11 +956,17 @@ class UnaryOpSupportChecker : public BaseOpSupportChecker {
       const std::string& op_type, OpSupportCheckerRegistrations& op_registrations);
 
  private:
+  bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
+                         const OpSupportCheckParams& params) const override;
+
   int32_t GetMinSupportedSdkVer(const Node& node, const OpSupportCheckParams& params) const override;
 
-  // All ops except "Sin" opset 5- uses consumed_inputs attribute which is not supported for now
-  // "Sin" op has support from opset 7, return 6 here for all ops
-  int GetMinSupportedOpSet(const Node& /* node */) const override { return 6; }
+  bool HasSupportedInputsImpl(const Node& node) const override;
+
+  int GetMinSupportedOpSet(const Node& node) const override;
+
+  static bool IsQuantizedOpSupported(const InitializedTensorSet& initializers, const Node& node,
+                                     const OpSupportCheckParams& params);
 };
 
 /* static */ void UnaryOpSupportChecker::CreateSharedOpSupportChecker(
@@ -938,9 +983,18 @@ class UnaryOpSupportChecker : public BaseOpSupportChecker {
           "Sin",
           "Sqrt",
           "Tanh",
+          "QLinearSigmoid",
       });
 }
 
+bool UnaryOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
+                                              const OpSupportCheckParams& params) const {
+  if (node.OpType() == "QLinearSigmoid")
+    return IsQuantizedOpSupported(initializers, node, params);
+  else  // Everything except "QLinearSigmoid" are by default supported
+    return true;
+}
+
 int32_t UnaryOpSupportChecker::GetMinSupportedSdkVer(
     const Node& node, const OpSupportCheckParams& /* params */) const {
   const auto& op(node.OpType());
@@ -956,6 +1010,86 @@ int32_t UnaryOpSupportChecker::GetMinSupportedSdkVer(
   return 27;
 }
 
+bool UnaryOpSupportChecker::HasSupportedInputsImpl(const Node& node) const {
+  // We only need to override input check for QLinearSigmoid
+  if (node.OpType() != "QLinearSigmoid")
+    return BaseOpSupportChecker::HasSupportedInputsImpl(node);
+
+  int32_t input_type;
+  if (!GetType(*node.InputDefs()[0], input_type))
+    return false;
+
+  if (input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
+    LOGS_DEFAULT(VERBOSE) << "[" << node.OpType()
+                          << "] Input type: [" << input_type
+                          << "] is not supported for now";
+    return false;
+  }
+
+  return true;
+}
+
+// All ops except "Sin" opset 5- uses consumed_inputs attribute which is not supported for now
+// "Sin" op has support from opset 7, return 6 here for all ops
+// "QLinearSigmoid" is a contrib op, OpSet will always be 1
+int UnaryOpSupportChecker::GetMinSupportedOpSet(const Node& node) const {
+  if (node.OpType() == "QLinearSigmoid")
+    return 1;
+
+  return 6;
+}
+
+/* static */ bool UnaryOpSupportChecker::IsQuantizedOpSupported(
+    const InitializedTensorSet& initializers, const Node& node, const OpSupportCheckParams& params) {
+  const auto& op_type = node.OpType();
+  ORT_ENFORCE(op_type == "QLinearSigmoid");
+
+  const auto& op_name = node.Name();
+  const auto input_defs(node.InputDefs());
+  // const auto output_defs(node.OutputDefs());
+
+  if (input_defs.size() < 4)
+    return false;
+
+  bool has_output_zp = input_defs.size() == 5;
+
+  if (!HasValidQuantizationScales(initializers, node, {1, 3}, params))
+    return false;
+
+  if (!HasValidQuantizationZeroPoints(initializers, node,
+                                      has_output_zp
+                                          ? std::vector<size_t>{2}
+                                          : std::vector<size_t>{2, 4}))
+    return false;
+
+  // NNAPI requires the scale be 1.f/256 and zero point to be 0
+  // See https://android.googlesource.com/platform/frameworks/ml/+/refs/heads/android10-c2f2-release/nn/common/operations/Activation.cpp#180
+  auto output_scale = GetQuantizationScale(initializers, node, 3);
+  if (output_scale != 1.f / 256) {
+    LOGS_DEFAULT(VERBOSE) << "Op [" << op_type << "] name [" << op_name
+                          << "] output scale can only be 1.f/256, actual scale: " << output_scale;
+    return false;
+  }
+
+  int32_t output_zp;
+  if (has_output_zp) {
+    auto status = GetQuantizationZeroPoint(initializers, node, 4, output_zp);
+    if (!status.IsOK()) {
+      LOGS_DEFAULT(ERROR) << "Op [" << op_type << "] name [" << op_name
+                          << "] GetQuantizationZeroPoint failed, message: " << status.ErrorMessage();
+      return false;
+    }
+
+    if (output_zp != 0) {
+      LOGS_DEFAULT(VERBOSE) << "Op [" << op_type << "] name [" << op_name
+                            << "] output zero point can only be 0, actual zero point: " << output_scale;
+      return false;
+    }
+  }
+
+  return true;
+}
+
 #pragma endregion
 
 #pragma region op_concat
@@ -964,6 +1098,8 @@ class ConcatOpSupportChecker : public BaseOpSupportChecker {
  private:
   bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                          const OpSupportCheckParams& params) const override;
+
+  bool HasSupportedInputsImpl(const Node& node) const override;
 };
 
 bool ConcatOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* initializers */, const Node& node,
@@ -982,6 +1118,22 @@ bool ConcatOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& /* in
   return true;
 }
 
+bool ConcatOpSupportChecker::HasSupportedInputsImpl(const Node& node) const {
+  int32_t input_type;
+  if (!GetType(*node.InputDefs()[0], input_type))
+    return false;
+
+  if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT &&
+      input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
+    LOGS_DEFAULT(VERBOSE) << "[" << node.OpType()
+                          << "] Input type: [" << input_type
+                          << "] is not supported for now";
+    return false;
+  }
+
+  return true;
+}
+
 #pragma endregion
 
 #pragma region op_squeeze
@@ -1172,13 +1324,13 @@ class ResizeOpSupportChecker : public BaseOpSupportChecker {
   bool IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
                          const OpSupportCheckParams& params) const override;
 
-  int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& /* params */) const override {
-    return 28;
-  }
+  int32_t GetMinSupportedSdkVer(const Node& /* node */, const OpSupportCheckParams& /* params */) const override;
 
   // Resize opset 10- is very different than Resize opset 11+, with many key attributes missing
   // We only support Resize opset 11+ here
   int GetMinSupportedOpSet(const Node& /* node */) const override { return 11; }
+
+  bool HasSupportedInputsImpl(const Node& node) const override;
 };
 
 bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initializers, const Node& node,
@@ -1291,6 +1443,35 @@ bool ResizeOpSupportChecker::IsOpSupportedImpl(const InitializedTensorSet& initi
   return true;
 }
 
+int32_t ResizeOpSupportChecker::GetMinSupportedSdkVer(const Node& node, const OpSupportCheckParams& /* params */) const {
+  int32_t input_type;
+
+  // This should not happen, but if it happens make sure this will require an impossible version
+  if (!GetType(*node.InputDefs()[0], input_type))
+    return std::numeric_limits<int32_t>::max();
+
+  if (input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8)
+    return 29;
+
+  return 28;
+}
+
+bool ResizeOpSupportChecker::HasSupportedInputsImpl(const Node& node) const {
+  int32_t input_type;
+  if (!GetType(*node.InputDefs()[0], input_type))
+    return false;
+
+  if (input_type != ONNX_NAMESPACE::TensorProto_DataType_FLOAT &&
+      input_type != ONNX_NAMESPACE::TensorProto_DataType_UINT8) {
+    LOGS_DEFAULT(VERBOSE) << "[" << node.OpType()
+                          << "] Input type: [" << input_type
+                          << "] is not supported for now";
+    return false;
+  }
+
+  return true;
+}
+
 #pragma endregion
 
 #pragma region op_flatten
@@ -1439,6 +1620,7 @@ static OpSupportCheckerRegistrations CreateOpSupportCheckerRegistrations() {
     NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sin", UnaryOpSupportChecker);
     NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Sqrt", UnaryOpSupportChecker);
     NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("Tanh", UnaryOpSupportChecker);
+    NNAPI_EP_ADD_SHARED_OP_SUPPORT_CHECKER("QLinearSigmoid", UnaryOpSupportChecker);
   }
 
   NNAPI_EP_ADD_SINGLE_OP_SUPPORT_CHECKER("Concat", ConcatOpSupportChecker);
diff --git a/onnxruntime/test/contrib_ops/qlinear_lookup_table_test.cc b/onnxruntime/test/contrib_ops/qlinear_lookup_table_test.cc
index fcf2b5b6c6..e005f53afe 100644
--- a/onnxruntime/test/contrib_ops/qlinear_lookup_table_test.cc
+++ b/onnxruntime/test/contrib_ops/qlinear_lookup_table_test.cc
@@ -89,5 +89,31 @@ TEST(QLinearLookupTableBasedOperatorTests, QLinearSigmoid_UInt8) {
   std::fesetround(origin_round_mode);
 }
 
+// NNAPI can only take 0 as Y_zero_point
+TEST(QLinearLookupTableBasedOperatorTests, QLinearSigmoid_UInt8_0_Y_ZP) {
+  auto run_test = [](bool scales_and_zp_are_initializers) {
+    OpTester test("QLinearSigmoid", 1, onnxruntime::kMSDomain);
+    float X_scale = 0.025f;
+    uint8_t X_zero_point = 128;
+    float Y_scale = 1.0f / 256.0f;
+    uint8_t Y_zero_point = 0;
+
+    std::vector<int64_t> dims = {16};
+    test.AddInput<uint8_t>("X", dims, {0, 16, 17, 18, 19, 90, 91, 127, 128, 136, 137, 138, 216, 217, 218, 255});
+    test.AddInput<float>("X_scale", {}, {X_scale}, scales_and_zp_are_initializers);
+    test.AddInput<uint8_t>("X_zero_point", {}, {X_zero_point}, scales_and_zp_are_initializers);
+    test.AddInput<float>("Y_scale", {}, {Y_scale}, scales_and_zp_are_initializers);
+    test.AddInput<uint8_t>("Y_zero_point", {}, {Y_zero_point}, scales_and_zp_are_initializers);
+    test.AddOutput<uint8_t>("Y", dims, {10, 15, 15, 15, 16, 71, 73, 126, 128, 141, 142, 144, 230, 231, 232, 246});
+    auto origin_round_mode = std::fegetround();
+    std::fesetround(FE_TONEAREST);
+    test.Run();
+    std::fesetround(origin_round_mode);
+  };
+
+  run_test(false);
+  run_test(true);
+}
+
 }  // namespace test
 }  // namespace onnxruntime
diff --git a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
index 1b5c5343c3..07487edd12 100644
--- a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
+++ b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
@@ -72,6 +72,39 @@ TEST(NnapiExecutionProviderTest, ReshapeFlattenTest) {
 #endif
 }
 
+// This is to test the uint8 handling of operators without "QLinear" such as Concat and Transpose
+// NNAPI will require scale and zero point for inputs of all quantized operations
+// For these operators without "Qlinear", there is no information about the scale and zero point, we can
+// only fetch these from the output of the previous node
+// So uint8 support of these operators will only be enabled when they are internal to the graph
+// by not consuming graph inputs
+TEST(NnapiExecutionProviderTest, InternalUint8SupportTest) {
+  const ORTCHAR_T* model_file_name = ORT_TSTR("testdata/nnapi_internal_uint8_support.onnx");
+
+#if defined(__ANDROID__)
+  std::vector<int64_t> dims_x = {1, 3};
+  std::vector<float> values_x = {0.0f, 256.0f, 512.0f};
+  OrtValue ml_value_x;
+  CreateMLValue<float>(TestNnapiExecutionProvider()->GetAllocator(0, OrtMemTypeDefault), dims_x, values_x,
+                       &ml_value_x);
+  NameMLValMap feeds;
+  feeds.insert(std::make_pair("X", ml_value_x));
+
+  RunAndVerifyOutputsWithEP(model_file_name, "NnapiExecutionProviderTest.InternalUint8SupportTest",
+                            onnxruntime::make_unique<NnapiExecutionProvider>(0),
+                            feeds);
+#else
+  // test load only
+  SessionOptions so;
+  InferenceSessionWrapper session_object{so, GetEnvironment()};
+  ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(onnxruntime::make_unique<NnapiExecutionProvider>(0)));
+  ASSERT_STATUS_OK(session_object.Load(model_file_name));
+  ASSERT_STATUS_OK(session_object.Initialize());
+  ASSERT_GT(CountAssignedNodes(session_object.GetGraph(), kNnapiExecutionProvider), 0)
+      << "Some nodes should have been taken by the NNAPI EP";
+#endif
+}
+
 #if defined(__ANDROID__)
 // This is to verify the op_builders and op_support_checkers are consistent
 TEST(NnapiExecutionProviderTest, CreateOpBuilderAndOpSupportCheckerTest) {
diff --git a/onnxruntime/test/testdata/nnapi_internal_uint8_support.onnx b/onnxruntime/test/testdata/nnapi_internal_uint8_support.onnx
new file mode 100644
index 0000000000..daee608f7c
Binary files /dev/null and b/onnxruntime/test/testdata/nnapi_internal_uint8_support.onnx differ
diff --git a/onnxruntime/test/testdata/nnapi_internal_uint8_support.py b/onnxruntime/test/testdata/nnapi_internal_uint8_support.py
new file mode 100644
index 0000000000..f419df2dc7
--- /dev/null
+++ b/onnxruntime/test/testdata/nnapi_internal_uint8_support.py
@@ -0,0 +1,39 @@
+import onnx
+from onnx import helper
+from onnx import TensorProto
+
+
+# This is to test the operators without "Qlinear" support but still support uint8 input
+# These operators need to be internal to a graph/partition
+# def GenerateModel(model_name):
+def GenerateModel(model_name):
+    nodes = [
+        helper.make_node("QuantizeLinear", ["X", "Scale", "Zero_point"], ["X_quantized"], "quantize"),
+        helper.make_node("Concat", ["X_quantized", "X_quantized"], ["X_concat"], axis=0, name="concat"),
+        helper.make_node("Transpose", ["X_concat"], ["X_transposed"], "transpose"),
+        helper.make_node("DequantizeLinear", ["X_transposed", "Scale", "Zero_point"], ["Y"], "dequantize"),
+    ]
+
+    initializers = [
+        helper.make_tensor('Scale', TensorProto.FLOAT, [1], [256.0]),
+        helper.make_tensor('Zero_point', TensorProto.UINT8, [1], [0]),
+    ]
+
+    inputs = [
+        helper.make_tensor_value_info('X', TensorProto.FLOAT, [1, 3]),
+    ]
+
+    graph = helper.make_graph(
+        nodes,
+        "NNAPI_Internal_uint8_Test",
+        inputs,
+        [helper.make_tensor_value_info('Y', TensorProto.FLOAT, [3, 2])],
+        initializers
+    )
+
+    model = helper.make_model(graph)
+    onnx.save(model, model_name)
+
+
+if __name__ == "__main__":
+    GenerateModel('nnapi_internal_uint8_support.onnx')