diff --git a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc
index e6ff9ba6fb..11f771d007 100644
--- a/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc
+++ b/onnxruntime/core/providers/nnapi/nnapi_builtin/nnapi_execution_provider.cc
@@ -175,6 +175,27 @@ NnapiExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph_view
   result = utils::CreateSupportedPartitions(graph_viewer, is_node_supported, on_group_closed,
                                             gen_metadef_name, NNAPI, kNnapiExecutionProvider);
 
+  // Generally, NNAPI supports graphs that have inputs and outputs other than constant initializers.
+  // So far, we have seen a few cases where a sub-graph effectively has zero inputs:
+  // a) the sub-graph has only initializers as inputs
+  // b) the sub-graph has no inputs at all
+  // So we simply drop such sub-graphs from the set captured by NNAPI.
+  // An existing example is CastLike, which can't be folded by the constant folding pass.
+  // The CastLike op will be inlined into Cast after the pass transform.
+  // Can we remove this once CastLike is supported in CF, or once pass transforms can run after InlineNodes?
+  // Erase (rather than null out) partitions NNAPI cannot run, so result.size() below counts only kept ones.
+  result.erase(std::remove_if(result.begin(), result.end(), [&graph_viewer](const auto& capability) {
+    if (!capability || !capability->sub_graph || !capability->sub_graph->GetMetaDef()) {
+      return false;  // nothing to inspect: keep the entry untouched, as before
+    }
+    const auto* meta_def = capability->sub_graph->GetMetaDef();
+    const bool has_non_constant_input = std::any_of(
+        meta_def->inputs.begin(), meta_def->inputs.end(),
+        [&graph_viewer](const auto& input) { return !graph_viewer.IsConstantInitializer(input, true); });
+    // any_of yields false for an empty input list, so a partition with no inputs at all is dropped too.
+    return !has_non_constant_input || meta_def->outputs.empty();
+  }), result.end());
+
   const auto num_of_partitions = result.size();
   const auto num_of_supported_nodes = std::accumulate(
       result.begin(), result.end(), size_t{0},
@@ -298,7 +319,7 @@ common::Status NnapiExecutionProvider::Compile(const std::vector<FusedNodeAndGra
 
     compute_info.compute_func = [](FunctionState state, const OrtApi* /* api */, OrtKernelContext* context) {
       Ort::KernelContext ctx(context);
-      
+
       nnapi::Model* model = reinterpret_cast<nnapi::Model*>(state);
       const size_t num_inputs = ctx.GetInputCount();
       const size_t num_outputs = ctx.GetOutputCount();
diff --git a/onnxruntime/core/providers/partitioning_utils.cc b/onnxruntime/core/providers/partitioning_utils.cc
index 85b95bd638..d537a4cf58 100644
--- a/onnxruntime/core/providers/partitioning_utils.cc
+++ b/onnxruntime/core/providers/partitioning_utils.cc
@@ -96,9 +96,9 @@ std::vector<std::vector<const Node*>> CreateSupportedPartitionNodeGroups(
   ORT_ENFORCE(is_node_supported_fn, "Node support test is required.");
 
   /*
-  * NOTE: when making change here PLEASE update the logic that replicates the C++ partitioning in 
-  * /tools/python/util/mobile_helpers/usability_checker.py:check_partitioning
-  */
+   * NOTE: when making changes here PLEASE update the logic that replicates the C++ partitioning in
+   * /tools/python/util/mobile_helpers/usability_checker.py:check_partitioning
+   */
   std::vector<std::vector<const Node*>> supported_groups{};
 
   // number of inputs from unprocessed nodes (in-degree) per node
diff --git a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
index dbf71d00b4..106bc632a3 100644
--- a/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
+++ b/onnxruntime/test/providers/nnapi/nnapi_basic_test.cc
@@ -16,6 +16,7 @@
 #include "test/util/include/inference_session_wrapper.h"
 #include "test/util/include/test/test_environment.h"
 #include "test/util/include/test_utils.h"
+#include "core/framework/data_types_internal.h"
 
 #if !defined(ORT_MINIMAL_BUILD)
 // if this is a full build we need the provider test utils
@@ -504,6 +505,18 @@ TEST(NnapiExecutionProviderTest, TestQDQMatMul) {
                   {ExpectedEPNodeAssignment::All});
 }
 
+// Zero-inputs test: the model's only node (CastLike) reads nothing but an initializer.
+TEST(NnapiExecutionProviderTest, TestCast) {
+  std::vector<int64_t> input1_shape{1, 2, 3, 4};
+  auto build_func = [input1_shape](ModelTestBuilder& builder) {
+    auto* input_arg = builder.MakeInitializer<float>(input1_shape, -100.f, 100.f);
+    auto* output_arg = builder.MakeOutput();
+    // Both CastLike inputs are the same initializer, so the graph has no non-initializer input.
+    builder.AddNode("CastLike", {input_arg, input_arg}, {output_arg});
+  };
+  RunQDQModelTest(build_func, "nnapi_qdq_test_graph_cast", {ExpectedEPNodeAssignment::None});
+}
+
 #endif  // !(ORT_MINIMAL_BUILD)
 
 TEST(NnapiExecutionProviderTest, NNAPIFlagsTest) {