diff --git a/onnxruntime/core/framework/allocation_planner.cc b/onnxruntime/core/framework/allocation_planner.cc
index f7c3fbc76a..8c72823f88 100644
--- a/onnxruntime/core/framework/allocation_planner.cc
+++ b/onnxruntime/core/framework/allocation_planner.cc
@@ -427,6 +427,15 @@ class PlannerImpl {
     plan_.allocation_plan.resize(num_ml_values);
   }
 
+  // Reports whether the kernel definition registered for `node` declares external outputs.
+  // A node whose kernel create-info carries no kernel definition is reported as `false`.
+  bool ExternalOutputs(const Node& node) const {
+    const KernelCreateInfo& create_info =
+        GetKernelCreateInfo(kernel_create_info_map_, node.Index());
+    const bool has_kernel_def = create_info.kernel_def != nullptr;
+    return has_kernel_def && create_info.kernel_def->ExternalOutputs();
+  }
+
   Status ComputeUseCounts() {
     // Note: for every ml-value, its definition must appear before all its uses in a topological sort of a valid model
     std::unordered_set<std::string> graph_inputs;
@@ -511,12 +520,14 @@ class PlannerImpl {
 
       auto outputs = pnode->OutputDefs();
       auto num_outputs = outputs.size();
+      bool external_outputs = ExternalOutputs(*pnode);
       for (size_t i = 0; i < num_outputs; ++i) {
         auto* node_output = outputs[i];
         if (!node_output->Exists()) continue;
         OrtValueIndex index = Index(node_output->Name());
         ProcessDef(index, node_output);
-        ++UseCount(index);
+        // Ensures external outputs will not be reused.
+        UseCount(index) += (external_outputs ? 2 : 1);
         auto allocator = exec_provider->GetAllocator(0, p_kernel_def->OutputMemoryType(i));
         ORT_ENFORCE(allocator);
         plan_.SetLocation(static_cast<size_t>(index),
@@ -600,15 +611,6 @@ class PlannerImpl {
     return Status::OK();
   }
 
-  bool ExternalOutputs(const Node& node) const {
-    const KernelCreateInfo& ci = GetKernelCreateInfo(kernel_create_info_map_, node.Index());
-    if (ci.kernel_def == nullptr) {
-      return false;
-    }
-
-    return ci.kernel_def->ExternalOutputs();
-  }
-
   // Should only be used after ProcessDef()
   Status ComputeReusePlan() {
     std::vector<SequentialExecutionPlan::NodeExecutionPlan>& execution_plan(plan_.execution_plan);
diff --git a/onnxruntime/test/framework/allocation_planner_test.cc b/onnxruntime/test/framework/allocation_planner_test.cc
index 078f284f0e..8b35ca101a 100644
--- a/onnxruntime/test/framework/allocation_planner_test.cc
+++ b/onnxruntime/test/framework/allocation_planner_test.cc
@@ -156,6 +156,7 @@ class PlannerTest : public ::testing::Test {
 
   std::unique_ptr<::onnxruntime::KernelDef> std_kernel_;       // a unary kernel with no-aliasing and no-in-place
   std::unique_ptr<::onnxruntime::KernelDef> in_place_kernel_;  // a unary kernel with in-place
+  std::unique_ptr<::onnxruntime::KernelDef> external_outputs_kernel_;  // a unary kernel with external outputs
 
   std::unordered_map<std::string, onnxruntime::NodeArg*> name_to_arg_;
   std::vector<std::unique_ptr<UnaryNode>> nodes_;
@@ -178,6 +179,8 @@ class PlannerTest : public ::testing::Test {
     std_kernel_ = KernelDefBuilder().SetName("Transpose").Provider(kCpuExecutionProvider).SinceVersion(1, 10).Build();
     in_place_kernel_ =
         KernelDefBuilder().SetName("Relu").Provider(kCpuExecutionProvider).SinceVersion(1, 10).MayInplace(0, 0).Build();
+    external_outputs_kernel_ =
+        KernelDefBuilder().SetName("Tanh").Provider(kCpuExecutionProvider).SinceVersion(1, 10).ExternalOutputs().Build();
     CPUExecutionProviderInfo epi;
     auto execution_provider = onnxruntime::make_unique<CPUExecutionProvider>(epi);
     execution_providers_.Add("CPUExecutionProvider", std::move(execution_provider));
@@ -209,6 +212,10 @@ class PlannerTest : public ::testing::Test {
     return AddNode(*in_place_kernel_, input, output);
   }
 
+  onnxruntime::Node* AddExternalOutputsNode(std::string& input, std::string& output) {
+    return AddNode(*external_outputs_kernel_, input, output);  // node bound to external_outputs_kernel_
+  }
+
   void BindKernel(onnxruntime::Node* p_node, ::onnxruntime::KernelDef& kernel_def, KernelRegistry* reg,
                   std::unordered_map<NodeIndex, gsl::not_null<const KernelCreateInfo*>>& kernel_create_info_map) {
     const IExecutionProvider* ep = execution_providers_.Get(*p_node);
@@ -403,6 +410,35 @@ TEST_F(PlannerTest, InPlaceTest) {
   CheckFreed(2, {X2});
 }
 
+TEST_F(PlannerTest, ExternalOutputsTest) {
+  // Checks the plan for a chain whose first node uses the external-outputs kernel. Tensor variables:
+  std::string X1("X1"), X2("X2"), X3("X3"), X4("X4");
+
+  // graph structure: X1 -> (external-outputs op) -> X2 -> (normal op) -> X3 -> (normal op) -> X4
+  AddExternalOutputsNode(X1, X2);   // external-outputs operator; X1: input; X2: its (external) output
+  AddNormalNode(X2, X3);  // normal operator; X3: temporary
+  AddNormalNode(X3, X4);   // normal operator; X4: output
+
+  // simulate shape-inference results (all four values share the same symbolic shape):
+  Shape shape1{"M", "N"};
+  auto shape = &shape1.value;
+  SetShape({{X1, shape}, {X2, shape}, {X3, shape}, {X4, shape}});
+
+  CreatePlan();
+
+  // check allocation kind:
+  CheckAllocKind(X1, AllocKind::kPreExisting);
+  CheckAllocKind(X2, AllocKind::kPreExisting);  // output of the external-outputs node
+  CheckAllocKind(X3, AllocKind::kAllocate);
+  CheckAllocKind(X4, AllocKind::kAllocateOutput);
+
+  // check each ml-value is freed at the appropriate step:
+  // X2 must not be reused and must never be freed; X3 is allocated and is freed at step 2, where it is consumed.
+  CheckFreed(0, {});
+  CheckFreed(1, {});
+  CheckFreed(2, {X3});
+}
+
 // InPlaceSizeMismatchTest: Check that Inplace reuse is not allowed when sizes don't match.
 // Also tests reuse of disjoint lifetime tensors.
 TEST_F(PlannerTest, InPlaceSizeMismatchTest) {