diff --git a/cmake/onnxruntime_framework.cmake b/cmake/onnxruntime_framework.cmake
index 67dc3a28ef..aea834e8ff 100644
--- a/cmake/onnxruntime_framework.cmake
+++ b/cmake/onnxruntime_framework.cmake
@@ -18,7 +18,7 @@ set_target_properties(onnxruntime_framework PROPERTIES FOLDER "ONNXRuntime")
 add_dependencies(onnxruntime_framework ${onnxruntime_EXTERNAL_DEPENDENCIES})
 
 if (onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS)
-  target_compile_definitions(onnxruntime_framework PRIVATE DEBUG_NODE_INPUTS_OUTPUTS)
+  target_compile_definitions(onnxruntime_framework PRIVATE DEBUG_NODE_INPUTS_OUTPUTS=${onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS})
 endif()
 
 
diff --git a/onnxruntime/core/framework/utils.cc b/onnxruntime/core/framework/utils.cc
index 166804ae2c..85d7dc8aa1 100644
--- a/onnxruntime/core/framework/utils.cc
+++ b/onnxruntime/core/framework/utils.cc
@@ -557,15 +557,15 @@ static void DumpTensor(const Tensor& tensor, const TensorShape& shape) {
   auto data = tensor.DataAsSpan<T>();
 
   auto print_val = [](const T& value) {
-    if (std::is_floating_point_v<T>)
+    if (std::is_floating_point<T>::value)
       std::cout << std::setprecision(8) << value;
     else
       std::cout << value;
   };
 
-  for (int row = 0; row < num_rows; ++row) {
+  for (size_t row = 0; row < num_rows; ++row) {
     print_val(data[row * row_size]);
-    for (int i = 1; i < row_size; ++i) {
+    for (size_t i = 1; i < row_size; ++i) {
       std::cout << ", ";
       print_val(data[row * row_size + i]);
     }
@@ -622,22 +622,24 @@ void DumpNodeOutputs(OpKernelContext& context, const Node& node, const SessionSt
       if (type) {
         if (type->IsTensorType()) {
           const auto& tensor = *context.Output<Tensor>(i);
-          const auto data_type = tensor.DataType();
           const auto& shape = tensor.Shape();
 
           std::cout << " Shape: " << shape << "\n";
 
-          // check tensor is on CPU before dumping it
-          auto& tensor_location = tensor.Location();
-          auto* provider = execution_providers.Get(tensor_location);
-          if (!provider) {
-            provider = cpu_execution_provider;
-          }
+          if (DEBUG_NODE_INPUTS_OUTPUTS > 1) {
+            // check tensor is on CPU before dumping it
+            auto& tensor_location = tensor.Location();
+            auto* provider = execution_providers.Get(tensor_location);
+            if (!provider) {
+              provider = cpu_execution_provider;
+            }
 
-          if (provider == cpu_execution_provider || tensor_location.mem_type == OrtMemTypeCPUOutput) {
-            DispatchOnTensorType(data_type, DumpTensor, tensor, shape);
-          } else {
-            std::cout << " is not on CPU. Provider=" << provider->Type() << "\n";
+            if (provider == cpu_execution_provider || tensor_location.mem_type == OrtMemTypeCPUOutput) {
+              const auto data_type = tensor.DataType();
+              DispatchOnTensorType(data_type, DumpTensor, tensor, shape);
+            } else {
+              std::cout << " is not on CPU. Provider=" << provider->Type() << "\n";
+            }
           }
         } else {
           std::cout << " is non-tensor type.\n";
diff --git a/onnxruntime/core/framework/utils.h b/onnxruntime/core/framework/utils.h
index 21f77075bb..a33cde9430 100644
--- a/onnxruntime/core/framework/utils.h
+++ b/onnxruntime/core/framework/utils.h
@@ -78,8 +78,11 @@ common::Status ExecuteSubgraph(const SessionState& session_state, const FeedsFet
                                ExecutionMode execution_mode, const bool& terminate_flag, const logging::Logger& logger);
 
 #if defined(DEBUG_NODE_INPUTS_OUTPUTS)
-// to create a build with these enabled run the build script with
-//   --cmake_extra_defines onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS=ON
+// To create a build with these enabled, run the build script with the define set to 1 to dump just shapes, or to 2
+// to dump shapes and data. Use a number (not ON): the value is passed through and compared numerically, e.g.
+//   --cmake_extra_defines onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS=1
+// To disable it again, either delete CMakeCache.txt or run with
+//   --cmake_extra_defines onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS=0
 void DumpNodeInputs(const OpKernelContext& context, const Node& node);
 void DumpNodeOutputs(OpKernelContext& context, const Node& node, const SessionState& session_state);
 #endif