[WebNN EP] Remove NHWC preferred layout (#21570)

The WebNN CPU backend in Chromium now supports the NCHW layout, so we can
drop the NHWC preferred layout for the CPU backend in the WebNN EP and
simplify the code.
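
For context: NCHW and NHWC differ only in where the channel dimension sits, which is why the code removed below repeatedly branched on a `use_nchw`/`is_nhwc` flag before indexing shapes. A minimal sketch of the distinction (an illustrative helper, not part of the ORT sources):

```cpp
#include <cstdint>
#include <vector>

// For a 4-D shape: NCHW = {N, C, H, W}, NHWC = {N, H, W, C}.
// With the EP fixed to NCHW, lookups like these collapse to shape[2]/shape[3].
int64_t SpatialHeight(const std::vector<int64_t>& shape, bool is_nchw) {
  return is_nchw ? shape[2] : shape[1];
}
int64_t SpatialWidth(const std::vector<int64_t>& shape, bool is_nchw) {
  return is_nchw ? shape[3] : shape[2];
}
```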
Author: Wanming Lin
Date:   2024-08-29 04:17:34 +08:00 (committed by GitHub)
Parent: bf8855ba3c
Commit: 59114227fd
10 changed files with 39 additions and 296 deletions

View file

@@ -19,10 +19,9 @@ common::Status ComputeConvPads(const std::vector<int64_t> input_shape,
                                const std::vector<int64_t>& onnx_strides,
                                const std::vector<int64_t>& onnx_dilations,
                                AutoPadType auto_pad_type,
-                               std::vector<int64_t>& pads_out,
-                               bool use_nchw) {
-  const int64_t input_size_y = use_nchw ? input_shape[2] : input_shape[1];
-  const int64_t input_size_x = use_nchw ? input_shape[3] : input_shape[2];
+                               std::vector<int64_t>& pads_out) {
+  const int64_t input_size_y = input_shape[2];
+  const int64_t input_size_x = input_shape[3];
   const int64_t stride_y = onnx_strides[0];
   const int64_t stride_x = onnx_strides[1];
   const int64_t dilation_y = onnx_dilations[0];
@@ -54,16 +53,15 @@ common::Status HandleAutoPad(const std::vector<int64_t> input_shape,
                              const std::vector<int64_t>& onnx_strides,
                              const std::vector<int64_t>& onnx_dilations,
                              AutoPadType auto_pad_type,
-                             std::vector<int64_t>& pads_out,
-                             bool use_nchw) {
+                             std::vector<int64_t>& pads_out) {
   if (AutoPadType::SAME_UPPER == auto_pad_type) {
     ORT_RETURN_IF_ERROR(ComputeConvPads(input_shape, weight_size_y, weight_size_x,
                                         onnx_pads, onnx_strides, onnx_dilations,
-                                        AutoPadType::SAME_UPPER, pads_out, use_nchw));
+                                        AutoPadType::SAME_UPPER, pads_out));
   } else {
     ORT_RETURN_IF_ERROR(ComputeConvPads(input_shape, weight_size_y, weight_size_x,
                                         onnx_pads, onnx_strides, onnx_dilations,
-                                        AutoPadType::SAME_LOWER, pads_out, use_nchw));
+                                        AutoPadType::SAME_LOWER, pads_out));
   }
   return Status::OK();
 }
@@ -111,10 +109,9 @@ common::Status ComputeConvTransposePadsAndOutputShape(const std::vector<int64_t>
                                                       const std::vector<int64_t>& onnx_output_padding,
                                                       AutoPadType auto_pad_type,
                                                       std::vector<int64_t>& pads_out,
-                                                      std::vector<int64_t>& output_shape_out,
-                                                      bool use_nchw) {
-  const int64_t input_size_y = use_nchw ? input_shape[2] : input_shape[1];
-  const int64_t input_size_x = use_nchw ? input_shape[3] : input_shape[2];
+                                                      std::vector<int64_t>& output_shape_out) {
+  const int64_t input_size_y = input_shape[2];
+  const int64_t input_size_x = input_shape[3];
   const int64_t stride_y = onnx_strides[0];
   const int64_t stride_x = onnx_strides[1];
   const int64_t dilation_y = onnx_dilations[0];

View file

@@ -21,8 +21,7 @@ common::Status HandleAutoPad(const std::vector<int64_t> input_shape,
                              const std::vector<int64_t>& onnx_strides,
                              const std::vector<int64_t>& onnx_dilations,
                              AutoPadType auto_pad_type,
-                             std::vector<int64_t>& pads_out,
-                             bool use_nchw) ORT_MUST_USE_RESULT;
+                             std::vector<int64_t>& pads_out) ORT_MUST_USE_RESULT;
 
 // Compute pads and output shape for ConvTranspose.
 common::Status ComputeConvTransposePadsAndOutputShape(const std::vector<int64_t> input_shape,
@@ -34,8 +33,7 @@ common::Status ComputeConvTransposePadsAndOutputShape(const std::vector<int64_t>
                                                       const std::vector<int64_t>& onnx_output_padding,
                                                       AutoPadType auto_pad_type,
                                                       std::vector<int64_t>& pads_out,
-                                                      std::vector<int64_t>& output_shape_out,
-                                                      bool use_nchw) ORT_MUST_USE_RESULT;
+                                                      std::vector<int64_t>& output_shape_out) ORT_MUST_USE_RESULT;
 
 }  // namespace webnn
 }  // namespace onnxruntime

View file

@@ -18,9 +18,6 @@ namespace webnn {
 class ConvOpBuilder : public BaseOpBuilder {
   // Add operator related.
  public:
-  void AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const override;
-
  private:
   Status AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
                                const logging::Logger& logger) const override ORT_MUST_USE_RESULT;
@@ -33,13 +30,6 @@ class ConvOpBuilder : public BaseOpBuilder {
                        const logging::Logger& logger) const override;
 };
 
-void ConvOpBuilder::AddInitializersToSkip(ModelBuilder& model_builder, const Node& node) const {
-  // skip the weight for conv as we need to transpose for preferred layout NHWC.
-  if (model_builder.GetPreferredLayout() == DataLayout::NHWC) {
-    model_builder.AddInitializerToSkip(node.InputDefs()[1]->Name());  // W
-  }
-}
-
 // Helper functions
 common::Status SetConvBaseOptions(ModelBuilder& model_builder,
                                   const Node& node, emscripten::val& options,
@@ -48,7 +38,6 @@ common::Status SetConvBaseOptions(ModelBuilder& model_builder,
                                   const std::vector<int64_t>& strides,
                                   const std::vector<int64_t>& dilations,
                                   std::vector<int64_t>& pads,
-                                  const bool is_nhwc,
                                   const bool is_conv1d,
                                   const logging::Logger& logger) {
   NodeAttrHelper helper(node);
@@ -61,7 +50,7 @@ common::Status SetConvBaseOptions(ModelBuilder& model_builder,
     // Calculate explicit padding for autoPad.
     if (AutoPadType::SAME_UPPER == auto_pad_type || AutoPadType::SAME_LOWER == auto_pad_type) {
       ORT_RETURN_IF_ERROR(HandleAutoPad(input_shape, weight_shape[2], weight_shape[3],
-                                        pads, strides, dilations, auto_pad_type, pads_out, !is_nhwc));
+                                        pads, strides, dilations, auto_pad_type, pads_out));
       pads = pads_out;
     }
   } else if (node.OpType() == "ConvTranspose") {
@@ -82,7 +71,7 @@ common::Status SetConvBaseOptions(ModelBuilder& model_builder,
     // Otherwise compute the output shape, as well as the pads if the auto_pad attribute is SAME_UPPER/SAME_LOWER.
     ORT_RETURN_IF_ERROR(ComputeConvTransposePadsAndOutputShape(input_shape, weight_shape[2], weight_shape[3],
                                                                pads, strides, dilations, output_padding,
-                                                               auto_pad_type, pads_out, output_shape, !is_nhwc));
+                                                               auto_pad_type, pads_out, output_shape));
 
     if (output_shape[0] != -1 && output_shape[1] != -1) {
       options.set("outputSizes", emscripten::val::array(GetVecUint32FromVecInt64(output_shape)));
@@ -111,89 +100,6 @@ common::Status SetConvBaseOptions(ModelBuilder& model_builder,
   return Status::OK();
 }
 
-// Both depthwise Conv and ConvTranspose share the same logic to add the layout.
-Status AddInitializerInNewLayout(ModelBuilder& model_builder,
-                                 const std::string& name,
-                                 bool is_conv,
-                                 bool is_conv1d) {
-  const auto& tensor = *model_builder.GetInitializerTensors().at(name);
-  auto data_type = tensor.data_type();
-
-  const auto& shape = tensor.dims();
-  std::vector<uint32_t> dims = GetVecUint32FromVecInt64(std::vector<int64_t>(std::begin(shape), std::end(shape)));
-
-  if (is_conv1d) {
-    // Support conv1d by prepending a 1 size dimension.
-    dims.push_back(1);
-  }
-
-  const uint8_t* src = nullptr;
-  Initializer unpacked_tensor(tensor, model_builder.GetGraphViewer().ModelPath());
-  src = unpacked_tensor.DataAsByteSpan().data();
-  const auto out_t = dims[0], in_t = dims[1],
-             h_t = dims[2], w_t = dims[3];
-  std::vector<uint32_t> dest_shape;
-  if (is_conv == 1)
-    dest_shape = {out_t, h_t, w_t, in_t};  // L_0231
-  else
-    dest_shape = {in_t, h_t, w_t, out_t};  // L_1230 for depthwise conv and convTranspose weight
-  SafeInt<size_t> num_elements = SafeInt<size_t>(Product(dest_shape));
-
-  size_t element_size{0};
-  switch (data_type) {
-    case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
-      element_size = sizeof(uint8_t);
-      break;
-    case ONNX_NAMESPACE::TensorProto_DataType_INT8:
-      element_size = sizeof(int8_t);
-      break;
-    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
-      element_size = sizeof(uint16_t);
-      break;
-    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
-      element_size = sizeof(float);
-      break;
-    default:
-      break;
-  }
-  std::unique_ptr<uint8_t[]> buffer_holder(new uint8_t[element_size * num_elements]);
-  uint8_t* buffer = buffer_holder.get();
-
-  for (uint32_t out = 0; out < out_t; out++) {
-    for (uint32_t in = 0; in < in_t; in++) {
-      for (uint32_t h = 0; h < h_t; h++) {
-        for (uint32_t w = 0; w < w_t; w++) {
-          auto onnx_idx = out * in_t * h_t * w_t +
-                          in * h_t * w_t +
-                          h * w_t +
-                          w;
-
-          uint32_t nnapi_idx;
-          if (is_conv == 1) {  // L_0231
-            nnapi_idx = out * h_t * w_t * in_t +
-                        h * w_t * in_t +
-                        w * in_t +
-                        in;
-          } else {  // L_1230 for depthwise conv weight
-            nnapi_idx = in * h_t * w_t * out_t +
-                        h * w_t * out_t +
-                        w * out_t +
-                        out;
-          }
-
-          for (size_t i = 0; i < element_size; i++) {
-            buffer[element_size * nnapi_idx + i] = src[element_size * onnx_idx + i];
-          }
-        }
-      }
-    }
-  }
-
-  ORT_RETURN_IF_ERROR(model_builder.AddOperandFromPersistMemoryBuffer(name, buffer, num_elements * element_size,
-                                                                      dest_shape, data_type));
-
-  return Status::OK();
-}
-
 // Add operator related.
 
 Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const Node& node,
@@ -203,7 +109,6 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
   const auto& op_type = node.OpType();
   emscripten::val input = model_builder.GetOperand(input_defs[0]->Name());
   emscripten::val output = emscripten::val::object();
-  const auto& initializers(model_builder.GetInitializerTensors());
 
   std::vector<int64_t> input_shape;
   ORT_RETURN_IF_NOT(GetShape(*input_defs[0], input_shape, logger), "Cannot get input shape");
@@ -216,19 +121,11 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
   auto dilations = helper.Get("dilations", std::vector<int64_t>{1, 1});
   auto pads = helper.Get("pads", std::vector<int64_t>{0, 0, 0, 0});
-  const bool is_nhwc = model_builder.GetPreferredLayout() == DataLayout::NHWC;
   const bool is_conv1d = input_shape.size() == 3 && weight_shape.size() == 3;
-  const bool is_constant_weight = Contains(initializers, weight_name);
   // Support conv1d by prepending a 1 or 2 size dimensions.
   if (is_conv1d) {
     // Reshape input.
-    if (is_nhwc) {
-      // For NHWC preferred layout, the input has been transposed.
-      // For conv1d it is NCD1 -> ND1C, so we need to prepend 1 to the index 2.
-      input_shape.insert(input_shape.begin() + 2, 1);
-    } else {
-      input_shape.push_back(1);
-    }
+    input_shape.push_back(1);
     std::vector<uint32_t> new_shape = GetVecUint32FromVecInt64(input_shape);
     input = model_builder.GetBuilder().call<emscripten::val>("reshape", input, emscripten::val::array(new_shape));
@@ -244,63 +141,19 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
   emscripten::val options = emscripten::val::object();
   options.set("label", node.Name());
   ORT_RETURN_IF_ERROR(SetConvBaseOptions(
-      model_builder, node, options, input_shape, weight_shape, strides, dilations, pads, is_nhwc, is_conv1d, logger));
-  bool depthwise = false;
-  if (op_type == "Conv" || op_type == "ConvInteger") {
-    int groups = options["groups"].as<int>();
-    if (is_nhwc) {
-      depthwise = (groups == input_shape[3] && groups != 1);
-      options.set("inputLayout", emscripten::val("nhwc"));
-      if (is_constant_weight) {
-        ORT_RETURN_IF_ERROR(AddInitializerInNewLayout(model_builder, weight_name, !depthwise, is_conv1d));
-      }
-      if (!depthwise) {
-        options.set("filterLayout", emscripten::val("ohwi"));
-      } else {
-        options.set("filterLayout", emscripten::val("ihwo"));
-      }
-    }
-  } else {  // ConvTranspose
-    if (is_nhwc) {
-      options.set("inputLayout", emscripten::val("nhwc"));
-      options.set("filterLayout", emscripten::val("ohwi"));
-      if (is_constant_weight) {
-        ORT_RETURN_IF_ERROR(AddInitializerInNewLayout(model_builder, weight_name, true, is_conv1d));
-      }
-    }
-  }
+      model_builder, node, options, input_shape, weight_shape, strides, dilations, pads, is_conv1d, logger));
 
   emscripten::val filter = model_builder.GetOperand(weight_name);
   if (is_conv1d) {
     // Reshape weight to 4D for conv1d.
-    if (!is_nhwc || !is_constant_weight) {
-      // The weight_shape has been appended 1's, reshape weight operand.
-      std::vector<uint32_t> new_shape = GetVecUint32FromVecInt64(weight_shape);
-      emscripten::val reshape_options = emscripten::val::object();
-      reshape_options.set("label", node.Name() + "_reshape_filter");
-      filter = model_builder.GetBuilder().call<emscripten::val>("reshape",
-                                                                filter,
-                                                                emscripten::val::array(new_shape),
-                                                                reshape_options);
-    }
-  }
-
-  emscripten::val transpose_options = emscripten::val::object();
-  if (is_nhwc && !is_constant_weight) {
-    // For NHWC preferred layout, if the weight is input:
-    // - Transpose it from iohw -> ohwi for convTranspose.
-    // - Transpose it from oihw -> ihwo for depthwise conv.
-    // - Transpose it from oihw -> ohwi for conv.
-    std::vector<uint32_t> perm(4);
-    if (op_type == "ConvTranspose" || depthwise) {
-      perm = {1, 2, 3, 0};  // L_1230 for depthwise conv and convTranspose weight
-    } else {
-      perm = {0, 2, 3, 1};  // L_0231
-    }
-    transpose_options.set("permutation", emscripten::val::array(perm));
-    transpose_options.set("label", node.Name() + "_transpose_filter");
-    filter = model_builder.GetBuilder().call<emscripten::val>("transpose", filter, transpose_options);
+    // The weight_shape has been appended 1's, reshape weight operand.
+    std::vector<uint32_t> new_shape = GetVecUint32FromVecInt64(weight_shape);
+    emscripten::val reshape_options = emscripten::val::object();
+    reshape_options.set("label", node.Name() + "_reshape_filter");
+    filter = model_builder.GetBuilder().call<emscripten::val>("reshape",
+                                                              filter,
+                                                              emscripten::val::array(new_shape),
+                                                              reshape_options);
   }
 
   if (op_type == "Conv") {
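
The `AddInitializerInNewLayout` helper removed above was, at its core, a 4-D transpose written as raw index arithmetic: it repacked an OIHW ONNX weight into OHWI (regular conv, permutation {0, 2, 3, 1}) or IHWO (depthwise conv and ConvTranspose, permutation {1, 2, 3, 0}) for WebNN's NHWC path. A compact sketch of the same repacking (a hypothetical standalone helper, not the ORT API):

```cpp
#include <array>
#include <cstddef>
#include <vector>

// Permute a dense 4-D tensor. dims = source dims {O, I, H, W};
// perm = destination axis order, e.g. {0, 2, 3, 1} for OIHW -> OHWI.
std::vector<float> PermuteWeight(const std::vector<float>& src,
                                 const std::array<size_t, 4>& dims,
                                 const std::array<size_t, 4>& perm) {
  const std::array<size_t, 4> out{dims[perm[0]], dims[perm[1]], dims[perm[2]], dims[perm[3]]};
  std::vector<float> dst(src.size());
  for (size_t o = 0; o < dims[0]; ++o)
    for (size_t i = 0; i < dims[1]; ++i)
      for (size_t h = 0; h < dims[2]; ++h)
        for (size_t w = 0; w < dims[3]; ++w) {
          const std::array<size_t, 4> c{o, i, h, w};
          const size_t src_off = ((o * dims[1] + i) * dims[2] + h) * dims[3] + w;
          const size_t dst_off =
              ((c[perm[0]] * out[1] + c[perm[1]]) * out[2] + c[perm[2]]) * out[3] + c[perm[3]];
          dst[dst_off] = src[src_off];
        }
  return dst;
}
```

With the EP fixed to NCHW, weights stay in OIHW, so neither this repacking of constant weights nor the fallback runtime `transpose` for non-constant weights is needed.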

View file

@@ -79,9 +79,6 @@ Status NormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder
     ORT_RETURN_IF_NOT(input_defs.size() == 5, "BatchNormalization requires five inputs.");
     emscripten::val mean = model_builder.GetOperand(input_defs[3]->Name());
     emscripten::val variance = model_builder.GetOperand(input_defs[4]->Name());
-    if (model_builder.GetPreferredLayout() == DataLayout::NHWC) {
-      options.set("axis", rank - 1);
-    }
 
     output = model_builder.GetBuilder().call<emscripten::val>("batchNormalization", input, mean, variance, options);
   } else if (op_type == "LayerNormalization") {
@@ -104,9 +101,8 @@ Status NormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder
                      std::back_inserter(new_shape),
                      [](int64_t dim) -> uint32_t { return SafeInt<uint32_t>(dim); });
-      size_t insertion_offset = (model_builder.GetPreferredLayout() == DataLayout::NHWC) ? 2 : 3;
       ptrdiff_t excess_rank = new_shape.size() - webnn_shape_rank;
-      auto insertion_point = new_shape.begin() + insertion_offset;
+      auto insertion_point = new_shape.begin() + 3;
       if (input_shape.size() < webnn_shape_rank) {
         // Pad the shape with extra 1's to satisfy WebNN v1's rank requirements.
         new_shape.insert(insertion_point, -excess_rank, 1);
@@ -125,9 +121,6 @@ Status NormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder
                                                      reshape_input_options);
     }
 
-    if (model_builder.GetPreferredLayout() == DataLayout::NHWC) {
-      options.set("layout", emscripten::val("nhwc"));
-    }
     output = model_builder.GetBuilder().call<emscripten::val>("instanceNormalization", input, options);
 
     // Reshape back to the original output shape for 3D input.
     if (input_shape.size() != 4) {

View file

@@ -70,11 +70,7 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
   options.set("strides", emscripten::val::array(strides));
   const auto dilations = helper.Get("dilations", std::vector<int32_t>{1, 1});
   options.set("dilations", emscripten::val::array(dilations));
-  if (model_builder.GetPreferredLayout() == DataLayout::NHWC) {
-    options.set("layout", emscripten::val("nhwc"));
-  } else {
-    options.set("layout", emscripten::val("nchw"));
-  }
+  options.set("layout", emscripten::val("nchw"));
 
   // Add Padding.
   // Usually using autopadding is more efficient than using explicit padding.
@@ -93,8 +89,7 @@ Status PoolOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
                                       helper.Get("strides", std::vector<int64_t>{1, 1}),
                                       helper.Get("dilations", std::vector<int64_t>{1, 1}),
                                       auto_pad_type,
-                                      pads_out,
-                                      model_builder.GetPreferredLayout() == DataLayout::NCHW));
+                                      pads_out));
       pads = GetVecUint32FromVecInt64(pads_out);
     }
     // Permute the ONNX's pads, which is [beginning_height, beginning_width, ending_height, ending_width],
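
For reference, the reordering that comment describes, since ONNX and WebNN order the begin/end pairs differently (a sketch; the helper name is hypothetical, and the WebNN pooling padding order is per the WebNN spec):

```cpp
#include <cstdint>
#include <vector>

// ONNX pads:            [begin_h, begin_w, end_h, end_w]
// WebNN pool2d padding: [begin_h, end_h, begin_w, end_w]
std::vector<uint32_t> PermutePadsOnnxToWebnn(const std::vector<uint32_t>& p) {
  return {p[0], p[2], p[1], p[3]};
}
```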

View file

@@ -120,18 +120,10 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
   std::vector<float> scales;
   std::vector<int32_t> sizes;
-  std::vector<float> scales_hw;
-  std::vector<int32_t> sizes_hw;
-  std::vector<int32_t> axes;
   std::string scales_name = GetTensorName(input_defs, 2);
-  const bool is_nhwc = model_builder.GetPreferredLayout() == DataLayout::NHWC;
   if (!scales_name.empty()) {  // Use scales.
     ORT_RETURN_IF_NOT(GetResizeScales(initializers, node, scales, logger), "Error getting resize scales");
-    if (is_nhwc) {
-      scales_hw = {scales[1], scales[2]};
-    } else {
-      scales_hw = {scales[2], scales[3]};
-    }
+    std::vector<float> scales_hw = {scales[2], scales[3]};
     options.set("scales", emscripten::val::array(scales_hw));
   } else {  // Use sizes, we already checked inputs in IsOpSupportedImpl.
     std::vector<int64_t> output_sizes;
@@ -140,19 +132,11 @@ Status ResizeOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
     std::transform(output_sizes.cbegin(), output_sizes.cend(),
                    std::back_inserter(sizes),
                    [](int64_t dim) -> int32_t { return SafeInt<int32_t>(dim); });
-    if (is_nhwc) {
-      sizes_hw = {sizes[1], sizes[2]};
-    } else {
-      sizes_hw = {sizes[2], sizes[3]};
-    }
+    std::vector<int32_t> sizes_hw = {sizes[2], sizes[3]};
     options.set("sizes", emscripten::val::array(sizes_hw));
   }
-  if (is_nhwc) {
-    axes = {1, 2};
-  } else {
-    axes = {2, 3};
-  }
+  std::vector<int32_t> axes = {2, 3};
   options.set("axes", emscripten::val::array(axes));
 
   emscripten::val input = model_builder.GetOperand(input_defs[0]->Name());
@@ -221,7 +205,6 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers
     return false;
   }
 
-  const bool is_nhwc = node.Domain() == kMSInternalNHWCDomain;
   // We want to check if the scales or sizes are not trying to resize on N/C channels here.
   if (has_scales) {  // We are using scales.
     std::vector<float> scales;
@@ -229,7 +212,7 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers
       return false;
 
     float scale_n = scales[0];
-    float scale_c = is_nhwc ? scales[3] : scales[1];
+    float scale_c = scales[1];
     if (scale_n != 1.0f || scale_c != 1.0f) {
       LOGS(logger, VERBOSE) << "Scales of N/C channel should be 1"
                             << "Resize of N/C channels are not supported"
@@ -239,8 +222,8 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers
 
     // For now we only support upscale, so the scale_h and scale_w should be an integer >= 1.
     // TODO support ResizeBilinear.
-    float scale_h = is_nhwc ? scales[1] : scales[2];
-    float scale_w = is_nhwc ? scales[2] : scales[3];
+    float scale_h = scales[2];
+    float scale_w = scales[3];
 
     // Onnx spec requires scale to be a positive float, so we are not checking that here.
     if (roundf(scale_h) != scale_h) {
@@ -261,12 +244,11 @@ bool ResizeOpBuilder::IsOpSupportedImpl(const InitializedTensorSet& initializers
       return false;
 
     auto output_size_n = output_sizes[0];
-    const int c_idx = is_nhwc ? 3 : 1;
-    if (output_size_n != input_shape[0] || output_sizes[c_idx] != input_shape[c_idx]) {
+    if (output_size_n != input_shape[0] || output_sizes[1] != input_shape[1]) {
      LOGS(logger, VERBOSE) << "Output sizes of N/C chanel should match the input sizes, "
                            << "Resize of N/C channels are not supported"
                            << ", input_size_n, " << input_shape[0] << ", output_size_n, " << output_size_n
-                           << ". input_size_c, " << input_shape[c_idx] << ", output_size_c, " << output_sizes[c_idx];
+                           << ". input_size_c, " << input_shape[1] << ", output_size_c, " << output_sizes[1];
       return false;
     }
   }
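
The support check above reduces to a simple rule once the layout is always NCHW: N and C must not be resized, and the H/W scales must be integral upscales. A standalone restatement (illustrative, not the ORT code):

```cpp
#include <cmath>
#include <vector>

// scales are ordered {N, C, H, W}; e.g. {1, 1, 2, 2} is accepted, while
// {1, 2, 2, 2} (touches C) and {1, 1, 1.5, 1.5} (non-integer) are rejected.
bool ResizeScalesSupported(const std::vector<float>& scales) {
  if (scales.size() != 4 || scales[0] != 1.0f || scales[1] != 1.0f) return false;
  return std::roundf(scales[2]) == scales[2] && std::roundf(scales[3]) == scales[3];
}
```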

View file

@@ -20,12 +20,10 @@ namespace onnxruntime {
 namespace webnn {
 
 ModelBuilder::ModelBuilder(const GraphViewer& graph_viewer, const logging::Logger& logger,
-                           const emscripten::val& context, const DataLayout preferred_layout,
-                           const WebnnDeviceType wnn_device_type)
+                           const emscripten::val& context, const WebnnDeviceType wnn_device_type)
     : graph_viewer_(graph_viewer),
       logger_(logger),
       wnn_context_(context),
-      preferred_layout_(preferred_layout),
       wnn_device_type_(wnn_device_type) {
   // Create WebNN MLGraphBuilder for each ModelBuilder, because MLGraphBuilder.build()
   // is only allowed to be called once.
@@ -254,64 +252,6 @@ Status ModelBuilder::AddOperations() {
   return Status::OK();
 }
 
-Status ModelBuilder::AddOperandFromPersistMemoryBuffer(
-    const std::string& name, const void* buffer, const size_t size,
-    const std::vector<uint32_t> shape, const int32_t data_type) {
-  auto persist_buffer = std::make_unique<uint8_t[]>(size);
-  uint8_t* dest = persist_buffer.get();
-  memcpy(dest, buffer, size);
-  emscripten::val view = emscripten::val::undefined();
-  emscripten::val desc = emscripten::val::object();
-  ORT_RETURN_IF_NOT(SetWebnnDataType(desc, data_type), "Unsupported data type");
-  switch (data_type) {
-    case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
-    case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
-      view = emscripten::val{emscripten::typed_memory_view(size / sizeof(uint8_t),
-                                                           reinterpret_cast<const uint8_t*>(dest))};
-      break;
-    case ONNX_NAMESPACE::TensorProto_DataType_INT8:
-      view = emscripten::val{emscripten::typed_memory_view(size / sizeof(int8_t),
-                                                           reinterpret_cast<const int8_t*>(dest))};
-      break;
-    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
-      view = emscripten::val{emscripten::typed_memory_view(size / sizeof(uint16_t),
-                                                           reinterpret_cast<const uint16_t*>(dest))};
-      break;
-    case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
-      view = emscripten::val{emscripten::typed_memory_view(size / sizeof(float),
-                                                           reinterpret_cast<const float*>(dest))};
-      break;
-    case ONNX_NAMESPACE::TensorProto_DataType_INT32:
-      view = emscripten::val{emscripten::typed_memory_view(size / sizeof(int32_t),
-                                                           reinterpret_cast<const int32_t*>(dest))};
-      break;
-    case ONNX_NAMESPACE::TensorProto_DataType_INT64:
-      view = emscripten::val{emscripten::typed_memory_view(size / sizeof(int64_t),
-                                                           reinterpret_cast<const int64_t*>(dest))};
-      break;
-    case ONNX_NAMESPACE::TensorProto_DataType_UINT32:
-      view = emscripten::val{emscripten::typed_memory_view(size / sizeof(uint32_t),
-                                                           reinterpret_cast<const uint32_t*>(dest))};
-      break;
-    case ONNX_NAMESPACE::TensorProto_DataType_UINT64:
-      view = emscripten::val{emscripten::typed_memory_view(size / sizeof(uint64_t),
-                                                           reinterpret_cast<const uint64_t*>(dest))};
-      break;
-    default:
-      break;
-  }
-
-  desc.set("dimensions", emscripten::val::array(shape));
-  emscripten::val operand = emscripten::val::object();
-  // Wasm memory grow will cause all array buffers reallocation, which will be treated as detached
-  // buffers in JS side. Simply create a copy to fix it.
-  operand = wnn_builder_.call<emscripten::val>("constant", desc, view.call<emscripten::val>("slice"));
-
-  AddOperand(name, operand);
-  mem_persist_buffers_.push_back(std::move(persist_buffer));
-  return Status::OK();
-}
-
 Status ModelBuilder::RegisterModelOutputs() {
   for (const auto* node_arg : graph_viewer_.GetOutputs()) {
     ORT_RETURN_IF_ERROR(RegisterModelInputOutput(*node_arg, false /* is_input */));
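
The `.slice()` call in the removed code is worth noting: growing Wasm memory reallocates the heap's ArrayBuffer, which detaches existing typed-array views on the JS side, so the code snapshots the view before handing it to `MLGraphBuilder.constant()`. A minimal sketch of that pattern with embind (hypothetical helper name):

```cpp
#include <cstddef>
#include <emscripten/val.h>

// Copy a view of Wasm memory into a fresh Float32Array so later memory
// growth cannot detach the data handed to the WebNN graph builder.
emscripten::val SnapshotF32(const float* data, size_t count) {
  emscripten::val view{emscripten::typed_memory_view(count, data)};
  return view.call<emscripten::val>("slice");
}
```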

View file

@@ -22,8 +22,7 @@ class IOpBuilder;
 class ModelBuilder {
  public:
   ModelBuilder(const GraphViewer& graph_viewer, const logging::Logger& logger,
-               const emscripten::val& context, const DataLayout preferred_layout,
-               const WebnnDeviceType wnn_device_type);
+               const emscripten::val& context, const WebnnDeviceType wnn_device_type);
   ~ModelBuilder() = default;
 
   Status Compile(std::unique_ptr<Model>& model) ORT_MUST_USE_RESULT;
@@ -37,15 +36,6 @@ class ModelBuilder {
   const emscripten::val& GetOperand(const std::string& name) const { return wnn_operands_.at(name); }
   void AddOperand(const std::string& name, const emscripten::val& operand);
   const emscripten::val& GetZeroConstant(const std::string& data_type);
-  // Use the buffers to persist WebNN allocated data like transposed weight.
-  // It ensures the validity during inference session.
-  std::vector<std::unique_ptr<uint8_t[]>> mem_persist_buffers_;
-  // Add a constant operand (allocate persist buffer and move the ownership to mem_persist_buffers_).
-  Status AddOperandFromPersistMemoryBuffer(
-      const std::string& name, const void* buffer,
-      const size_t size, const std::vector<uint32_t> shape, const int32_t data_type);
-
-  DataLayout GetPreferredLayout() const { return preferred_layout_; }
 
   WebnnDeviceType GetWebnnDeviceType() const { return wnn_device_type_; }
@@ -64,7 +54,6 @@ class ModelBuilder {
   emscripten::val wnn_context_ = emscripten::val::undefined();
   emscripten::val wnn_builder_ = emscripten::val::undefined();
-  DataLayout preferred_layout_;
   WebnnDeviceType wnn_device_type_;
   InlinedHashMap<std::string, emscripten::val> wnn_operands_;
   std::vector<std::string> input_names_;

View file

@@ -19,12 +19,9 @@ namespace onnxruntime {
 WebNNExecutionProvider::WebNNExecutionProvider(const std::string& webnn_device_flags)
     : IExecutionProvider{onnxruntime::kWebNNExecutionProvider} {
-  // WebNN EP uses NHWC layout for CPU XNNPACK backend and NCHW for GPU DML backend.
   if (webnn_device_flags.compare("cpu") == 0) {
-    preferred_layout_ = DataLayout::NHWC;
     wnn_device_type_ = webnn::WebnnDeviceType::CPU;
   } else {
-    preferred_layout_ = DataLayout::NCHW;
     if (webnn_device_flags.compare("gpu") == 0) {
       wnn_device_type_ = webnn::WebnnDeviceType::GPU;
     } else if (webnn_device_flags.compare("npu") == 0) {
@@ -212,8 +209,7 @@ common::Status WebNNExecutionProvider::Compile(const std::vector<FusedNodeAndGra
     Node& fused_node = fused_node_and_graph.fused_node;
     const onnxruntime::GraphViewer& graph_viewer(fused_node_and_graph.filtered_graph);
 
-    webnn::ModelBuilder builder(graph_viewer, *GetLogger(), wnn_context_,
-                                preferred_layout_, wnn_device_type_);
+    webnn::ModelBuilder builder(graph_viewer, *GetLogger(), wnn_context_, wnn_device_type_);
     std::unique_ptr<webnn::Model> model;
     ORT_RETURN_IF_ERROR(builder.Compile(model));

View file

@@ -26,7 +26,8 @@ class WebNNExecutionProvider : public IExecutionProvider {
   GetCapability(const onnxruntime::GraphViewer& graph_viewer,
                 const IKernelLookup& /*kernel_registries*/) const override;
 
-  DataLayout GetPreferredLayout() const override { return preferred_layout_; }
+  // WebNN EP uses default NCHW layout for all backends.
+  DataLayout GetPreferredLayout() const override { return DataLayout::NCHW; }
 
   // We implement the Compile that takes FusedNodeAndGraph instances.
   FusionStyle GetFusionStyle() const override { return FusionStyle::FilteredGraphViewer; }
@@ -44,7 +45,6 @@ class WebNNExecutionProvider : public IExecutionProvider {
  private:
   emscripten::val wnn_context_ = emscripten::val::undefined();
-  DataLayout preferred_layout_;
   webnn::WebnnDeviceType wnn_device_type_;
   InlinedHashMap<std::string, std::unique_ptr<onnxruntime::webnn::Model>> models_;
   ModelMetadefIdGenerator metadef_id_generator_;