Shape inference: ReduceMean dispatcher, quant_pre_process: skip_symbolic_shape bugfix (#23558)

### Description

- Add a symbolic shape inference dispatcher for `ReduceMean`.
- `ReduceMean` is used in RMSNorm, so shape inference fails for torch-exported models such as Llama, Phi, etc.
- Reuse the dispatcher for `ReduceSum`, since `ReduceMean` (opset 18+) and `ReduceSum` (opset 13+) have the same spec other than the type of reduction performed.
- Fix an issue with the `quant_pre_process` tool where the external data file is missing if `skip_symbolic_shape=True` and `skip_optimization=False`.
- Add `"session.optimized_model_external_initializers_file_name"` to the session options so that the external data is saved in the same temp directory as the optimized model.

### Motivation and Context
2026-07-07 17:15:29 +00:00 · 2025-01-31 19:37:07 -08:00 · 2025-01-31 19:37:07 -08:00 · e8b0bdb127
commit e8b0bdb127
parent 267b49353b
2 changed files with 12 additions and 0 deletions
--- a/onnxruntime/python/tools/quantization/shape_inference.py
+++ b/onnxruntime/python/tools/quantization/shape_inference.py
@ -119,6 +119,12 @@ def quant_pre_process(
                    external_names, external_values = extract_raw_data_from_model(input_model)
                    sess_option.add_external_initializers(list(external_names), list(external_values))
                    input_model = input_model.SerializeToString()
+                # the saved optimized model otherwise points to the original external data file name
+                # which is not available relative to the optimized model file
+                elif skip_symbolic_shape and save_as_external_data:
+                    sess_option.add_session_config_entry(
+                        "session.optimized_model_external_initializers_file_name", "optimized.onnx.data"
+                    )

                sess = onnxruntime.InferenceSession(input_model, sess_option, providers=["CPUExecutionProvider"])
                # Close the session to avoid the cleanup error on Windows for temp folders
--- a/onnxruntime/python/tools/symbolic_shape_infer.py
+++ b/onnxruntime/python/tools/symbolic_shape_infer.py
@ -166,6 +166,7 @@ class SymbolicShapeInference:
            "Range": self._infer_Range,
            "Reciprocal": self._pass_on_shape_and_type,
            "ReduceSum": self._infer_ReduceSum,
+            "ReduceMean": self._infer_ReduceMean,
            "ReduceProd": self._infer_ReduceProd,
            "Reshape": self._infer_Reshape,
            "Resize": self._infer_Resize,
@ -1603,6 +1604,11 @@ class SymbolicShapeInference:
                    )
                )

+    def _infer_ReduceMean(self, node):  # noqa: N802
+        if get_opset(self.out_mp_) >= 18:
+            # reduce mean spec 18+ is same as reduce sum spec 13+
+            self._infer_ReduceSum(node)
+
    def _infer_ReduceProd(self, node):  # noqa: N802
        axes = get_attribute(node, "axes")
        keep_dims = get_attribute(node, "keepdims", 1)