Update default cast propagation strategy from None to FloodFill (#9713)

* Changed the default cast propagation strategy from None to FloodFill.
2026-07-19 19:00:47 +00:00 · 2021-11-16 13:15:57 -08:00 · 2021-11-16 13:15:57 -08:00 · 421e4c03ce
commit 421e4c03ce
parent 9acbfeba09
8 changed files with 26 additions and 21 deletions
--- a/include/onnxruntime/core/optimizer/graph_transformer_config.h
+++ b/include/onnxruntime/core/optimizer/graph_transformer_config.h
@ -11,16 +11,16 @@ struct GraphTransformerConfiguration {
  struct PropagateCastOpsConfiguration {
    // Propagate FP16 Cast operations up and FP32 operations down
    /*
-    * Cast propagation strategy.
-    * One strategy is to insert casts around all the nodes with the allowed opcodes
-    * and reduce, by removing redundent-casts and back-to-back-casts etc., and
-    * the other is to propagate casts using flood-fill approach, expanding float16 regions in the graph
-    * traversing the graph up/down.
-    */
+     * Cast propagation strategy.
+     * One strategy is to insert casts around all the nodes with the allowed opcodes
+     * and reduce, by removing redundant-casts and back-to-back-casts etc., and
+     * the other is to propagate casts using flood-fill approach, expanding float16 regions in the graph
+     * traversing the graph up/down.
+     */
    enum class Strategy {
      None = 0,
      InsertAndReduce = 1,
-      FloodFill = 2,                   /* Propagate FP16 Cast operations up and FP32 operations down */
+      FloodFill = 2, /* Propagate FP16 Cast operations up and FP32 operations down */
    };
    using Strategy_t = std::underlying_type<Strategy>::type;
    friend constexpr Strategy operator|(const Strategy s1, const Strategy s2) {
@ -54,7 +54,7 @@ struct GraphTransformerConfiguration {
                       1 => use ORT predefined list of level 1 opcodes in addition to the user specified allow opcodes
                       2 => use ORT predefined list of level 2 opcodes in addition to the user specified allow opcodes
                    */
-    Strategy strategy = Strategy::None;
+    Strategy strategy = Strategy::FloodFill;
    // List of allowed opcodes to consider as safe to execute in float16, while moving cast operations
    std::vector<std::string> allow;
  };
--- a/onnxruntime/core/optimizer/propagate_cast_ops.cc
+++ b/onnxruntime/core/optimizer/propagate_cast_ops.cc
@ -15,9 +15,14 @@ using namespace onnxruntime::common;
 * and 2. Level 2 being the most agressive, may consider moving float operations to float16 which may result in different numerical results 
 * due to loss of precision. The user may choose level 0, whereby the user chooses the opcodes which are "FP16 Safe" instead of a list
 * predetermined opcodes as in levels 1 and 2.
-* Currently two strategies are available, InsertAndReduce and FloodFill.
+* Currently three strategies are available, None, InsertAndReduce and FloodFill.
+* None:
+*   Although no new cast operations are inserted or propagated using this strategy, some optimizations are still performed:
+*   1. Remove back-to-back casts
+*   2. Fuse subgraphs
+*   3. Remove unnecessary casts
 * InsertAndReduce :
-* This transformation converts all FP16 operations to float16. The transformation first 
+*   This transformation converts all FP16-safe operations to float16. The transformation first
 *   1. Inserts float16 cast operation on all the float inputs
 *   2. Changes all float outputs to float16
 *   3. Inserts float cast operations on all float outputs as expected
--- a/onnxruntime/core/optimizer/propagate_cast_ops.h
+++ b/onnxruntime/core/optimizer/propagate_cast_ops.h
@ -16,7 +16,7 @@ Propagate FP16 Cast operations up the graph and FP32 Cast operations down the gr
 class PropagateCastOps : public GraphTransformer {
 public:
  PropagateCastOps(GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy strategy =
-                       GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy::InsertAndReduce,
+                       GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy::FloodFill,
                   size_t level = 0, const std::vector<std::string>& allow_list = {},
                   const std::unordered_set<std::string>& compatible_execution_providers = {}) noexcept;

--- a/orttraining/orttraining/python/orttraining_pybind_state.cc
+++ b/orttraining/orttraining/python/orttraining_pybind_state.cc
@ -113,7 +113,7 @@ struct TrainingParameters {
  int propagate_cast_ops_level = 1;
  std::vector<std::string> propagate_cast_ops_allow;
  GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy propagate_cast_ops_strategy =
-      GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy::None;
+      GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy::FloodFill;
  bool allow_layer_norm_mod_precision = false;

  // graph dumping
--- a/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py
+++ b/orttraining/orttraining/python/training/ortmodule/_graph_execution_manager.py
@ -112,8 +112,8 @@ class GraphExecutionManager(GraphExecutionInterface):

        # Graph transformer config
        # Specify cast propagation strategy. Currently three strategies are available, NONE, INSERT-AND-REDUCE and FLOOD-FILL
-        # The default is NONE, which implies the transformer does no cast-propagation transformation.
-        self._propagate_cast_ops_strategy = C.PropagateCastOpsStrategy.NONE
+        # The default is FLOOD_FILL, which expands FP16 computation regions in the graph using the allowed opcodes for the given level.
+        self._propagate_cast_ops_strategy = C.PropagateCastOpsStrategy.FLOOD_FILL
        # Optimize by moving Cast operations if propagate_cast_ops_level is non-negative.
        # - If the _propagate_cast_ops_level is set to zero, then the transformation considers only the opcodes specified by _propagate_cast_ops_allow
        #   as "FP16 safe", in order to insert/(re)move cast operations before/after to perform such operations in reduced (16-bit) precision.
--- a/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py
+++ b/orttraining/orttraining/python/training/ortmodule/experimental/json_config/_load_config_from_json.py
@ -213,7 +213,7 @@ def load_from_json(ortmodule, path=None):
    {
        "PropagateCastOps":
        {
-            "Strategy": "FLOOD_FILL", # str representing strategy (like "NONE", "FLOOD_FILL"...)
+            "Strategy": "FLOOD_FILL", # str representing strategy ("NONE", "FLOOD_FILL", or "INSERT_AND_REDUCE")
            "Level": 3, # propagate cast ops level as an int
            "Allow": ["ABC", "DEF"] # propagate cast ops allow as list of strs
        },
--- a/orttraining/orttraining/python/training/orttrainer_options.py
+++ b/orttraining/orttraining/python/training/orttrainer_options.py
@ -197,7 +197,7 @@ class ORTTrainerOptions(object):
                            'schema': {
                                'propagate_cast_ops_strategy': {
                                    'type': 'onnxruntime.training.PropagateCastOpsStrategy',
-                                    'default': PropagateCastOpsStrategy.NONE
+                                    'default': PropagateCastOpsStrategy.FLOOD_FILL
                                },
                                'propagate_cast_ops_level': {
                                    'type': 'integer',
@ -374,7 +374,7 @@ class ORTTrainerOptions(object):
        graph_transformer.transformer_layer_recompute(bool, default False)
        graph_transformer.number_recompute_layers(bool, default False)
        graph_transformer.propagate_cast_ops_config (dict):
-            graph_transformer.propagate_cast_ops_config.strategy(PropagateCastOpsStrategy, default NONE)
+            graph_transformer.propagate_cast_ops_config.strategy(PropagateCastOpsStrategy, default FLOOD_FILL)
                Specify the choice of the cast propagation optimization strategy, either, NONE, INSERT_AND_REDUCE or FLOOD_FILL.
                NONE strategy does not perform any cast propagation transformation on the graph, although other optimizations
                locally change cast operations, for example, in order to fuse Transpose and MatMul nodes, the TransposeMatMulFunsion optimization could
@ -726,12 +726,12 @@ _ORTTRAINER_OPTIONS_SCHEMA = {
                    'strategy': {
                        'type': 'propagate_cast_ops_strategy',
                        'nullable': True,
-                        'default': PropagateCastOpsStrategy.NONE
+                        'default': PropagateCastOpsStrategy.FLOOD_FILL
                    },
                    'level': {
                        'type': 'integer',
                        'min': -1,
-                        'default': -1
+                        'default': 1
                    },
                    'allow': {
                        'type': 'list',
--- a/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py
+++ b/orttraining/orttraining/test/python/orttraining_test_orttrainer_frontend.py
@ -80,8 +80,8 @@ def testORTTrainerOptionsDefaultValues(test_input):
            'number_recompute_layers': 0,
            'allow_layer_norm_mod_precision': False,
            'propagate_cast_ops_config': {
-                'strategy': PropagateCastOpsStrategy.NONE,
-                'level': -1,
+                'strategy': PropagateCastOpsStrategy.FLOOD_FILL,
+                'level': 1,
                'allow': []
            }
        },