mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-06-04 23:59:56 +00:00
Update default cast propagation strategy from None to FloodFill (#9713)
* Changed the default cast propagation strategy from None to FloodFill.
This commit is contained in:
parent
9acbfeba09
commit
421e4c03ce
8 changed files with 26 additions and 21 deletions
|
|
@ -11,16 +11,16 @@ struct GraphTransformerConfiguration {
|
|||
struct PropagateCastOpsConfiguration {
|
||||
// Propagate FP16 Cast operations up and FP32 operations down
|
||||
/*
|
||||
* Cast propagation strategy.
|
||||
* One strategy is to insert casts around all the nodes with the allowed opcodes
|
||||
* and reduce, by removing redundant-casts and back-to-back-casts etc., and
|
||||
* the other is to propagate casts using flood-fill approach, expanding float16 regions in the graph
|
||||
* traversing the graph up/down.
|
||||
*/
|
||||
* Cast propagation strategy.
|
||||
* One strategy is to insert casts around all the nodes with the allowed opcodes
|
||||
* and reduce, by removing redundant-casts and back-to-back-casts etc., and
|
||||
* the other is to propagate casts using flood-fill approach, expanding float16 regions in the graph
|
||||
* traversing the graph up/down.
|
||||
*/
|
||||
enum class Strategy {
|
||||
None = 0,
|
||||
InsertAndReduce = 1,
|
||||
FloodFill = 2, /* Propagate FP16 Cast operations up and FP32 operations down */
|
||||
FloodFill = 2, /* Propagate FP16 Cast operations up and FP32 operations down */
|
||||
};
|
||||
using Strategy_t = std::underlying_type<Strategy>::type;
|
||||
friend constexpr Strategy operator|(const Strategy s1, const Strategy s2) {
|
||||
|
|
@ -54,7 +54,7 @@ struct GraphTransformerConfiguration {
|
|||
1 => use ORT predefined list of level 1 opcodes in addition to the user specified allow opcodes
|
||||
2 => use ORT predefined list of level 2 opcodes in addition to the user specified allow opcodes
|
||||
*/
|
||||
Strategy strategy = Strategy::None;
|
||||
Strategy strategy = Strategy::FloodFill;
|
||||
// List of allowed opcodes to consider as safe to execute in float16, while moving cast operations
|
||||
std::vector<std::string> allow;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -15,9 +15,14 @@ using namespace onnxruntime::common;
|
|||
* and 2. Level 2 being the most aggressive, may consider moving float operations to float16 which may result in different numerical results
|
||||
* due to loss of precision. The user may choose level 0, whereby the user chooses the opcodes which are "FP16 Safe" instead of a list
|
||||
* of predetermined opcodes as in levels 1 and 2.
|
||||
* Currently two strategies are available, InsertAndReduce and FloodFill.
|
||||
* Currently three strategies are available, None, InsertAndReduce and FloodFill.
|
||||
* None:
|
||||
* Although no new cast operations are inserted or propagated using this strategy some optimizations are performed
|
||||
* 1. Remove back-to-back casts
|
||||
* 2. Fuse subgraphs
|
||||
* 3. Remove unnecessary casts
|
||||
* InsertAndReduce :
|
||||
* This transformation converts all FP16 operations to float16. The transformation first
|
||||
* This transformation converts all FP16 operations to float16. The transformation first
|
||||
* 1. Inserts float16 cast operation on all the float inputs
|
||||
* 2. Changes all float outputs to float16
|
||||
* 3. Inserts float cast operations on all float outputs as expected
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ Propagate FP16 Cast operations up the graph and FP32 Cast operations down the gr
|
|||
class PropagateCastOps : public GraphTransformer {
|
||||
public:
|
||||
PropagateCastOps(GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy strategy =
|
||||
GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy::InsertAndReduce,
|
||||
GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy::FloodFill,
|
||||
size_t level = 0, const std::vector<std::string>& allow_list = {},
|
||||
const std::unordered_set<std::string>& compatible_execution_providers = {}) noexcept;
|
||||
|
||||
|
|
|
|||
|
|
@ -113,7 +113,7 @@ struct TrainingParameters {
|
|||
int propagate_cast_ops_level = 1;
|
||||
std::vector<std::string> propagate_cast_ops_allow;
|
||||
GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy propagate_cast_ops_strategy =
|
||||
GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy::None;
|
||||
GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy::FloodFill;
|
||||
bool allow_layer_norm_mod_precision = false;
|
||||
|
||||
// graph dumping
|
||||
|
|
|
|||
|
|
@ -112,8 +112,8 @@ class GraphExecutionManager(GraphExecutionInterface):
|
|||
|
||||
# Graph transformer config
|
||||
# Specify cast propagation strategy. Currently three strategies are available: NONE, INSERT_AND_REDUCE and FLOOD_FILL.
|
||||
# The default is NONE, which implies the transformer does no cast-propagation transformation.
|
||||
self._propagate_cast_ops_strategy = C.PropagateCastOpsStrategy.NONE
|
||||
# The default is FLOOD_FILL, expand FP16 computation regions in the graph using allowed opcodes for the given level.
|
||||
self._propagate_cast_ops_strategy = C.PropagateCastOpsStrategy.FLOOD_FILL
|
||||
# Optimize by moving Cast operations if propagate_cast_ops_level is non-negative.
|
||||
# - If the _propagate_cast_ops_level is set to zero, then the transformation considers only the opcodes specified by _propagate_cast_ops_allow
|
||||
# as "FP16 safe", in order to insert/(re)move cast operations before/after to perform such operations in reduced (16-bit) precision.
|
||||
|
|
|
|||
|
|
@ -213,7 +213,7 @@ def load_from_json(ortmodule, path=None):
|
|||
{
|
||||
"PropagateCastOps":
|
||||
{
|
||||
"Strategy": "FLOOD_FILL", # str representing strategy (like "NONE", "FLOOD_FILL"...)
|
||||
"Strategy": "FLOOD_FILL", # str representing strategy ("NONE", "FLOOD_FILL", or "INSERT_AND_REDUCE")
|
||||
"Level": 3, # propagate cast ops level as an int
|
||||
"Allow": ["ABC", "DEF"] # propagate cast ops allow as list of strs
|
||||
},
|
||||
|
|
|
|||
|
|
@ -197,7 +197,7 @@ class ORTTrainerOptions(object):
|
|||
'schema': {
|
||||
'propagate_cast_ops_strategy': {
|
||||
'type': 'onnxruntime.training.PropagateCastOpsStrategy',
|
||||
'default': PropagateCastOpsStrategy.NONE
|
||||
'default': PropagateCastOpsStrategy.FLOOD_FILL
|
||||
},
|
||||
'propagate_cast_ops_level': {
|
||||
'type': 'integer',
|
||||
|
|
@ -374,7 +374,7 @@ class ORTTrainerOptions(object):
|
|||
graph_transformer.transformer_layer_recompute(bool, default False)
|
||||
graph_transformer.number_recompute_layers(int, default 0)
|
||||
graph_transformer.propagate_cast_ops_config (dict):
|
||||
graph_transformer.propagate_cast_ops_config.strategy(PropagateCastOpsStrategy, default NONE)
|
||||
graph_transformer.propagate_cast_ops_config.strategy(PropagateCastOpsStrategy, default FLOOD_FILL)
|
||||
Specify the choice of the cast propagation optimization strategy, either, NONE, INSERT_AND_REDUCE or FLOOD_FILL.
|
||||
NONE strategy does not perform any cast propagation transformation on the graph, although other optimizations
|
||||
locally change cast operations, for example, in order to fuse Transpose and MatMul nodes, the TransposeMatMulFusion optimization could
|
||||
|
|
@ -726,12 +726,12 @@ _ORTTRAINER_OPTIONS_SCHEMA = {
|
|||
'strategy': {
|
||||
'type': 'propagate_cast_ops_strategy',
|
||||
'nullable': True,
|
||||
'default': PropagateCastOpsStrategy.NONE
|
||||
'default': PropagateCastOpsStrategy.FLOOD_FILL
|
||||
},
|
||||
'level': {
|
||||
'type': 'integer',
|
||||
'min': -1,
|
||||
'default': -1
|
||||
'default': 1
|
||||
},
|
||||
'allow': {
|
||||
'type': 'list',
|
||||
|
|
|
|||
|
|
@ -80,8 +80,8 @@ def testORTTrainerOptionsDefaultValues(test_input):
|
|||
'number_recompute_layers': 0,
|
||||
'allow_layer_norm_mod_precision': False,
|
||||
'propagate_cast_ops_config': {
|
||||
'strategy': PropagateCastOpsStrategy.NONE,
|
||||
'level': -1,
|
||||
'strategy': PropagateCastOpsStrategy.FLOOD_FILL,
|
||||
'level': 1,
|
||||
'allow': []
|
||||
}
|
||||
},
|
||||
|
|
|
|||
Loading…
Reference in a new issue