diff --git a/onnxruntime/core/optimizer/conv_activation_fusion.cc b/onnxruntime/core/optimizer/conv_activation_fusion.cc index 90d99fdeab..5230871223 100644 --- a/onnxruntime/core/optimizer/conv_activation_fusion.cc +++ b/onnxruntime/core/optimizer/conv_activation_fusion.cc @@ -4,6 +4,7 @@ #include #include "core/graph/graph_utils.h" #include "core/optimizer/conv_activation_fusion.h" +#include "core/optimizer/initializer.h" using namespace ONNX_NAMESPACE; using namespace ::onnxruntime::common; @@ -27,6 +28,65 @@ void HandleActivationNodeEdges(Graph& g, const Node& act, Node& fused_conv) { } } +// get min/max values from Clip if they are constant. Returns false if mutable and cannot be used +static bool GetClipConstantMinMax(const Graph& graph, const Node& node, float& min, float& max) { + min = std::numeric_limits::lowest(); + max = std::numeric_limits::max(); + + // Clip opset 6 has min and max as attributes. they're inputs from opset 11 on. + bool min_max_are_attributes = graph_utils::IsSupportedOptypeVersionAndDomain(node, "Clip", {6}); + bool min_max_are_constant_values = true; + + if (min_max_are_attributes) { + min = graph_utils::GetNodeAttribute(node, "min")->f(); + max = graph_utils::GetNodeAttribute(node, "max")->f(); + } else { + // update min/max if provided via a constant initializer + // return true if value is default or coming from a constant initializer and update 'value' + // return false if value is mutable + auto update_if_constant_value = [&graph](const Node& node, size_t input_idx, float& value) { + const auto& input_defs = node.InputDefs(); + const NodeArg* input = (input_defs.size() > input_idx) ? input_defs[input_idx] : nullptr; + + if (input == nullptr || !input->Exists()) { + // optional input not specified so using default value + return true; + } + + bool is_constant = true; + const ONNX_NAMESPACE::TensorProto* initializer = graph_utils::GetConstantInitializer(graph, input->Name()); + if (initializer) { + Initializer i(*initializer); + switch (initializer->data_type()) { + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: + value = *i.data(); + break; + // double isn't currently supported + //case ONNX_NAMESPACE::TensorProto_DataType_DOUBLE: + // value = static_cast(*i.data()); + // break; + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: + value = math::halfToFloat(i.data()->val); + break; + default: + ORT_THROW("Unexpected data type for Clip input of ", initializer->data_type()); + } + } else { + is_constant = false; + } + + return is_constant; + }; + + // 'min' is input 1, 'max' is input 2. both are optional. + // if the input is constant, 'min' or 'max' is updated by the call to get_if_constant_value + min_max_are_constant_values = update_if_constant_value(node, 1, min) && + update_if_constant_value(node, 2, max); + } + + return min_max_are_constant_values; +} + } // namespace Status ConvActivationFusion::ApplyImpl(Graph& graph, bool& modified, int graph_level) const { @@ -57,9 +117,14 @@ Status ConvActivationFusion::ApplyImpl(Graph& graph, bool& modified, int graph_l !graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Tanh", {6})) { if (graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "LeakyRelu", {6})) { activation_params.push_back(graph_utils::GetNodeAttribute(next_node, "alpha")->f()); - } else if (graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Clip", {6})) { - activation_params.push_back(graph_utils::GetNodeAttribute(next_node, "min")->f()); - activation_params.push_back(graph_utils::GetNodeAttribute(next_node, "max")->f()); + } else if (graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Clip", {6, 11})) { + float min, max; + if (GetClipConstantMinMax(graph, next_node, min, max)) { + activation_params.push_back(min); + activation_params.push_back(max); + } else { + continue; + } } else { continue; } diff --git a/onnxruntime/core/optimizer/relu_clip_fusion.cc b/onnxruntime/core/optimizer/relu_clip_fusion.cc index 4e82916d4e..3e5265ae59 100644 --- a/onnxruntime/core/optimizer/relu_clip_fusion.cc +++ b/onnxruntime/core/optimizer/relu_clip_fusion.cc @@ -5,21 +5,102 @@ #include "core/graph/graph.h" #include "core/graph/graph_utils.h" #include "core/graph/op.h" +#include "core/optimizer/initializer.h" namespace onnxruntime { Status FuseReluClip::Apply(Graph& graph, Node& node, RewriteRuleEffect& rule_effect) const { - // get the following Clip node before we delete the Relu node const auto& next_node = *node.OutputNodesBegin(); + // Clip opset 6 has min and max as attributes. they're inputs from opset 11 on. + bool min_max_are_attributes = graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Clip", {6}); + + // if Clip had a min < 0 we need to replace that value with 0 to do what Relu would have done using just Clip + bool replace_min = false; + ONNX_NAMESPACE::TensorProto replacement_min; + + if (min_max_are_attributes) { + replace_min = graph_utils::GetNodeAttribute(next_node, "min")->f() < 0.f; + } else { + // we can fuse if the optional 'min' input is not provided, or if it is provided via a constant initializer + const auto& clip_inputs = next_node.InputDefs(); + const NodeArg* min_input = (clip_inputs.size() > 1) ? clip_inputs[1] : nullptr; + const ONNX_NAMESPACE::TensorProto* initializer = nullptr; + + if (min_input && min_input->Exists()) { + initializer = graph_utils::GetConstantInitializer(graph, min_input->Name()); + if (!initializer) { + // non-const initializer. can't proceed + return Status::OK(); + } + } + + int32_t data_type; + + if (!initializer) { + // 'min' is using the default value of std::numeric_limits<>::lowest so we can fuse and provide a constant + // value of '0' for 'min' + + // we need to know the correct data type to create a valid initializer for the value 0. + // get that from 'input' as that must match the 'min' input type. + const auto* input_type = next_node.InputDefs()[0]->TypeAsProto(); + if (input_type == nullptr || !input_type->tensor_type().has_elem_type()) { + return Status::OK(); + } + + data_type = input_type->tensor_type().elem_type(); + replace_min = true; + } else { + // 'min' is provided by a constant initializer so we can fuse. + // see if we need to replace with an initializer with a value of 0 + + data_type = initializer->data_type(); + // construct an initializer to gracefully handle typed or raw data in the TensorProto + Initializer i(*initializer); + switch (data_type) { + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: + if (*i.data() < 0.f) { + replace_min = true; + } + break; + case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: + if (math::halfToFloat(i.data()->val) < 0.f) { + replace_min = true; + } + break; + default: + ORT_THROW("Unexpected data type for Clip 'min' input of ", initializer->data_type()); + } + } + + if (replace_min) { + // create a new TensorProto with value of 0 and unique name in replacement_min + auto new_name = graph.GenerateNodeArgName("FuseReluClip_" + node.Name() + "_min_zero_constant"); + Initializer(static_cast(data_type), new_name, {}) + .ToProto(replacement_min); + } + } + + // Remove the Relu node, and update the following Clip node if the 'min' is < 0.f, to set it to 0.f. + // This essentially fuses the Relu and Clip. If the Clip 'min' is >= 0.f no change is required to the Clip node + // as Relu would have set a lower min of 0.f. if (graph_utils::RemoveNode(graph, node)) { - // update the following Clip node if the 'min' is < 0.f to set it to 0.f - // this essentially fuses the Relu and Clip - // if the Clip 'min' is >= 0.f no change is required as Relu would have set the min to 0.f - if (graph_utils::GetNodeAttribute(next_node, "min")->f() < 0.f) { + if (replace_min) { auto* mutable_next_node = graph.GetNode(next_node.Index()); - mutable_next_node->ClearAttribute("min"); - mutable_next_node->AddAttribute("min", 0.f); + if (min_max_are_attributes) { + mutable_next_node->ClearAttribute("min"); + mutable_next_node->AddAttribute("min", 0.f); + } else { + graph.AddInitializedTensor(replacement_min); + auto& mutable_input_defs = mutable_next_node->MutableInputDefs(); + NodeArg* replacement_min_nodearg = graph.GetNodeArg(replacement_min.name()); + if (mutable_input_defs.size() == 1) { // Clip node only has the required 'input' so add optional 'min' input + mutable_input_defs.push_back(replacement_min_nodearg); + mutable_next_node->MutableInputArgsCount().push_back(1); + } else { + mutable_input_defs[1] = graph.GetNodeArg(replacement_min.name()); + } + } } rule_effect = RewriteRuleEffect::kRemovedCurrentNode; @@ -42,7 +123,7 @@ bool FuseReluClip::SatisfyCondition(const Graph& graph, const Node& node) const // as Clip will apply the minimum. If the Clip 'min' value is < 0 we need // to update it to 0 to apply what the Relu would have done. We do that in Apply. const auto& next_node = *node.OutputNodesBegin(); - if (!graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Clip", {6}) || + if (!graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "Clip", {6, 11}) || next_node.GetExecutionProviderType() != node.GetExecutionProviderType()) { return false; } diff --git a/onnxruntime/test/optimizer/graph_transform_test.cc b/onnxruntime/test/optimizer/graph_transform_test.cc index 6fdb5e4501..c666167039 100644 --- a/onnxruntime/test/optimizer/graph_transform_test.cc +++ b/onnxruntime/test/optimizer/graph_transform_test.cc @@ -2,34 +2,36 @@ // Licensed under the MIT License. #include "core/session/inference_session.h" +#include "core/framework/data_types.h" +#include "core/framework/ml_value.h" #include "core/graph/graph_utils.h" #include "core/graph/graph_viewer.h" #include "core/graph/model.h" -#include "core/optimizer/graph_transformer.h" -#include "core/optimizer/graph_transformer_mgr.h" -#include "core/optimizer/identity_elimination.h" -#include "core/optimizer/dropout_elimination.h" -#include "core/optimizer/slice_elimination.h" -#include "core/optimizer/unsqueeze_elimination.h" +#include "core/optimizer/constant_folding.h" #include "core/optimizer/conv_bn_fusion.h" #include "core/optimizer/conv_mul_fusion.h" #include "core/optimizer/conv_add_fusion.h" #include "core/optimizer/conv_activation_fusion.h" -#include "core/optimizer/matmul_add_fusion.h" +#include "core/optimizer/dropout_elimination.h" #include "core/optimizer/gemm_activation_fusion.h" -#include "core/optimizer/relu_clip_fusion.h" -#include "core/framework/data_types.h" -#include "core/framework/ml_value.h" -#include "core/util/math.h" -#include "core/platform/env.h" -#include "test/framework/test_utils.h" -#include "test/capturing_sink.h" -#include "test/test_environment.h" -#include "gtest/gtest.h" -#include "core/optimizer/rule_based_graph_transformer.h" -#include "core/optimizer/constant_folding.h" -#include "core/optimizer/shape_to_initializer.h" #include "core/optimizer/gelu_fusion.h" +#include "core/optimizer/graph_transformer.h" +#include "core/optimizer/graph_transformer_mgr.h" +#include "core/optimizer/identity_elimination.h" +#include "core/optimizer/initializer.h" +#include "core/optimizer/matmul_add_fusion.h" +#include "core/optimizer/relu_clip_fusion.h" +#include "core/optimizer/rule_based_graph_transformer.h" +#include "core/optimizer/shape_to_initializer.h" +#include "core/optimizer/slice_elimination.h" +#include "core/optimizer/unsqueeze_elimination.h" +#include "core/platform/env.h" +#include "core/util/math.h" +#include "test/capturing_sink.h" +#include "test/framework/test_utils.h" +#include "test/test_environment.h" + +#include "gtest/gtest.h" using namespace std; using namespace ONNX_NAMESPACE; @@ -312,6 +314,44 @@ TEST(GraphTransformationTests, FuseConvActivation) { ASSERT_TRUE(op_to_count[model.second] == 0); } } + +TEST(GraphTransformationTests, FuseConvClip11Activation) { + std::string model_uri = MODEL_FOLDER + "fusion/conv_clip11.onnx"; + std::shared_ptr p_model; + auto status = Model::Load(model_uri, p_model); + ASSERT_TRUE(status.IsOK()) << status; + Graph& graph = p_model->MainGraph(); + + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_EQ(op_to_count["Clip"], 3); + + // Apply transformer + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + graph_transformation_mgr.Register(onnxruntime::make_unique(), TransformerLevel::Level2); + ASSERT_TRUE(graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level2).IsOK()); + + op_to_count = CountOpsInGraph(graph); + ASSERT_EQ(op_to_count["Clip"], 1); + + for (const Node& node : graph.Nodes()) { + if (node.OpType() == "Conv") { + EXPECT_TRUE(node.Name() == "Conv1") << "Conv1 should not have been fused as 'min' input to Clip was mutable."; + } + + if (node.OpType() == "FusedConv") { + const ONNX_NAMESPACE::AttributeProto& attr_proto = node.GetAttributes().at("activation_params"); + const auto& params = attr_proto.floats(); + // check expected values for each. Conv0 is explicitly specified. Conv2 are defaults + if (node.Name() == "Conv0") { + EXPECT_TRUE(params.Get(0) == -1.f); + EXPECT_TRUE(params.Get(1) == 1.f); + } else if (node.Name() == "Conv2") { + EXPECT_TRUE(params.Get(0) == std::numeric_limits::lowest()); + EXPECT_TRUE(params.Get(1) == std::numeric_limits::max()); + } + } + } +} #endif TEST(GraphTransformationTests, FuseConvMulNoBias) { @@ -537,10 +577,10 @@ TEST(GraphTransformationTests, FuseConvBnAddMulFloat16) { ASSERT_EQ(expected_values_prod, found); } -TEST(GraphTransformationTests, ReluClipFusion) { +TEST(GraphTransformationTests, ReluClip6Fusion) { // Clip op schema changed for opset version 11. Until Clip op is updated in ORT hard coding this model to use // older opset. - Model model("ReluClipFusion", true, ModelMetaData(), IOnnxRuntimeOpSchemaRegistryList(), {{"", 10}}, {}); + Model model("ReluClip6Fusion", true, ModelMetaData(), IOnnxRuntimeOpSchemaRegistryList(), {{"", 10}}, {}); auto& graph = model.MainGraph(); std::vector inputs; @@ -606,6 +646,126 @@ TEST(GraphTransformationTests, ReluClipFusion) { } } +// test handling of Clip 11 +TEST(GraphTransformationTests, ReluClip11Fusion) { + Model model("ReluClip6Fusion"); //, true, ModelMetaData(), IOnnxRuntimeOpSchemaRegistryList(), {{"", 11}}, {}); + auto& graph = model.MainGraph(); + + std::vector inputs; + std::vector outputs; + + TypeProto input_tensor_type; + input_tensor_type.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT); + input_tensor_type.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(1); + + TypeProto float16_tensor_type; + float16_tensor_type.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT16); + float16_tensor_type.mutable_tensor_type()->mutable_shape()->add_dim()->set_dim_value(1); + + // 4 paths in the model, each with Relu followed by Clip to test different aspects of Clip 11 handling + // One has a Clip with mutable 'min' (don't fuse) + // One has a Clip with constant 'min' < 0 (fuse and update 'min') + // One has a Clip with constant 'min' > 0 (fuse and leave 'min') + // One has a Clip with no 'min' (fuse and update to set min to 0 using type info from 'input') + auto& input0 = graph.GetOrCreateNodeArg("input_0", &input_tensor_type); + auto& input1 = graph.GetOrCreateNodeArg("input_1", &float16_tensor_type); + auto& input2 = graph.GetOrCreateNodeArg("input_2", &input_tensor_type); + auto& input3 = graph.GetOrCreateNodeArg("input_3", &input_tensor_type); + + auto& min_input_0 = graph.GetOrCreateNodeArg("min_input_0", &input_tensor_type); + auto& min_input_1 = graph.GetOrCreateNodeArg("min_input_1", &float16_tensor_type); + auto& min_input_2 = graph.GetOrCreateNodeArg("min_input_2", &input_tensor_type); + + // add initializer for min_input_1 so it's constant + TensorProto const_min_1; + Initializer i1(TensorProto_DataType_FLOAT16, "min_input_1", {1}); + i1.data()->val = math::floatToHalf(-1.f); + i1.ToProto(const_min_1); + graph.AddInitializedTensor(const_min_1); + + TensorProto const_min_2; + Initializer i2(TensorProto_DataType_FLOAT, "min_input_2", {1}); + *i2.data() = 1.f; + i2.ToProto(const_min_2); + graph.AddInitializedTensor(const_min_2); + + auto& relu0_output = graph.GetOrCreateNodeArg("relu0_output", &input_tensor_type); + auto& relu1_output = graph.GetOrCreateNodeArg("relu1_output", &float16_tensor_type); + auto& relu2_output = graph.GetOrCreateNodeArg("relu2_output", &input_tensor_type); + auto& relu3_output = graph.GetOrCreateNodeArg("relu3_output", &input_tensor_type); + + auto& clip0_output = graph.GetOrCreateNodeArg("clip0_output", &input_tensor_type); + auto& clip1_output = graph.GetOrCreateNodeArg("clip1_output", &float16_tensor_type); + auto& clip2_output = graph.GetOrCreateNodeArg("clip2_output", &input_tensor_type); + auto& clip3_output = graph.GetOrCreateNodeArg("clip3_output", &input_tensor_type); + + graph.AddNode("relu0", "Relu", "Relu0", {&input0}, {&relu0_output}); + graph.AddNode("relu1", "Relu", "Relu1", {&input1}, {&relu1_output}); + graph.AddNode("relu2", "Relu", "Relu2", {&input2}, {&relu2_output}); + graph.AddNode("relu3", "Relu", "Relu3", {&input3}, {&relu3_output}); + + auto& clip0 = graph.AddNode("clip0", "Clip", "Clip with mutable min", {&relu0_output, &min_input_0}, {&clip0_output}); + auto& clip1 = graph.AddNode("clip1", "Clip", "Clip with constant min < 0", {&relu1_output, &min_input_1}, {&clip1_output}); + auto& clip2 = graph.AddNode("clip2", "Clip", "Clip with constant min > 0", {&relu2_output, &min_input_2}, {&clip2_output}); + auto& clip3 = graph.AddNode("clip3", "Clip", "Clip with no min", {&relu3_output}, {&clip3_output}); + + graph.SetInputs({&input0, &input1, &input2, &input3, &min_input_0}); + auto status = graph.Resolve(); + EXPECT_EQ(status, Status::OK()) << status; + + std::map op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["Relu"] == 4); + + auto rule_transformer_L1 = onnxruntime::make_unique("RuleTransformer1"); + rule_transformer_L1->Register(onnxruntime::make_unique()); + onnxruntime::GraphTransformerManager graph_transformation_mgr{5}; + graph_transformation_mgr.Register(std::move(rule_transformer_L1), TransformerLevel::Level1); + status = graph_transformation_mgr.ApplyTransformers(graph, TransformerLevel::Level1); + ASSERT_TRUE(status.IsOK()) << status; + + op_to_count = CountOpsInGraph(graph); + ASSERT_TRUE(op_to_count["Relu"] == 1) << "All except the first Relu should have been fused"; + + for (const Node& node : graph.Nodes()) { + if (node.OpType() == "Relu") { + EXPECT_TRUE(node.Name() == "relu0") << "relu0 should be the only Relu node left"; + } + + if (node.OpType() == "Clip") { + auto* min_input = graph_utils::GetConstantInitializer(graph, node.InputDefs()[1]->Name()); + + if (&node == &clip0) { + EXPECT_TRUE(min_input == nullptr) << "clip0 should not have been fused as min_input_0 is not constant"; + } else { + EXPECT_TRUE(min_input != nullptr) + << node.Name() << " should have been fused and have a constant initializer for 'min'"; + + auto type = min_input->data_type(); + + if (&node == &clip1) { + // fusion with float16 data and min set to 0 + EXPECT_EQ(type, ONNX_NAMESPACE::TensorProto::DataType::TensorProto_DataType_FLOAT16); + MLFloat16 value = *Initializer(*min_input).data(); + EXPECT_EQ(math::halfToFloat(value.val), 0.f) << "Min was not 0.f. Got:" << math::halfToFloat(value.val); + } else if (&node == &clip2) { + // fusion with float data and min untouched + EXPECT_EQ(type, ONNX_NAMESPACE::TensorProto::DataType::TensorProto_DataType_FLOAT); + float value = *Initializer(*min_input).data(); + EXPECT_EQ(value, 1.0) << "Min should have remained unchanged but is now " << value; + } else if (&node == &clip3) { + // fusion with no min so type comes from input + EXPECT_EQ(type, ONNX_NAMESPACE::TensorProto::DataType::TensorProto_DataType_FLOAT); + float value = *Initializer(*min_input).data(); + EXPECT_EQ(value, 0.f) << "Min was not 0.f. Got:" << value; + + } else { + EXPECT_TRUE(false) << "Unexpected node " << node.Name(); + } + } + } + } +} + #ifndef DISABLE_CONTRIB_OPS TEST(GraphTransformationTests, GeluFusionTest) { string model_uri = MODEL_FOLDER + "fusion/gelu.onnx"; diff --git a/onnxruntime/test/testdata/transform/fusion/conv_clip11.onnx b/onnxruntime/test/testdata/transform/fusion/conv_clip11.onnx new file mode 100644 index 0000000000..d518546a3e Binary files /dev/null and b/onnxruntime/test/testdata/transform/fusion/conv_clip11.onnx differ diff --git a/onnxruntime/test/testdata/transform/fusion/create_conv_clip11.py b/onnxruntime/test/testdata/transform/fusion/create_conv_clip11.py new file mode 100644 index 0000000000..a345f3bc95 --- /dev/null +++ b/onnxruntime/test/testdata/transform/fusion/create_conv_clip11.py @@ -0,0 +1,37 @@ +import onnx +from onnx import helper +from onnx import TensorProto + +graph = helper.make_graph( + [ # nodes + # fusable, const_min_negative should be replaced + helper.make_node("Conv", ["X", "W"], ["conv0_out"], "Conv0"), + helper.make_node("Clip", ["conv0_out", "const_min", "const_max"], ["clip0_out"], "Clip0"), + + # mutable input. no fusion. + helper.make_node("Conv", ["X", "W"], ["conv1_out"], "Conv1"), + helper.make_node("Clip", ["conv1_out", "mutable_min", "const_max"], ["clip1_out"], "Clip1"), + + # fusabled. default min/max. + helper.make_node("Conv", ["X", "W"], ["conv2_out"], "Conv2"), + helper.make_node("Clip", ["conv2_out"], ["clip2_out"], "Clip2"), + ], + "ConvClipFusion", #name + [ # inputs + helper.make_tensor_value_info('X', TensorProto.FLOAT, [1, 1, 7]), + helper.make_tensor_value_info('W', TensorProto.FLOAT, [1, 1, 1]), + helper.make_tensor_value_info('mutable_min', TensorProto.FLOAT, [1]), + ], + [ # outputs + helper.make_tensor_value_info('clip0_out', TensorProto.FLOAT, None), + helper.make_tensor_value_info('clip1_out', TensorProto.FLOAT, None), + helper.make_tensor_value_info('clip2_out', TensorProto.FLOAT, None), + ], + [ # initializers + helper.make_tensor('const_min', TensorProto.FLOAT, [1], [-1.0]), + helper.make_tensor('const_max', TensorProto.FLOAT, [1], [10.0]) + ] +) + +model = helper.make_model(graph) +onnx.save(model, r'conv_clip11.onnx') \ No newline at end of file