mirror of
https://github.com/saymrwulf/onnxruntime.git
synced 2026-05-17 21:10:43 +00:00
Fuse HardSigmoid with conv. (#8674)
* Fuse HardSigmoid with Conv. Add a transform test case and a FusedConv test case. * Limit Conv/HardSigmoid fusion to the CpuExecutionProvider. * Fix typo for ARM build. * Change formatting in one place.
This commit is contained in:
parent
206537936f
commit
2e37fe3f68
8 changed files with 194 additions and 20 deletions
|
|
@ -26,6 +26,9 @@ common::Status GetFusedActivationAttr(const OpKernelInfo& info, MLAS_ACTIVATION&
|
|||
} else if (activation_type == "Clip") {
|
||||
activation.ActivationKind = MlasClipActivation;
|
||||
activation_params_count = 2;
|
||||
} else if (activation_type == "HardSigmoid") {
|
||||
activation.ActivationKind = MlasHardSigmoidActivation;
|
||||
activation_params_count = 2;
|
||||
} else {
|
||||
return Status(common::ONNXRUNTIME, common::INVALID_ARGUMENT, "unimplemented activation: " + activation_type);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -121,6 +121,7 @@ enum MLAS_ACTIVATION_KIND {
|
|||
MlasTanhActivation,
|
||||
MlasLogisticActivation,
|
||||
MlasClipActivation,
|
||||
MlasHardSigmoidActivation,
|
||||
};
|
||||
|
||||
struct MLAS_ACTIVATION {
|
||||
|
|
@ -133,6 +134,10 @@ struct MLAS_ACTIVATION {
|
|||
float minimum;
|
||||
float maximum;
|
||||
} Clip;
|
||||
struct {
|
||||
float alpha;
|
||||
float beta;
|
||||
} HardSigmoid;
|
||||
float Values[2];
|
||||
} Parameters;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -193,6 +193,45 @@ struct MLAS_ACTIVATION_FUNCTION<MlasClipActivation>
|
|||
}
|
||||
};
|
||||
|
||||
template<>
|
||||
struct MLAS_ACTIVATION_FUNCTION<MlasHardSigmoidActivation>
|
||||
{
|
||||
MLAS_FLOAT32X4 AlphaBroadcast;
|
||||
MLAS_FLOAT32X4 BetaBroadcast;
|
||||
MLAS_FLOAT32X4 MinimumBroadcast;
|
||||
MLAS_FLOAT32X4 MaximumBroadcast;
|
||||
|
||||
MLAS_ACTIVATION_FUNCTION(const MLAS_ACTIVATION* Activation)
|
||||
{
|
||||
AlphaBroadcast = MlasBroadcastFloat32x4(&Activation->Parameters.HardSigmoid.alpha);
|
||||
BetaBroadcast = MlasBroadcastFloat32x4(&Activation->Parameters.HardSigmoid.beta);
|
||||
MinimumBroadcast = MlasZeroFloat32x4();
|
||||
MaximumBroadcast = MlasBroadcastFloat32x4(1.0f);
|
||||
}
|
||||
|
||||
MLAS_FLOAT32X4 Activate(MLAS_FLOAT32X4 Value)
|
||||
{
|
||||
Value = MlasMultiplyAddFloat32x4(Value, AlphaBroadcast, BetaBroadcast);
|
||||
Value = MlasMinimumFloat32x4(MaximumBroadcast, Value);
|
||||
Value = MlasMaximumFloat32x4(MinimumBroadcast, Value);
|
||||
|
||||
return Value;
|
||||
}
|
||||
|
||||
float Activate(float Value)
|
||||
{
|
||||
#if defined(MLAS_SSE2_INTRINSICS)
|
||||
return _mm_cvtss_f32(Activate(_mm_set_ss(Value)));
|
||||
#else
|
||||
Value = MlasExtractLaneFloat32x4<0>(AlphaBroadcast) * Value + MlasExtractLaneFloat32x4<0>(BetaBroadcast);
|
||||
Value = std::min(Value, MlasExtractLaneFloat32x4<0>(MaximumBroadcast));
|
||||
Value = std::max(Value, MlasExtractLaneFloat32x4<0>(MinimumBroadcast));
|
||||
|
||||
return Value;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
|
||||
template<MLAS_ACTIVATION_KIND ActivationKind, bool AddBias>
|
||||
void
|
||||
MlasActivationKernel(
|
||||
|
|
@ -464,5 +503,11 @@ Return Value:
|
|||
MlasActivationKernel<MlasClipActivation>(Activation, Buffer, Bias, M, N, ldc);
|
||||
break;
|
||||
}
|
||||
|
||||
case MlasHardSigmoidActivation:
|
||||
{
|
||||
MlasActivationKernel<MlasHardSigmoidActivation>(Activation, Buffer, Bias, M, N, ldc);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -136,7 +136,7 @@ Status ConvActivationFusion::ApplyImpl(Graph& graph, bool& modified, int graph_l
|
|||
auto conv_outputs = conv_node.MutableOutputDefs();
|
||||
auto add_inputs = add_node.MutableInputDefs();
|
||||
int32_t dependent = 0, independent = 0;
|
||||
for (auto add_input: add_inputs) {
|
||||
for (auto add_input : add_inputs) {
|
||||
if (add_input->Name() == conv_outputs[0]->Name()) {
|
||||
dependent++;
|
||||
} else {
|
||||
|
|
@ -179,6 +179,14 @@ Status ConvActivationFusion::ApplyImpl(Graph& graph, bool& modified, int graph_l
|
|||
} else {
|
||||
continue;
|
||||
}
|
||||
} else if ((node->GetExecutionProviderType().empty() || node->GetExecutionProviderType() == onnxruntime::kCpuExecutionProvider) &&
|
||||
graph_utils::IsSupportedOptypeVersionAndDomain(next_node, "HardSigmoid", {6})) {
|
||||
auto* alpha_attr = graph_utils::GetNodeAttribute(next_node, "alpha");
|
||||
auto* beta_attr = graph_utils::GetNodeAttribute(next_node, "beta");
|
||||
float alpha = (alpha_attr == nullptr ? 0.2f : alpha_attr->f());
|
||||
float beta = (beta_attr == nullptr ? 0.5f : beta_attr->f());
|
||||
activation_params.push_back(alpha);
|
||||
activation_params.push_back(beta);
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
namespace onnxruntime {
|
||||
namespace test {
|
||||
|
||||
#if defined(USE_CUDA) && !defined(DISABLE_CONTRIB_OPS)
|
||||
#if !defined(DISABLE_CONTRIB_OPS)
|
||||
using namespace std;
|
||||
|
||||
struct ConvOpAndTestAttributes {
|
||||
|
|
@ -18,24 +18,48 @@ struct ConvOpAndTestAttributes {
|
|||
vector<int64_t> pads;
|
||||
vector<int64_t> strides;
|
||||
string activation;
|
||||
vector<float> activation_parameters = {};
|
||||
};
|
||||
|
||||
static std::unordered_set<std::string> excluded_providers = {
|
||||
kCpuExecutionProvider,
|
||||
kDnnlExecutionProvider,
|
||||
kOpenVINOExecutionProvider,
|
||||
kNupharExecutionProvider,
|
||||
kVitisAIExecutionProvider,
|
||||
kTensorrtExecutionProvider,
|
||||
kNnapiExecutionProvider,
|
||||
kRknpuExecutionProvider,
|
||||
kDmlExecutionProvider,
|
||||
kMIGraphXExecutionProvider,
|
||||
kAclExecutionProvider,
|
||||
kArmNNExecutionProvider,
|
||||
kRocmExecutionProvider};
|
||||
static std::unordered_set<std::string> providers_except_cpu = {
|
||||
kCudaExecutionProvider,
|
||||
kDnnlExecutionProvider,
|
||||
kOpenVINOExecutionProvider,
|
||||
kNupharExecutionProvider,
|
||||
kVitisAIExecutionProvider,
|
||||
kTensorrtExecutionProvider,
|
||||
kNnapiExecutionProvider,
|
||||
kRknpuExecutionProvider,
|
||||
kDmlExecutionProvider,
|
||||
kMIGraphXExecutionProvider,
|
||||
kAclExecutionProvider,
|
||||
kArmNNExecutionProvider,
|
||||
kRocmExecutionProvider};
|
||||
|
||||
void TestConvOp(const ConvOpAndTestAttributes& attributes, const vector<vector<float>>& inputs, const vector<vector<int64_t>>& input_shapes, const std::initializer_list<float>& expected_output, const vector<int64_t>& expected_output_shape, bool weight_is_initializer = false, OpTester::ExpectResult expect_result = OpTester::ExpectResult::kExpectSuccess, const std::string& err_str = "") {
|
||||
static std::unordered_set<std::string> providers_except_cpu_cuda = {
|
||||
kDnnlExecutionProvider,
|
||||
kOpenVINOExecutionProvider,
|
||||
kNupharExecutionProvider,
|
||||
kVitisAIExecutionProvider,
|
||||
kTensorrtExecutionProvider,
|
||||
kNnapiExecutionProvider,
|
||||
kRknpuExecutionProvider,
|
||||
kDmlExecutionProvider,
|
||||
kMIGraphXExecutionProvider,
|
||||
kAclExecutionProvider,
|
||||
kArmNNExecutionProvider,
|
||||
kRocmExecutionProvider};
|
||||
|
||||
|
||||
void TestConvOp(const ConvOpAndTestAttributes& attributes,
|
||||
const vector<vector<float>>& inputs,
|
||||
const vector<vector<int64_t>>& input_shapes,
|
||||
const std::initializer_list<float>& expected_output,
|
||||
const vector<int64_t>& expected_output_shape,
|
||||
const std::unordered_set<std::string>& excluded_provider_types = providers_except_cpu_cuda,
|
||||
bool weight_is_initializer = false,
|
||||
OpTester::ExpectResult expect_result = OpTester::ExpectResult::kExpectSuccess,
|
||||
const std::string& err_str = "") {
|
||||
OpTester test("FusedConv", 1, onnxruntime::kMSDomain);
|
||||
test.AddAttribute("group", attributes.group);
|
||||
test.AddAttribute("kernel_shape", attributes.kernel_shape);
|
||||
|
|
@ -58,6 +82,10 @@ void TestConvOp(const ConvOpAndTestAttributes& attributes, const vector<vector<f
|
|||
ORT_ENFORCE(!attributes.activation.empty(), "activation must be set");
|
||||
test.AddAttribute("activation", attributes.activation);
|
||||
|
||||
if (!attributes.activation_parameters.empty()) {
|
||||
test.AddAttribute("activation_params", attributes.activation_parameters);
|
||||
}
|
||||
|
||||
const char* szNames[] = {"X", "W", "B", "Z"};
|
||||
test.AddInput<float>(szNames[0], input_shapes[0], inputs[0]);
|
||||
test.AddInput<float>(szNames[1], input_shapes[1], inputs[1], weight_is_initializer);
|
||||
|
|
@ -66,7 +94,28 @@ void TestConvOp(const ConvOpAndTestAttributes& attributes, const vector<vector<f
|
|||
if (inputs.size() >= 4)
|
||||
test.AddInput<float>(szNames[3], input_shapes[3], inputs[3]);
|
||||
test.AddOutput<float>("Y", expected_output_shape, expected_output);
|
||||
test.Run(expect_result, err_str, excluded_providers);
|
||||
test.Run(expect_result, err_str, excluded_provider_types);
|
||||
}
|
||||
|
||||
// FusedConv with a HardSigmoid activation (alpha = 0.2, beta = 0.5).
//
// The first 2x2 filter is all +0.125 and the second all -0.125, so over the
// 3x3 input 1..9 the raw convolution yields +/-{1.5, 2.0, 3.0, 3.5}. The
// expected output is 0.2 * conv + 0.5 clamped to [0, 1]. The test passes
// providers_except_cpu as the exclusion set.
TEST(FusedConvTest, Conv2D_HardSigmoid) {
  // Input: one 1-channel 3x3 image.
  vector<int64_t> X_shape = {1, 1, 3, 3};
  vector<float> X = {1.0f, 2.0f, 3.0f,
                     4.0f, 5.0f, 6.0f,
                     7.0f, 8.0f, 9.0f};

  // Weights: two output channels, each a single 2x2 filter.
  vector<int64_t> W_shape = {2, 1, 2, 2};
  vector<float> W = {0.125f, 0.125f, 0.125f, 0.125f,
                     -0.125f, -0.125f, -0.125f, -0.125f};

  // Output: two 2x2 feature maps.
  vector<int64_t> Y_shape = {1, 2, 2, 2};
  const std::initializer_list<float> expected_vals = {0.8f, 0.9f, 1.0f, 1.0f,
                                                      0.2f, 0.1f, 0.0f, 0.0f};

  ConvOpAndTestAttributes attrs = {
      "",                           // auto_pad
      vector<int64_t>{1, 1},        // dilations
      1,                            // group
      vector<int64_t>{2, 2},        // kernel_shape
      vector<int64_t>{0, 0, 0, 0},  // pads
      vector<int64_t>{1, 1},        // strides
      "HardSigmoid",                // activation
      vector<float>{0.2f, 0.5f}     // activation_parameters
  };

  TestConvOp(attrs, {X, W}, {X_shape, W_shape}, expected_vals, Y_shape, providers_except_cpu);
}
|
||||
|
||||
TEST(FusedConvTest, Conv2D_Relu) {
|
||||
|
|
@ -111,6 +160,23 @@ TEST(FusedConvTest, Conv2D_Bias_Relu) {
|
|||
TestConvOp(attrs, {X, W, B}, {X_shape, W_shape, B_shape}, expected_vals, Y_shape);
|
||||
}
|
||||
|
||||
#if defined(USE_CUDA)
|
||||
|
||||
static std::unordered_set<std::string> providers_except_cuda = {
|
||||
kCpuExecutionProvider,
|
||||
kDnnlExecutionProvider,
|
||||
kOpenVINOExecutionProvider,
|
||||
kNupharExecutionProvider,
|
||||
kVitisAIExecutionProvider,
|
||||
kTensorrtExecutionProvider,
|
||||
kNnapiExecutionProvider,
|
||||
kRknpuExecutionProvider,
|
||||
kDmlExecutionProvider,
|
||||
kMIGraphXExecutionProvider,
|
||||
kAclExecutionProvider,
|
||||
kArmNNExecutionProvider,
|
||||
kRocmExecutionProvider};
|
||||
|
||||
TEST(FusedConvTest, Conv2D_Bias_Z_Relu) {
|
||||
ConvOpAndTestAttributes attrs = {
|
||||
"", // auto_pad
|
||||
|
|
@ -132,8 +198,10 @@ TEST(FusedConvTest, Conv2D_Bias_Z_Relu) {
|
|||
vector<float> Z = {-1.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 1.0f};
|
||||
vector<int64_t> Z_shape = {1, 2, 2, 2};
|
||||
auto expected_vals = {12.0f, 17.0f, 25.0f, 29.0f, 11.0f, 15.0f, 23.0f, 28.0f};
|
||||
TestConvOp(attrs, {X, W, B, Z}, {X_shape, W_shape, B_shape, Z_shape}, expected_vals, Y_shape);
|
||||
TestConvOp(attrs, {X, W, B, Z}, {X_shape, W_shape, B_shape, Z_shape}, expected_vals, Y_shape, providers_except_cuda);
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
} // namespace test
|
||||
|
|
|
|||
|
|
@ -732,7 +732,8 @@ TEST_F(GraphTransformationTests, FuseConvActivation) {
|
|||
{ORT_TSTR("fusion/conv_clip.onnx"), "Clip"},
|
||||
{ORT_TSTR("fusion/conv_sigmoid.onnx"), "Sigmoid"},
|
||||
{ORT_TSTR("fusion/conv_tanh.onnx"), "Tanh"},
|
||||
{ORT_TSTR("fusion/conv_leakyrelu.onnx"), "LeakyRelu"}};
|
||||
{ORT_TSTR("fusion/conv_leakyrelu.onnx"), "LeakyRelu"},
|
||||
{ORT_TSTR("fusion/conv_hardsigmoid.onnx"), "HardSigmoid"}};
|
||||
#endif
|
||||
for (const auto& model : model_to_op_name) {
|
||||
auto model_uri = MODEL_FOLDER + model.first;
|
||||
|
|
|
|||
22
onnxruntime/test/testdata/transform/fusion/conv_hardsigmoid.onnx
vendored
Normal file
22
onnxruntime/test/testdata/transform/fusion/conv_hardsigmoid.onnx
vendored
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
:¾
|
||||
|
||||
X
|
||||
W conv0_outConv0"Conv
|
||||
8
|
||||
conv0_outhardsigmoid0_outHardSigmoid0"HardSigmoidConvClipFusionZ
|
||||
X
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
Z
|
||||
W
|
||||
|
||||
|
||||
|
||||
|
||||
b
|
||||
hardsigmoid0_out
|
||||
B
|
||||
22
onnxruntime/test/testdata/transform/fusion/create_conv_hardsigmoid.py
vendored
Normal file
22
onnxruntime/test/testdata/transform/fusion/create_conv_hardsigmoid.py
vendored
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
import onnx
|
||||
from onnx import helper
|
||||
from onnx import TensorProto
|
||||
|
||||
# Build a two-node model in which a Conv feeds a HardSigmoid directly, and
# save it as conv_hardsigmoid.onnx in the current working directory.

# Conv0 reads graph inputs X and W; HardSigmoid0 consumes Conv0's only output.
# HardSigmoid0 is created without explicit alpha/beta attributes.
conv_node = helper.make_node("Conv", ["X", "W"], ["conv0_out"], "Conv0")
hardsigmoid_node = helper.make_node("HardSigmoid", ["conv0_out"], ["hardsigmoid0_out"], "HardSigmoid0")

graph_inputs = [
    helper.make_tensor_value_info('X', TensorProto.FLOAT, [1, 1, 10, 10]),
    helper.make_tensor_value_info('W', TensorProto.FLOAT, [1, 1, 3, 3]),
]

# The output shape is passed as None, i.e. not specified here.
graph_outputs = [
    helper.make_tensor_value_info('hardsigmoid0_out', TensorProto.FLOAT, None),
]

graph = helper.make_graph(
    [conv_node, hardsigmoid_node],
    "ConvClipFusion",  # graph name
    graph_inputs,
    graph_outputs,
)

model = helper.make_model(graph)
onnx.save(model, r'conv_hardsigmoid.onnx')
|
||||
Loading…
Reference in a new issue