Implement LpNorm regularizer to be used on the inputs for feature importance (#26376)

Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/26376 * Create the new dense_feature_reg (FCInputLpNorm) regularizer, which is applied to the inputs of the fully-connected layer for feature importance. Test Plan: * Unit test located in: `caffe2/caffe2/fb/dper/layer_models/tests/split_1/sparse_nn_test.py` Reviewed By: un-disclosed Differential Revision: D17360361 fbshipit-source-id: 1a0e119eeb17199a13dfffe58b3036ea4255e301
2026-05-14 20:57:59 +00:00 · 2019-10-03 09:37:42 -07:00 · 2019-10-03 09:37:42 -07:00 · c2223df578
commit c2223df578
parent 7e95b89980
1 changed files with 49 additions and 0 deletions
--- a/caffe2/python/regularizer.py
+++ b/caffe2/python/regularizer.py
@ -42,6 +42,9 @@ class Regularizer(object):
    def _run_after_optimizer(self, net, param_init_net, param, grad):
        return None

+    def _feature_grouping(self, param, net):
+        """Hook for collapsing the weights in `param` into grouped values.
+
+        The base implementation adds no operators to `net` and returns None;
+        subclasses that need grouped weights override it.
+        """
+        return None
+
    def _ensure_clipped(
        self,
        net,
@ -84,6 +87,52 @@ class L1Norm(Regularizer):
        net.Scale([output_blob], [output_blob], scale=self.reg_lambda)
        return output_blob

+class FCInputLpNorm(Regularizer):
+    """Lp-norm penalty computed over grouped L2 norms of a weight blob.
+
+    The weights in `param` are first collapsed by `_feature_grouping` into a
+    vector g of L2 norms, and the penalty added to the loss is
+
+        reg_lambda * pow( sum_i { pow(g_i, p_value) },  1 / p_value )
+    """
+
+    def __init__(self, reg_lambda, p_value=0.5):
+        """
+        reg_lambda: non-negative factor the penalty is scaled by.
+        p_value:    exponent p of the Lp norm; must be strictly positive.
+        """
+        super(FCInputLpNorm, self).__init__()
+        assert reg_lambda >= 0, "factor ahead of regularization should be 0 or positive"
+        # p_value is the divisor of the final exponent (1 / p_value) in
+        # _run_on_loss, so 0 must be rejected here instead of surfacing later
+        # as a ZeroDivisionError while the loss is being built.
+        assert p_value > 0, "p_value factor should be greater than 0"
+        self.p_value = p_value
+        self.reg_lambda = reg_lambda
+
+    def _feature_grouping(self, param, net):
+        """Add ops to `net` that reduce `param` to a vector of L2 norms.
+
+        Returns the blob holding pow( sum { pow(theta, 2) },  0.5 ), where the
+        sum is taken by ReduceFrontSum.
+        NOTE(review): presumably `param` is an FC weight of shape
+        (output_dim, input_dim), giving one norm per input node -- confirm.
+        """
+        # Possible alternative grouping method via summing over absolute values
+        # Compute l2norm over feature weights
+        # pow( sum_i { pow(theta_i, 2) } ,  0.5)
+        squared = net.Mul([param, param], [net.NextScopedBlob("param_mul")])
+        summed = net.ReduceFrontSum(
+            [squared], [net.NextScopedBlob("param_reduced")]
+        )
+        grouped_feature_weight_vec = net.Pow(
+            [summed],
+            [net.NextScopedBlob("grouped_feature_weight_vec")],
+            exponent=0.5,
+        )
+
+        return grouped_feature_weight_vec
+
+    def _run_on_loss(self, net, param_init_net, param, grad=None):
+        """Add the penalty ops for `param` to `net` and return the output blob.
+
+        `param_init_net` and `grad` are accepted for interface compatibility
+        and are not used here.
+        """
+        # TODO: the second dim (num of input nodes) of param is after feature preproc,
+        # and does not correspond to the original num of dense features.
+        # In the future, will want to create a util to reduce the input dim of param to
+        # match the num of dense features.
+
+        output_blob = net.NextScopedBlob(param + "_dense_feature_regularization")
+        grouped_feature_weight_vec = self._feature_grouping(param, net)
+
+        # Compute Lpnorm over l2norm:
+        # pow( sum_i { pow(theta_i, p) } ,  1/p)
+        lp_vec_raised = net.Pow(
+            [grouped_feature_weight_vec],
+            [net.NextScopedBlob("lp_vec_raised")],
+            exponent=self.p_value,
+        )
+        lp_vec_summed = net.ReduceFrontSum(
+            [lp_vec_raised], [net.NextScopedBlob("lp_vec_summed")]
+        )
+        # 1.0 keeps this a true division even for an integer p_value when the
+        # module is not using `from __future__ import division`.
+        lp_vec = net.Pow(
+            [lp_vec_summed],
+            [net.NextScopedBlob("lp_vec")],
+            exponent=(1.0 / self.p_value),
+        )
+        net.Scale([lp_vec], [output_blob], scale=self.reg_lambda)
+        return output_blob

 class L1NormTrimmed(Regularizer):
    """