mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/37705 Pull Request resolved: https://github.com/pytorch/pytorch/pull/37372 Posted note: [Regularizing SparseNN Against Over-fitting](https://fb.workplace.com/notes/taiqing-wang/regularizing-sparsenn-against-over-fitting/220306075902708/) **Problem formulation** $L(w) = J(w) + \frac{\lambda}{2} \lVert w \rVert^2$, where $J(w)$ is the empirical loss and $\lVert w \rVert^2$ is the squared L2 norm of the parameters, a.k.a. the L2 regularizer. $\frac{\partial L(w)}{\partial w_i} = \frac{\partial J(w)}{\partial w_i} + \lambda w_i$, where $\frac{\partial L(w)}{\partial w_i}$ is the gradient of $L(w)$ w.r.t. $w_i$. To implement the L2 regularizer, $\lambda w_i$ is added to the gradient of $J(w)$ w.r.t. $w_i$. $\lambda$ is called weight decay in this implementation. **Code changes** * In the initialization method of AdagradOptimizer, a new input argument, weight_decay, is added. * In the _run function of AdagradOptimizer, the weight decay is skipped for 1d bias vectors. * In the parameter update functions of Adagrad, weight_decay * w_i is added to the gradient. The default value for weight_decay is zero. Test Plan: ` buck build caffe2/caffe2/fb/dper/layer_models/tests/split_1:sparse_nn_test_weight_decay ` ` ./buck-out/gen/caffe2/caffe2/fb/dper/layer_models/tests/split_1/sparse_nn_test_weight_decay#binary.par ` Reviewed By: jspark1105 Differential Revision: D21258652 fbshipit-source-id: d2366ddcd736a03205a2d16f914703b16d9fce8f |
||
|---|---|---|
| .. | ||
| benchmarks | ||
| docs | ||
| examples | ||
| fakelowp | ||
| helpers | ||
| ideep | ||
| layers | ||
| mint | ||
| mkl | ||
| modeling | ||
| models | ||
| onnx | ||
| operator_test | ||
| predictor | ||
| rnn | ||
| serialized_test | ||
| test | ||
| trt | ||
| __init__.py | ||
| _import_c_extension.py | ||
| allcompare_test.py | ||
| attention.py | ||
| benchmark_generator.py | ||
| binarysize.py | ||
| brew.py | ||
| brew_test.py | ||
| build.py | ||
| cached_reader.py | ||
| caffe_translator.py | ||
| caffe_translator_test.py | ||
| checkpoint.py | ||
| checkpoint_test.py | ||
| CMakeLists.txt | ||
| cnn.py | ||
| compatibility.py | ||
| context.py | ||
| context_test.py | ||
| control.py | ||
| control_ops_grad.py | ||
| control_ops_grad_test.py | ||
| control_ops_util.py | ||
| control_test.py | ||
| convert.py | ||
| convert_test.py | ||
| convnet_benchmarks.py | ||
| convnet_benchmarks_test.py | ||
| core.py | ||
| core_gradients_test.py | ||
| core_test.py | ||
| crf.py | ||
| crf_predict.py | ||
| crf_viterbi_test.py | ||
| data_parallel_model.py | ||
| data_parallel_model_test.py | ||
| data_workers.py | ||
| data_workers_test.py | ||
| dataio.py | ||
| dataio_test.py | ||
| dataset.py | ||
| db_file_reader.py | ||
| db_test.py | ||
| device_checker.py | ||
| dlpack.h | ||
| dyndep.py | ||
| embedding_generation_benchmark.py | ||
| experiment_util.py | ||
| extension_loader.py | ||
| filler_test.py | ||
| functional.py | ||
| functional_test.py | ||
| fused_8bit_rowwise_conversion_ops_test.py | ||
| gradient_check_test.py | ||
| gradient_checker.py | ||
| gru_cell.py | ||
| hip_test_util.py | ||
| hsm_util.py | ||
| hypothesis_test.py | ||
| hypothesis_test_util.py | ||
| ideep_test_util.py | ||
| layer_model_helper.py | ||
| layer_model_instantiator.py | ||
| layer_parameter_sharing_test.py | ||
| layer_test_util.py | ||
| layers_test.py | ||
| lengths_reducer_fused_8bit_rowwise_ops_test.py | ||
| lengths_reducer_rowwise_8bit_ops_test.py | ||
| lstm_benchmark.py | ||
| memonger.py | ||
| memonger_test.py | ||
| mkl_test_util.py | ||
| model_device_test.py | ||
| model_helper.py | ||
| model_helper_test.py | ||
| modifier_context.py | ||
| mpi_python.cc | ||
| muji.py | ||
| muji_test.py | ||
| net_builder.py | ||
| net_builder_test.py | ||
| net_drawer.py | ||
| net_printer.py | ||
| net_printer_test.py | ||
| nomnigraph.py | ||
| nomnigraph_test.py | ||
| nomnigraph_transformations.py | ||
| nomnigraph_transformations_test.py | ||
| normalizer.py | ||
| normalizer_context.py | ||
| normalizer_test.py | ||
| numa_benchmark.py | ||
| numa_test.py | ||
| observer_test.py | ||
| operator_fp_exceptions_test.py | ||
| optimizer.py | ||
| optimizer_context.py | ||
| optimizer_test.py | ||
| optimizer_test_util.py | ||
| parallel_workers.py | ||
| parallel_workers_test.py | ||
| parallelize_bmuf_distributed_test.py | ||
| pipeline.py | ||
| pipeline_test.py | ||
| predictor_constants.py | ||
| pybind_state.cc | ||
| pybind_state.h | ||
| pybind_state_dlpack.cc | ||
| pybind_state_dlpack.h | ||
| pybind_state_gpu.cc | ||
| pybind_state_hip.cc | ||
| pybind_state_ideep.cc | ||
| pybind_state_int8.cc | ||
| pybind_state_nomni.cc | ||
| pybind_state_registry.cc | ||
| pybind_state_registry.h | ||
| python_op_test.py | ||
| queue_util.py | ||
| record_queue.py | ||
| recurrent.py | ||
| regularizer.py | ||
| regularizer_context.py | ||
| regularizer_test.py | ||
| rnn_cell.py | ||
| schema.py | ||
| schema_test.py | ||
| scope.py | ||
| scope_test.py | ||
| session.py | ||
| session_test.py | ||
| sparse_to_dense_mask_test.py | ||
| sparse_to_dense_test.py | ||
| task.py | ||
| task_test.py | ||
| test_util.py | ||
| text_file_reader.py | ||
| timeout_guard.py | ||
| toy_regression_test.py | ||
| transformations.py | ||
| transformations_test.py | ||
| tt_core.py | ||
| tt_core_test.py | ||
| utils.py | ||
| utils_test.py | ||
| visualize.py | ||
| workspace.py | ||
| workspace_test.py | ||