mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
Summary: Enforce counter value to double type in rowwise_counter. **Context:** The existing implementation uses the float type for the counter value. However, due to the precision limit of a single-precision floating-point number [1], we observed in our earlier experiments that the counter value cannot increment beyond 16777216.0 (i.e., the max value is 16777216.0): 16777216 is 2^24, and with a 24-bit significand adding 1.0 to it rounds back to the same value. We decided to enforce the double type to avoid this issue. [1] https://stackoverflow.com/questions/12596695/why-does-a-float-variable-stop-incrementing-at-16777216-in-c Test Plan: op test ``` ruixliu@devvm1997:~/fbsource/fbcode/caffe2/caffe2/python/operator_test(f0b0b48c)$ buck test :rowwise_counter_test Trace available for this run at /tmp/testpilot.20200728-083200.729292.log TestPilot test runner for Facebook. See https://fburl.com/testpilot for details. Testpilot build revision cd2638f1f47250eac058b8c36561760027d16add fbpkg f88726c8ebde4ba288e1172a348c7f46 at Mon Jul 27 18:11:43 2020 by twsvcscm from /usr/local/fbprojects/packages/testinfra.testpilot/887/t.par Discovering tests Running 1 test Started new test run: https://our.intern.facebook.com/intern/testinfra/testrun/7881299364977047 ✓ caffe2/caffe2/python/operator_test:rowwise_counter_test - test_rowwise_counter (caffe2.caffe2.python.operator_test.rowwise_counter_test.TestRowWiseCounter) 0.265 1/1 (passed) ✓ caffe2/caffe2/python/operator_test:rowwise_counter_test - main 14.414 (passed) Finished test run: https://our.intern.facebook.com/intern/testinfra/testrun/7881299364977047 Summary (total time 18.51s): PASS: 2 FAIL: 0 SKIP: 0 FATAL: 0 TIMEOUT: 0 OMIT: 0 ``` optimizer test ``` ruixliu@devvm1997:~/fbsource/fbcode/caffe2/caffe2/python(7d66fbb9)$ buck test :optimizer_test Finished test run: https://our.intern.facebook.com/intern/testinfra/testrun/7036874434841896 Summary (total time 64.87s): PASS: 48 FAIL: 0 SKIP: 24 caffe2/caffe2/python:optimizer_test - testGPUDense (caffe2.caffe2.python.optimizer_test.TestMomentumSgd) caffe2/caffe2/python:optimizer_test - testGPUDense 
(caffe2.caffe2.python.optimizer_test.TestGFtrl) caffe2/caffe2/python:optimizer_test - test_caffe2_cpu_vs_numpy (caffe2.caffe2.python.optimizer_test.TestYellowFin) caffe2/caffe2/python:optimizer_test - testGPUDense (caffe2.caffe2.python.optimizer_test.TestSparseRAdam) caffe2/caffe2/python:optimizer_test - testGPUDense (caffe2.caffe2.python.optimizer_test.TestRowWiseAdagradWithCounter) caffe2/caffe2/python:optimizer_test - testGPUDense (caffe2.caffe2.python.optimizer_test.TestAdagrad) caffe2/caffe2/python:optimizer_test - test_caffe2_gpu_vs_numpy (caffe2.caffe2.python.optimizer_test.TestYellowFin) caffe2/caffe2/python:optimizer_test - testDense (caffe2.caffe2.python.optimizer_test.TestRowWiseAdagrad) caffe2/caffe2/python:optimizer_test - testGPUDense (caffe2.caffe2.python.optimizer_test.TestFtrl) caffe2/caffe2/python:optimizer_test - testSparse (caffe2.caffe2.python.optimizer_test.TestRmsProp) ...and 14 more not shown... FATAL: 0 TIMEOUT: 0 OMIT: 0 ``` param download test ``` ruixliu@devvm1997:~/fbsource/fbcode/caffe2/caffe2/fb/net_transforms/tests(7ef20a38)$ sudo buck test :param_download_test Finished test run: Finished test run: https://our.intern.facebook.com/intern/testinfra/testrun/6473924481526935 ``` e2e flow: f208394929 f207991149 f207967273 ANP notebook to check the counter value loaded from the flows https://fburl.com/anp/5fdcbnoi screenshot of the loaded counter (note that counter max is larger than 16777216.0) {F250926501} Reviewed By: ellie-wen Differential Revision: D22711514 fbshipit-source-id: 426fed7415270aa3f276dda8141907534734337f
69 lines
2.2 KiB
Python
69 lines
2.2 KiB
Python
from __future__ import absolute_import, division, print_function
|
|
|
|
import unittest
|
|
|
|
import caffe2.python.hypothesis_test_util as hu
|
|
import numpy as np
|
|
from caffe2.python import core, workspace
|
|
|
|
|
|
def update_counter_ref(prev_iter, update_counter, indices, curr_iter, counter_halflife):
    """Compute the expected row-wise counter update with NumPy.

    For every row ``i`` in ``indices`` the counter is decayed by the number
    of iterations elapsed since that row was last touched and then
    incremented by one::

        counter[i] = 1 + exp(-(curr_iter - prev_iter[i]) * ln(2) / halflife) * counter[i]

    and ``prev_iter[i]`` is set to the current iteration. Rows not listed in
    ``indices`` are left untouched. The inputs are never modified.

    Args:
        prev_iter: array indexed by row holding the iteration at which each
            row was last updated. May be 1-D ``(h,)`` or a column ``(h, 1)``.
        update_counter: array indexed by row holding the current counters.
        indices: iterable of row indices to update, processed in order.
        curr_iter: array-like whose first element is the current iteration.
        counter_halflife: half-life, in iterations, of the counter decay.

    Returns:
        A ``(prev_iter_out, update_counter_out)`` tuple of updated copies.
    """
    prev_iter_out = prev_iter.copy()
    update_counter_out = update_counter.copy()

    counter_neg_log_rho = np.log(2) / counter_halflife
    now = curr_iter[0]  # loop-invariant: the current iteration number

    for i in indices:
        iter_diff = now - prev_iter_out[i]
        prev_iter_out[i] = now
        updated = 1.0 + np.exp(-iter_diff * counter_neg_log_rho) * update_counter_out[i]
        # When prev_iter is a column (h, 1) and the counter is 1-D, `updated`
        # is a size-1 1-D array being stored into a scalar slot. NumPy >= 1.25
        # deprecates that implicit array-to-scalar conversion, so reshape the
        # value to the slot's own shape first (a no-op when shapes agree).
        update_counter_out[i] = np.reshape(
            updated, np.shape(update_counter_out[i])
        )
    return prev_iter_out, update_counter_out
|
|
|
|
|
|
class TestRowWiseCounter(hu.HypothesisTestCase):
    """Checks the RowWiseCounter operator against a NumPy reference."""

    def test_rowwise_counter(self):
        """Run RowWiseCounter in place on random blobs and compare the
        resulting ``prev_iter`` / ``update_counter`` blobs with the values
        computed by ``update_counter_ref`` from the same inputs.
        """
        h = 8 * 20  # number of rows
        n = 5  # number of row indices drawn (before de-duplication)
        curr_iter = np.array([100], dtype=np.int64)

        # Counters are fed as float64.
        update_counter = np.random.randint(99, size=h).astype(np.float64)
        # Draw real integer iteration numbers in [0, curr_iter). Casting
        # np.random.rand() output (floats in [0, 1)) to int64 truncates every
        # entry to 0, which made prev_iter constant and the decay factor
        # always 2**-100, so the decay path was effectively untested.
        prev_iter = np.random.randint(
            0, int(curr_iter[0]), size=(h, 1)
        ).astype(np.int64)
        # np.unique already returns its result sorted, so no extra sort.
        indices = np.unique(np.random.randint(0, h, size=n))
        counter_halflife = 1

        net = core.Net("test_net")
        net.Proto().type = "dag"

        workspace.FeedBlob("indices", indices)
        workspace.FeedBlob("curr_iter", curr_iter)
        workspace.FeedBlob("update_counter", update_counter)
        workspace.FeedBlob("prev_iter", prev_iter)

        # The operator's outputs reuse the names of its first two inputs.
        net.RowWiseCounter(
            ["prev_iter", "update_counter", "indices", "curr_iter"],
            ["prev_iter", "update_counter"],
            counter_halflife=counter_halflife,
        )

        workspace.RunNetOnce(net)

        prev_iter_out = workspace.FetchBlob("prev_iter")
        update_counter_out = workspace.FetchBlob("update_counter")

        prev_iter_out_ref, update_counter_out_ref = update_counter_ref(
            prev_iter,
            update_counter,
            indices,
            curr_iter,
            counter_halflife=counter_halflife,
        )

        # np.testing assertions are not stripped under `python -O` and report
        # the mismatching elements; tolerances match np.allclose's defaults
        # with the original rtol.
        np.testing.assert_allclose(
            prev_iter_out, prev_iter_out_ref, rtol=1e-3, atol=1e-8
        )
        np.testing.assert_allclose(
            update_counter_out, update_counter_out_ref, rtol=1e-3, atol=1e-8
        )
|
|
|
|
|
|
if __name__ == "__main__":
    # Initialise caffe2 with the single option string "caffe2", then hand
    # control to the unittest runner.
    core.GlobalInit(["caffe2"])
    unittest.main()
|