mirror of
https://github.com/saymrwulf/pytorch.git
synced 2026-05-15 21:00:47 +00:00
Summary: There is a module called `2to3` which you can target for future specifically to remove these, the directory of `caffe2` has the most redundant imports: ```2to3 -f future -w caffe2``` Pull Request resolved: https://github.com/pytorch/pytorch/pull/45033 Reviewed By: seemethere Differential Revision: D23808648 Pulled By: bugra fbshipit-source-id: 38971900f0fe43ab44a9168e57f2307580d36a38
157 lines
6.4 KiB
Python
157 lines
6.4 KiB
Python
|
|
|
|
|
|
|
|
|
|
import functools
|
|
|
|
from hypothesis import given, settings, HealthCheck
|
|
import hypothesis.strategies as st
|
|
import numpy as np
|
|
|
|
from caffe2.python import core
|
|
import caffe2.python.hypothesis_test_util as hu
|
|
|
|
|
|
class TestStorm(hu.HypothesisTestCase):
|
|
@given(inputs=hu.tensors(n=3),
|
|
grad_sq_sum=st.floats(min_value=0.01, max_value=0.99,
|
|
allow_nan=False, allow_infinity=False),
|
|
lr=st.floats(min_value=0.01, max_value=1.0,
|
|
allow_nan=False, allow_infinity=False),
|
|
momentum=st.floats(min_value=0.1, max_value=100.0,
|
|
allow_nan=False, allow_infinity=False),
|
|
beta=st.floats(min_value=0.1, max_value=10.0,
|
|
allow_nan=False, allow_infinity=False),
|
|
**hu.gcs_cpu_only)
|
|
def test_storm_dense(self, inputs, grad_sq_sum, lr, momentum, beta, gc, dc):
|
|
param, moment, grad = inputs
|
|
grad_sq_sum = np.array([grad_sq_sum], dtype=np.float32)
|
|
lr = np.array([lr], dtype=np.float32)
|
|
|
|
op = core.CreateOperator(
|
|
"Storm",
|
|
["param", "moment", "grad_sq_sum", "grad", "lr"],
|
|
["param", "moment", "grad_sq_sum"],
|
|
momentum=momentum,
|
|
beta=beta,
|
|
device_option=gc
|
|
)
|
|
|
|
def ref_dense(param, moment, grad_sq_sum, grad, lr, momentum, beta):
|
|
grad_sq_sum_out = grad_sq_sum + np.sum(grad * grad)
|
|
nlr = lr * np.power(beta + grad_sq_sum_out, -1.0 / 3.0)
|
|
alpha = momentum * np.square(nlr)
|
|
moment_out = grad + (1 - alpha) * (moment - grad)
|
|
param_out = param + nlr * moment_out
|
|
|
|
return (param_out.astype(np.float32), moment_out.astype(np.float32),
|
|
grad_sq_sum_out.astype(np.float32))
|
|
|
|
self.assertReferenceChecks(
|
|
gc, op,
|
|
[param, moment, grad_sq_sum, grad, lr],
|
|
functools.partial(ref_dense, momentum=momentum, beta=beta)
|
|
)
|
|
|
|
# Suppress filter_too_much health check.
|
|
# Likely caused by `assume` call falling through too often.
|
|
@settings(suppress_health_check=[HealthCheck.filter_too_much])
|
|
@given(inputs=hu.tensors(n=3),
|
|
grad_sq_sum=st.floats(min_value=0.01, max_value=0.99,
|
|
allow_nan=False, allow_infinity=False),
|
|
lr=st.floats(min_value=0.01, max_value=1.0,
|
|
allow_nan=False, allow_infinity=False),
|
|
momentum=st.floats(min_value=0.1, max_value=100.0,
|
|
allow_nan=False, allow_infinity=False),
|
|
beta=st.floats(min_value=0.1, max_value=10.0,
|
|
allow_nan=False, allow_infinity=False),
|
|
**hu.gcs_cpu_only)
|
|
def test_storm_sparse(self, inputs, grad_sq_sum, lr,
|
|
momentum, beta, gc, dc):
|
|
param, moment, grad = inputs
|
|
grad_sq_sum = np.array([grad_sq_sum], dtype=np.float32)
|
|
lr = np.array([lr], dtype=np.float32)
|
|
|
|
# Create an indexing array containing values that are lists of indices,
|
|
# which index into grad
|
|
indices = np.random.choice(np.arange(grad.shape[0]),
|
|
size=np.random.randint(grad.shape[0]),
|
|
replace=False)
|
|
|
|
# Sparsify grad
|
|
grad = grad[indices]
|
|
|
|
op = core.CreateOperator(
|
|
"SparseStorm",
|
|
["param", "moment", "grad_sq_sum", "grad", "indices", "lr"],
|
|
["param", "moment", "grad_sq_sum"],
|
|
momentum=momentum,
|
|
beta=beta,
|
|
device_option=gc)
|
|
|
|
def ref_sparse(param, moment, grad_sq_sum, grad, indices,
|
|
lr, momentum, beta):
|
|
param_out = np.copy(param)
|
|
moment_out = np.copy(moment)
|
|
grad_sq_sum_out = np.copy(grad_sq_sum)
|
|
|
|
grad_sq_sum_out = grad_sq_sum + np.sum(grad * grad)
|
|
nlr = lr * np.power(beta + grad_sq_sum_out, -1.0 / 3.0)
|
|
alpha = momentum * np.square(nlr)
|
|
for i, index in enumerate(indices):
|
|
gi = grad[i]
|
|
moment_out[index] = gi + (1 - alpha) * (moment[index] - gi)
|
|
param_out[index] = param[index] + nlr * moment_out[index]
|
|
|
|
return (param_out.astype(np.float32), moment_out.astype(np.float32),
|
|
grad_sq_sum_out.astype(np.float32))
|
|
|
|
self.assertReferenceChecks(
|
|
gc, op,
|
|
[param, moment, grad_sq_sum, grad, indices, lr],
|
|
functools.partial(ref_sparse, momentum=momentum, beta=beta)
|
|
)
|
|
|
|
@given(inputs=hu.tensors(n=2),
|
|
grad_sq_sum=st.floats(min_value=0.01, max_value=0.99,
|
|
allow_nan=False, allow_infinity=False),
|
|
lr=st.floats(min_value=0.01, max_value=1.0,
|
|
allow_nan=False, allow_infinity=False),
|
|
momentum=st.floats(min_value=0.1, max_value=100.0,
|
|
allow_nan=False, allow_infinity=False),
|
|
beta=st.floats(min_value=0.1, max_value=10.0,
|
|
allow_nan=False, allow_infinity=False),
|
|
data_strategy=st.data(),
|
|
**hu.gcs_cpu_only)
|
|
def test_storm_sparse_empty(self, inputs, grad_sq_sum, lr, momentum,
|
|
beta, data_strategy, gc, dc):
|
|
param, moment = inputs
|
|
grad_sq_sum = np.array([grad_sq_sum], dtype=np.float32)
|
|
lr = np.array([lr], dtype=np.float32)
|
|
|
|
grad = np.empty(shape=(0,) + param.shape[1:], dtype=np.float32)
|
|
indices = np.empty(shape=(0,), dtype=np.int64)
|
|
|
|
op = core.CreateOperator(
|
|
"SparseStorm",
|
|
["param", "moment", "grad_sq_sum", "grad", "indices", "lr"],
|
|
["param", "moment", "grad_sq_sum"],
|
|
momentum=momentum,
|
|
beta=beta,
|
|
device_option=gc)
|
|
|
|
def ref_sparse_empty(param, moment, grad_sq_sum, grad, indices,
|
|
lr, momentum, beta):
|
|
param_out = np.copy(param)
|
|
moment_out = np.copy(moment)
|
|
grad_sq_sum_out = np.copy(grad_sq_sum)
|
|
|
|
return (param_out.astype(np.float32), moment_out.astype(np.float32),
|
|
grad_sq_sum_out.astype(np.float32))
|
|
|
|
self.assertReferenceChecks(
|
|
gc, op,
|
|
[param, moment, grad_sq_sum, grad, indices, lr],
|
|
functools.partial(ref_sparse_empty, momentum=momentum, beta=beta)
|
|
)
|