[optim] take kw-only argument for functional optim APIs (#56185)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56185
ghstack-source-id: 126670123
Reviewed By: albanD
Differential Revision: D27802169
fbshipit-source-id: f5e1cb2046dcdeecf5f6b0f70892828bf0adb22f
parent bd3c63aeeb
commit 4611387608
17 changed files with 88 additions and 80 deletions
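
In practice, the change moves every hyperparameter of the functional optimizer calls from positional to keyword-only; the tensor-list arguments stay positional. A minimal sketch of the before/after call-site pattern, using the adadelta arguments shown in the hunks below (F is the alias the optimizer classes use for the functional module):

    # before this commit: hyperparameters bound by position
    F.adadelta(params, grads, square_avgs, acc_deltas,
               lr, rho, eps, weight_decay)

    # after this commit: hyperparameters must be passed by name
    F.adadelta(params, grads, square_avgs, acc_deltas,
               lr=lr, rho=rho, eps=eps, weight_decay=weight_decay)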
@@ -76,7 +76,7 @@ class _FunctionalAdadelta(object):
                 grads,
                 square_avgs,
                 acc_deltas,
-                lr,
-                rho,
-                eps,
-                weight_decay)
+                lr=lr,
+                rho=rho,
+                eps=eps,
+                weight_decay=weight_decay)
@@ -84,7 +84,7 @@ class _FunctionalAdagrad(object):
                 grads,
                 state_sums,
                 state_steps,
-                self.defaults['lr'],
-                self.defaults['weight_decay'],
-                self.defaults['lr_decay'],
-                self.defaults['eps'])
+                lr=self.defaults['lr'],
+                weight_decay=self.defaults['weight_decay'],
+                lr_decay=self.defaults['lr_decay'],
+                eps=self.defaults['eps'])
@@ -105,9 +105,9 @@ class _FunctionalAdam(object):
                 exp_avg_sqs,
                 max_exp_avg_sqs,
                 state_steps,
-                self.amsgrad,
-                self.defaults['beta1'],
-                self.defaults['beta2'],
-                self.defaults['lr'],
-                self.defaults['weight_decay'],
-                self.defaults['eps'])
+                amsgrad=self.amsgrad,
+                beta1=self.defaults['beta1'],
+                beta2=self.defaults['beta2'],
+                lr=self.defaults['lr'],
+                weight_decay=self.defaults['weight_decay'],
+                eps=self.defaults['eps'])
@@ -95,8 +95,8 @@ class _FunctionalAdamax(object):
                 exp_avgs,
                 exp_infs,
                 state_steps,
-                self.defaults['eps'],
-                self.defaults['beta1'],
-                self.defaults['beta2'],
-                self.defaults['lr'],
-                self.defaults['weight_decay'])
+                eps=self.defaults['eps'],
+                beta1=self.defaults['beta1'],
+                beta2=self.defaults['beta2'],
+                lr=self.defaults['lr'],
+                weight_decay=self.defaults['weight_decay'])
@@ -105,9 +105,9 @@ class _FunctionalAdamW(object):
                 exp_avg_sqs,
                 max_exp_avg_sqs,
                 state_steps,
-                self.amsgrad,
-                self.defaults['beta1'],
-                self.defaults['beta2'],
-                self.defaults['lr'],
-                self.defaults['weight_decay'],
-                self.defaults['eps'])
+                amsgrad=self.amsgrad,
+                beta1=self.defaults['beta1'],
+                beta2=self.defaults['beta2'],
+                lr=self.defaults['lr'],
+                weight_decay=self.defaults['weight_decay'],
+                eps=self.defaults['eps'])
@@ -91,9 +91,9 @@ class _FunctionalRMSprop(object):
                 square_avgs,
                 grad_avgs,
                 momentum_buffer_list,
-                lr,
-                alpha,
-                eps,
-                weight_decay,
-                momentum,
-                self.centered)
+                lr=lr,
+                alpha=alpha,
+                eps=eps,
+                weight_decay=weight_decay,
+                momentum=momentum,
+                centered=self.centered)
@@ -75,7 +75,7 @@ class _FunctionalRprop(object):
                 grads,
                 prevs,
                 step_sizes,
-                step_size_min,
-                step_size_max,
-                etaminus,
-                etaplus)
+                step_size_min=step_size_min,
+                step_size_max=step_size_max,
+                etaminus=etaminus,
+                etaplus=etaplus)
@@ -73,11 +73,11 @@ class _FunctionalSGD(object):
            F.sgd(params,
                  grads,
                  momentum_buffer_list,
-                 weight_decay,
-                 momentum,
-                 lr,
-                 dampening,
-                 self.nesterov)
+                 weight_decay=weight_decay,
+                 momentum=momentum,
+                 lr=lr,
+                 dampening=dampening,
+                 nesterov=self.nesterov)

            # update momentum_buffers in state
            for i, p in enumerate(params):
@@ -17,6 +17,7 @@ def adagrad(params: List[Tensor],
             grads: List[Tensor],
             state_sums: List[Tensor],
             state_steps: List[int],
+            *,
             lr: float,
             weight_decay: float,
             lr_decay: float,
@@ -56,6 +57,7 @@ def adam(params: List[Tensor],
          exp_avg_sqs: List[Tensor],
          max_exp_avg_sqs: List[Tensor],
          state_steps: List[int],
+         *,
          amsgrad: bool,
          beta1: float,
          beta2: float,
@@ -102,6 +104,7 @@ def adamw(params: List[Tensor],
           exp_avg_sqs: List[Tensor],
           max_exp_avg_sqs: List[Tensor],
           state_steps: List[int],
+          *,
           amsgrad: bool,
           beta1: float,
           beta2: float,
@@ -143,6 +146,7 @@ def adamw(params: List[Tensor],
 def sgd(params: List[Tensor],
         d_p_list: List[Tensor],
         momentum_buffer_list: List[Optional[Tensor]],
+        *,
         weight_decay: float,
         momentum: float,
         lr: float,
@@ -180,6 +184,7 @@ def adadelta(params: List[Tensor],
              grads: List[Tensor],
              square_avgs: List[Tensor],
              acc_deltas: List[Tensor],
+             *,
              lr: float,
              rho: float,
              eps: float,
@@ -205,6 +210,7 @@ def rmsprop(params: List[Tensor],
             square_avgs: List[Tensor],
             grad_avgs: List[Tensor],
             momentum_buffer_list: List[Tensor],
+            *,
             lr: float,
             alpha: float,
             eps: float,
@@ -244,6 +250,7 @@ def rprop(params: List[Tensor],
           grads: List[Tensor],
           prevs: List[Tensor],
           step_sizes: List[Tensor],
+          *,
           step_size_min: float,
           step_size_max: float,
           etaminus: float,
@@ -282,6 +289,7 @@ def adamax(params: List[Tensor],
            exp_avgs: List[Tensor],
            exp_infs: List[Tensor],
            state_steps: List[int],
+           *,
            eps: float,
            beta1: float,
            beta2: float,
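
The bare * added to each signature above is Python's keyword-only marker: every parameter that follows it can only be supplied by name, so a positional call now fails loudly instead of silently binding a value to the wrong hyperparameter. A standalone sketch of the mechanism with a hypothetical toy function (not the PyTorch API itself):

    def update(params, grads, state_steps, *, lr, weight_decay):
        # lr and weight_decay are keyword-only
        return lr, weight_decay

    update([], [], [], lr=0.01, weight_decay=0.0)  # ok
    update([], [], [], 0.01, 0.0)                  # TypeError: takes 3 positional arguments but 5 were given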
@@ -81,9 +81,9 @@ class Adadelta(Optimizer):
                        grads,
                        square_avgs,
                        acc_deltas,
-                       lr,
-                       rho,
-                       eps,
-                       weight_decay)
+                       lr=lr,
+                       rho=rho,
+                       eps=eps,
+                       weight_decay=weight_decay)

        return loss
@@ -84,9 +84,9 @@ class Adagrad(Optimizer):
                       grads,
                       state_sums,
                       state_steps,
-                      group['lr'],
-                      group['weight_decay'],
-                      group['lr_decay'],
-                      group['eps'])
+                      lr=group['lr'],
+                      weight_decay=group['weight_decay'],
+                      lr_decay=group['lr_decay'],
+                      eps=group['eps'])

        return loss
@@ -110,10 +110,10 @@ class Adam(Optimizer):
                    exp_avg_sqs,
                    max_exp_avg_sqs,
                    state_steps,
-                   group['amsgrad'],
-                   beta1,
-                   beta2,
-                   group['lr'],
-                   group['weight_decay'],
-                   group['eps'])
+                   amsgrad=group['amsgrad'],
+                   beta1=beta1,
+                   beta2=beta2,
+                   lr=group['lr'],
+                   weight_decay=group['weight_decay'],
+                   eps=group['eps'])
        return loss
@@ -89,10 +89,10 @@ class Adamax(Optimizer):
                      exp_avgs,
                      exp_infs,
                      state_steps,
-                     eps,
-                     beta1,
-                     beta2,
-                     lr,
-                     weight_decay)
+                     eps=eps,
+                     beta1=beta1,
+                     beta2=beta2,
+                     lr=lr,
+                     weight_decay=weight_decay)

        return loss
@@ -113,11 +113,11 @@ class AdamW(Optimizer):
                     exp_avg_sqs,
                     max_exp_avg_sqs,
                     state_steps,
-                    amsgrad,
-                    beta1,
-                    beta2,
-                    group['lr'],
-                    group['weight_decay'],
-                    group['eps'])
+                    amsgrad=amsgrad,
+                    beta1=beta1,
+                    beta2=beta2,
+                    lr=group['lr'],
+                    weight_decay=group['weight_decay'],
+                    eps=group['eps'])

        return loss
@@ -108,11 +108,11 @@ class RMSprop(Optimizer):
                      square_avgs,
                      grad_avgs,
                      momentum_buffer_list,
-                     group['lr'],
-                     group['alpha'],
-                     group['eps'],
-                     group['weight_decay'],
-                     group['momentum'],
-                     group['centered'])
+                     lr=group['lr'],
+                     alpha=group['alpha'],
+                     eps=group['eps'],
+                     weight_decay=group['weight_decay'],
+                     momentum=group['momentum'],
+                     centered=group['centered'])

        return loss
@@ -74,9 +74,9 @@ class Rprop(Optimizer):
                    grads,
                    prevs,
                    step_sizes,
-                   step_size_min,
-                   step_size_max,
-                   etaminus,
-                   etaplus)
+                   step_size_min=step_size_min,
+                   step_size_max=step_size_max,
+                   etaminus=etaminus,
+                   etaplus=etaplus)

        return loss
@@ -110,11 +110,11 @@ class SGD(Optimizer):
            F.sgd(params_with_grad,
                  d_p_list,
                  momentum_buffer_list,
-                 weight_decay,
-                 momentum,
-                 lr,
-                 dampening,
-                 nesterov)
+                 weight_decay=weight_decay,
+                 momentum=momentum,
+                 lr=lr,
+                 dampening=dampening,
+                 nesterov=nesterov)

            # update momentum_buffers in state
            for p, momentum_buffer in zip(params_with_grad, momentum_buffer_list):