[optim] take kw-only argument for functional optim APIs (#56185)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/56185

ghstack-source-id: 126670123

Reviewed By: albanD

Differential Revision: D27802169

fbshipit-source-id: f5e1cb2046dcdeecf5f6b0f70892828bf0adb22f
Authored by Wanchao Liang on 2021-04-15 20:05:14 -07:00, committed by Facebook GitHub Bot
parent bd3c63aeeb
commit 4611387608
17 changed files with 88 additions and 80 deletions
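
The change itself is mechanical: every functional optimizer helper in the diff below (adagrad, adam, adamw, sgd, adadelta, rmsprop, rprop, adamax) gains a bare * in its signature, making the hyperparameters that follow it (lr, beta1/beta2, eps, weight_decay, momentum, ...) keyword-only, and every call site is updated to pass them by name. A minimal sketch of the pattern with a toy update function (illustrative only, not the real PyTorch signatures):

from typing import List

def functional_update(params: List[float],
                      grads: List[float],
                      *,                      # everything after this marker is keyword-only
                      lr: float,
                      weight_decay: float = 0.0) -> None:
    # Toy SGD-style update, just to show the signature shape.
    for i, (p, g) in enumerate(zip(params, grads)):
        params[i] = p - lr * (g + weight_decay * p)

params = [1.0, 2.0]
functional_update(params, [0.1, 0.2], lr=0.5)   # OK: hyperparameter passed by name
# functional_update(params, [0.1, 0.2], 0.5)    # TypeError: functional_update() takes 2 positional arguments but 3 were given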

@@ -76,7 +76,7 @@ class _FunctionalAdadelta(object):
 grads,
 square_avgs,
 acc_deltas,
-lr,
-rho,
-eps,
-weight_decay)
+lr=lr,
+rho=rho,
+eps=eps,
+weight_decay=weight_decay)

@@ -84,7 +84,7 @@ class _FunctionalAdagrad(object):
 grads,
 state_sums,
 state_steps,
-self.defaults['lr'],
-self.defaults['weight_decay'],
-self.defaults['lr_decay'],
-self.defaults['eps'])
+lr=self.defaults['lr'],
+weight_decay=self.defaults['weight_decay'],
+lr_decay=self.defaults['lr_decay'],
+eps=self.defaults['eps'])

@@ -105,9 +105,9 @@ class _FunctionalAdam(object):
 exp_avg_sqs,
 max_exp_avg_sqs,
 state_steps,
-self.amsgrad,
-self.defaults['beta1'],
-self.defaults['beta2'],
-self.defaults['lr'],
-self.defaults['weight_decay'],
-self.defaults['eps'])
+amsgrad=self.amsgrad,
+beta1=self.defaults['beta1'],
+beta2=self.defaults['beta2'],
+lr=self.defaults['lr'],
+weight_decay=self.defaults['weight_decay'],
+eps=self.defaults['eps'])

@@ -95,8 +95,8 @@ class _FunctionalAdamax(object):
 exp_avgs,
 exp_infs,
 state_steps,
-self.defaults['eps'],
-self.defaults['beta1'],
-self.defaults['beta2'],
-self.defaults['lr'],
-self.defaults['weight_decay'])
+eps=self.defaults['eps'],
+beta1=self.defaults['beta1'],
+beta2=self.defaults['beta2'],
+lr=self.defaults['lr'],
+weight_decay=self.defaults['weight_decay'])

@@ -105,9 +105,9 @@ class _FunctionalAdamW(object):
 exp_avg_sqs,
 max_exp_avg_sqs,
 state_steps,
-self.amsgrad,
-self.defaults['beta1'],
-self.defaults['beta2'],
-self.defaults['lr'],
-self.defaults['weight_decay'],
-self.defaults['eps'])
+amsgrad=self.amsgrad,
+beta1=self.defaults['beta1'],
+beta2=self.defaults['beta2'],
+lr=self.defaults['lr'],
+weight_decay=self.defaults['weight_decay'],
+eps=self.defaults['eps'])

@@ -91,9 +91,9 @@ class _FunctionalRMSprop(object):
 square_avgs,
 grad_avgs,
 momentum_buffer_list,
-lr,
-alpha,
-eps,
-weight_decay,
-momentum,
-self.centered)
+lr=lr,
+alpha=alpha,
+eps=eps,
+weight_decay=weight_decay,
+momentum=momentum,
+centered=self.centered)

@@ -75,7 +75,7 @@ class _FunctionalRprop(object):
 grads,
 prevs,
 step_sizes,
-step_size_min,
-step_size_max,
-etaminus,
-etaplus)
+step_size_min=step_size_min,
+step_size_max=step_size_max,
+etaminus=etaminus,
+etaplus=etaplus)

@@ -73,11 +73,11 @@ class _FunctionalSGD(object):
 F.sgd(params,
 grads,
 momentum_buffer_list,
-weight_decay,
-momentum,
-lr,
-dampening,
-self.nesterov)
+weight_decay=weight_decay,
+momentum=momentum,
+lr=lr,
+dampening=dampening,
+nesterov=self.nesterov)
 # update momentum_buffers in state
 for i, p in enumerate(params):

@@ -17,6 +17,7 @@ def adagrad(params: List[Tensor],
 grads: List[Tensor],
 state_sums: List[Tensor],
 state_steps: List[int],
+*,
 lr: float,
 weight_decay: float,
 lr_decay: float,
@@ -56,6 +57,7 @@ def adam(params: List[Tensor],
 exp_avg_sqs: List[Tensor],
 max_exp_avg_sqs: List[Tensor],
 state_steps: List[int],
+*,
 amsgrad: bool,
 beta1: float,
 beta2: float,
@@ -102,6 +104,7 @@ def adamw(params: List[Tensor],
 exp_avg_sqs: List[Tensor],
 max_exp_avg_sqs: List[Tensor],
 state_steps: List[int],
+*,
 amsgrad: bool,
 beta1: float,
 beta2: float,
@@ -143,6 +146,7 @@ def adamw(params: List[Tensor],
 def sgd(params: List[Tensor],
 d_p_list: List[Tensor],
 momentum_buffer_list: List[Optional[Tensor]],
+*,
 weight_decay: float,
 momentum: float,
 lr: float,
@@ -180,6 +184,7 @@ def adadelta(params: List[Tensor],
 grads: List[Tensor],
 square_avgs: List[Tensor],
 acc_deltas: List[Tensor],
+*,
 lr: float,
 rho: float,
 eps: float,
@@ -205,6 +210,7 @@ def rmsprop(params: List[Tensor],
 square_avgs: List[Tensor],
 grad_avgs: List[Tensor],
 momentum_buffer_list: List[Tensor],
+*,
 lr: float,
 alpha: float,
 eps: float,
@@ -244,6 +250,7 @@ def rprop(params: List[Tensor],
 grads: List[Tensor],
 prevs: List[Tensor],
 step_sizes: List[Tensor],
+*,
 step_size_min: float,
 step_size_max: float,
 etaminus: float,
@@ -282,6 +289,7 @@ def adamax(params: List[Tensor],
 exp_avgs: List[Tensor],
 exp_infs: List[Tensor],
 state_steps: List[int],
+*,
 eps: float,
 beta1: float,
 beta2: float,
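
With the bare * in place, passing these same-typed float hyperparameters positionally fails loudly instead of silently binding a value to the wrong parameter. A small self-contained illustration of the hazard that keyword-only arguments rule out (toy functions, not the PyTorch API):

def step_positional(value: float, lr: float, weight_decay: float) -> float:
    return value - lr * (1.0 + weight_decay * value)

print(step_positional(1.0, 0.01, 0.5))   # intended: lr=0.01, weight_decay=0.5
print(step_positional(1.0, 0.5, 0.01))   # arguments swapped: accepted silently, wrong result

def step_kwonly(value: float, *, lr: float, weight_decay: float) -> float:
    return value - lr * (1.0 + weight_decay * value)

print(step_kwonly(1.0, lr=0.01, weight_decay=0.5))   # OK
# step_kwonly(1.0, 0.01, 0.5)   # TypeError: step_kwonly() takes 1 positional argument but 3 were given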

@@ -81,9 +81,9 @@ class Adadelta(Optimizer):
 grads,
 square_avgs,
 acc_deltas,
-lr,
-rho,
-eps,
-weight_decay)
+lr=lr,
+rho=rho,
+eps=eps,
+weight_decay=weight_decay)
 return loss

@@ -84,9 +84,9 @@ class Adagrad(Optimizer):
 grads,
 state_sums,
 state_steps,
-group['lr'],
-group['weight_decay'],
-group['lr_decay'],
-group['eps'])
+lr=group['lr'],
+weight_decay=group['weight_decay'],
+lr_decay=group['lr_decay'],
+eps=group['eps'])
 return loss

@@ -110,10 +110,10 @@ class Adam(Optimizer):
 exp_avg_sqs,
 max_exp_avg_sqs,
 state_steps,
-group['amsgrad'],
-beta1,
-beta2,
-group['lr'],
-group['weight_decay'],
-group['eps'])
+amsgrad=group['amsgrad'],
+beta1=beta1,
+beta2=beta2,
+lr=group['lr'],
+weight_decay=group['weight_decay'],
+eps=group['eps'])
 return loss

@@ -89,10 +89,10 @@ class Adamax(Optimizer):
 exp_avgs,
 exp_infs,
 state_steps,
-eps,
-beta1,
-beta2,
-lr,
-weight_decay)
+eps=eps,
+beta1=beta1,
+beta2=beta2,
+lr=lr,
+weight_decay=weight_decay)
 return loss

@@ -113,11 +113,11 @@ class AdamW(Optimizer):
 exp_avg_sqs,
 max_exp_avg_sqs,
 state_steps,
-amsgrad,
-beta1,
-beta2,
-group['lr'],
-group['weight_decay'],
-group['eps'])
+amsgrad=amsgrad,
+beta1=beta1,
+beta2=beta2,
+lr=group['lr'],
+weight_decay=group['weight_decay'],
+eps=group['eps'])
 return loss

@@ -108,11 +108,11 @@ class RMSprop(Optimizer):
 square_avgs,
 grad_avgs,
 momentum_buffer_list,
-group['lr'],
-group['alpha'],
-group['eps'],
-group['weight_decay'],
-group['momentum'],
-group['centered'])
+lr=group['lr'],
+alpha=group['alpha'],
+eps=group['eps'],
+weight_decay=group['weight_decay'],
+momentum=group['momentum'],
+centered=group['centered'])
 return loss

@@ -74,9 +74,9 @@ class Rprop(Optimizer):
 grads,
 prevs,
 step_sizes,
-step_size_min,
-step_size_max,
-etaminus,
-etaplus)
+step_size_min=step_size_min,
+step_size_max=step_size_max,
+etaminus=etaminus,
+etaplus=etaplus)
 return loss

@@ -110,11 +110,11 @@ class SGD(Optimizer):
 F.sgd(params_with_grad,
 d_p_list,
 momentum_buffer_list,
-weight_decay,
-momentum,
-lr,
-dampening,
-nesterov)
+weight_decay=weight_decay,
+momentum=momentum,
+lr=lr,
+dampening=dampening,
+nesterov=nesterov)
 # update momentum_buffers in state
 for p, momentum_buffer in zip(params_with_grad, momentum_buffer_list):