@ -114,7 +114,7 @@ class Lars(Optimizer):
)
if group['trust_clip']:
trust_ratio = torch.minimum(trust_ratio / group['lr'], one_tensor)
grad.add(p, alpha=weight_decay)
grad.add_(p, alpha=weight_decay)
grad.mul_(trust_ratio)
# apply SGD update https://github.com/pytorch/pytorch/blob/1.7/torch/optim/sgd.py#L100