Update a few comments, add some references

pull/1/head
Ross Wightman 6 years ago
parent 0562b91c38
commit db1fe34d0c

@@ -20,6 +20,9 @@ class AdaBound(Optimizer):
         amsbound (boolean, optional): whether to use the AMSBound variant of this algorithm
     .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
         https://openreview.net/forum?id=Bkg3g2R9FX
+
+    Originally taken from https://github.com/Luolc/AdaBound
+    NOTE: Has not provided good (or even decent) results on large datasets like ImageNet
     """

     def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
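The hunk above also shows the constructor defaults, which is enough to try the optimizer out. A minimal usage sketch, assuming `AdaBound` is importable from this repo's optim module (the import path and the toy model are illustrative, not taken from the diff):

```python
import torch
import torch.nn as nn
from optim import AdaBound  # assumed import path, not shown in the diff

model = nn.Linear(10, 2)
# final_lr is the SGD-style rate the dynamic bounds converge to;
# gamma controls how quickly the bounds tighten around it.
optimizer = AdaBound(model.parameters(), lr=1e-3, betas=(0.9, 0.999),
                     final_lr=0.1, gamma=1e-3, amsbound=False)

x, y = torch.randn(4, 10), torch.randint(0, 2, (4,))
nn.CrossEntropyLoss()(model(x), y).backward()
optimizer.step()
optimizer.zero_grad()
```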

@@ -20,6 +20,9 @@ class Nadam(Optimizer):
     __ http://cs229.stanford.edu/proj2015/054_report.pdf
     __ http://www.cs.toronto.edu/~fritz/absps/momentum.pdf
+
+    Originally taken from: https://github.com/pytorch/pytorch/pull/1408
+    NOTE: Has potential issues but does work well on some problems.
     """

     def __init__(self, params, lr=2e-3, betas=(0.9, 0.999), eps=1e-8,
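As with AdaBound, the visible signature supports a usage sketch; the remaining constructor arguments are truncated in the diff, so only the visible ones appear here (import path again assumed):

```python
import torch
import torch.nn as nn
from optim import Nadam  # assumed import path, not shown in the diff

model = nn.Linear(10, 2)
# Nadam is Adam with Nesterov momentum, per the linked PyTorch PR.
optimizer = Nadam(model.parameters(), lr=2e-3, betas=(0.9, 0.999), eps=1e-8)

x, y = torch.randn(4, 10), torch.randint(0, 2, (4,))
nn.CrossEntropyLoss()(model(x), y).backward()
optimizer.step()
optimizer.zero_grad()
```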

@@ -11,8 +11,11 @@ logger = logging.getLogger(__name__)
 class CosineLRScheduler(Scheduler):
     """
-    Cosine annealing with restarts.
+    Cosine decay with restarts.
     This is described in the paper https://arxiv.org/abs/1608.03983.
+
+    Inspiration from
+    https://github.com/allenai/allennlp/blob/master/allennlp/training/learning_rate_schedulers/cosine.py
     """

     def __init__(self,
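For reference, the schedule this class implements comes from the SGDR paper linked above. A standalone sketch of that formula (the function name and cycle parameters are illustrative; the class's own constructor arguments are truncated in the diff):

```python
import math

def cosine_restart_lr(t, lr_max, lr_min=0.0, t_initial=10, t_mult=2):
    """Learning rate at epoch t under cosine decay with warm restarts
    (https://arxiv.org/abs/1608.03983). Cycle i lasts t_initial * t_mult**i
    epochs; within a cycle the lr anneals from lr_max down to lr_min."""
    t_i, t_cur = t_initial, t
    while t_cur >= t_i:  # locate the current restart cycle
        t_cur -= t_i
        t_i *= t_mult
    return lr_min + 0.5 * (lr_max - lr_min) * (1 + math.cos(math.pi * t_cur / t_i))

# At each cycle start the lr resets to lr_max, e.g. cosine_restart_lr(0, 0.1)
# -> 0.1; within a cycle it anneals toward lr_min along a half cosine.
```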

@@ -11,8 +11,8 @@ logger = logging.getLogger(__name__)
 class TanhLRScheduler(Scheduler):
     """
-    Cosine annealing with restarts.
-    This is described in the paper https://arxiv.org/abs/1608.03983.
+    Hyperbolic-Tangent decay with restarts.
+    This is described in the paper https://arxiv.org/abs/1806.01593
     """

     def __init__(self,
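The new arXiv link is the hyperbolic-tangent decay paper; its schedule can be sketched in a few lines. The lower/upper bounds below are illustrative defaults, not values taken from this repo:

```python
import math

def tanh_decay_lr(t, lr_init, t_total, lb=-6.0, ub=4.0):
    """Learning rate at epoch t under tanh decay
    (https://arxiv.org/abs/1806.01593): starts near lr_init, since
    tanh(lb) is close to -1, and flattens toward zero as t -> t_total."""
    return 0.5 * lr_init * (1.0 - math.tanh(lb + (ub - lb) * t / t_total))
```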

@@ -1,7 +1,5 @@
 import torch
-import numbers
 import math
-import numpy as np
 import os
 import shutil
 import glob

@@ -68,7 +68,6 @@ def main():
     else:
         model = model.cuda()

-    # define loss function (criterion) and optimizer
     criterion = nn.CrossEntropyLoss().cuda()

     loader = create_loader(
@@ -87,7 +86,6 @@ def main():
     top1 = AverageMeter()
     top5 = AverageMeter()

-    # switch to evaluate mode
     model.eval()
     end = time.time()
     with torch.no_grad():
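The two hunks above only delete comments, so for context here is a minimal sketch of the validation loop they sit in, assuming a standard top-k `accuracy` helper (the loader, criterion, and meters come from the code shown above):

```python
model.eval()
with torch.no_grad():
    for input, target in loader:
        input, target = input.cuda(), target.cuda()
        output = model(input)
        loss = criterion(output, target)
        # accuracy() is an assumed helper returning top-1/top-5 precision
        prec1, prec5 = accuracy(output, target, topk=(1, 5))
        top1.update(prec1.item(), input.size(0))
        top5.update(prec5.item(), input.size(0))
```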
