From db1fe34d0c18bbec5764ca15fcb345b06dced9d0 Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Fri, 12 Apr 2019 23:16:49 -0700
Subject: [PATCH] Update a few comments, add some references

---
 optim/adabound.py      | 3 +++
 optim/nadam.py         | 3 +++
 scheduler/cosine_lr.py | 5 ++++-
 scheduler/tanh_lr.py   | 4 ++--
 utils.py               | 2 --
 validate.py            | 2 --
 6 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/optim/adabound.py b/optim/adabound.py
index 161a2e86..3ff2712c 100644
--- a/optim/adabound.py
+++ b/optim/adabound.py
@@ -20,6 +20,9 @@ class AdaBound(Optimizer):
         amsbound (boolean, optional): whether to use the AMSBound variant of this
             algorithm
     .. Adaptive Gradient Methods with Dynamic Bound of Learning Rate:
         https://openreview.net/forum?id=Bkg3g2R9FX
+
+    Originally taken from https://github.com/Luolc/AdaBound
+    NOTE: Has not provided good (or even decent) results on large datasets like ImageNet
     """
     def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), final_lr=0.1, gamma=1e-3,
diff --git a/optim/nadam.py b/optim/nadam.py
index 56a57b1c..d994d1b8 100644
--- a/optim/nadam.py
+++ b/optim/nadam.py
@@ -20,6 +20,9 @@ class Nadam(Optimizer):
 
     __ http://cs229.stanford.edu/proj2015/054_report.pdf
     __ http://www.cs.toronto.edu/~fritz/absps/momentum.pdf
+
+    Originally taken from: https://github.com/pytorch/pytorch/pull/1408
+    NOTE: Has potential issues but does work well on some problems.
     """
 
     def __init__(self, params, lr=2e-3, betas=(0.9, 0.999), eps=1e-8,
diff --git a/scheduler/cosine_lr.py b/scheduler/cosine_lr.py
index 36a13ea2..f2a85931 100644
--- a/scheduler/cosine_lr.py
+++ b/scheduler/cosine_lr.py
@@ -11,8 +11,11 @@ logger = logging.getLogger(__name__)
 
 class CosineLRScheduler(Scheduler):
     """
-    Cosine annealing with restarts.
+    Cosine decay with restarts.
     This is described in the paper https://arxiv.org/abs/1608.03983.
+
+    Inspiration from
+    https://github.com/allenai/allennlp/blob/master/allennlp/training/learning_rate_schedulers/cosine.py
     """
 
     def __init__(self,
diff --git a/scheduler/tanh_lr.py b/scheduler/tanh_lr.py
index 4335e9be..795f974d 100644
--- a/scheduler/tanh_lr.py
+++ b/scheduler/tanh_lr.py
@@ -11,8 +11,8 @@ logger = logging.getLogger(__name__)
 
 class TanhLRScheduler(Scheduler):
     """
-    Cosine annealing with restarts.
-    This is described in the paper https://arxiv.org/abs/1608.03983.
+    Hyperbolic-Tangent decay with restarts.
+    This is described in the paper https://arxiv.org/abs/1806.01593.
     """
 
     def __init__(self,
diff --git a/utils.py b/utils.py
index f206f945..b19ed32c 100644
--- a/utils.py
+++ b/utils.py
@@ -1,7 +1,5 @@
 import torch
-import numbers
 import math
-import numpy as np
 import os
 import shutil
 import glob
diff --git a/validate.py b/validate.py
index 63f73616..3f54acd0 100644
--- a/validate.py
+++ b/validate.py
@@ -68,7 +68,6 @@ def main():
     else:
         model = model.cuda()
 
-    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
 
     loader = create_loader(
@@ -87,7 +86,6 @@ def main():
     top1 = AverageMeter()
     top5 = AverageMeter()
 
-    # switch to evaluate mode
     model.eval()
     end = time.time()
     with torch.no_grad():
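
A note on the AdaBound reference added above: the optimizer clips Adam's per-parameter step size between lower and upper bounds that both converge to final_lr, so training behaves like Adam early on and drifts toward SGD-with-final_lr later. A minimal sketch of that bound logic, following the formulas in the Luolc/AdaBound repo; the function name is illustrative, and the rescaling of final_lr by lr / base_lr done in the real code is omitted for brevity:

    import math

    def adabound_step_size(lr, final_lr, gamma, step, v_hat, eps=1e-8):
        # Bounds start wide apart (Adam-like) and tighten toward final_lr (SGD-like).
        lower = final_lr * (1 - 1 / (gamma * step + 1))
        upper = final_lr * (1 + 1 / (gamma * step))
        adam_step = lr / (math.sqrt(v_hat) + eps)  # scalar form for clarity
        return min(max(adam_step, lower), upper)   # clipped step, applied to exp_avg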
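
For the Nadam reference: Nadam is Adam with Nesterov momentum, i.e. the parameter update blends a look-ahead bias-corrected momentum term with the bias-corrected current gradient. A simplified sketch of one step assuming a constant beta1; the linked PyTorch PR additionally decays beta1 over time via a momentum schedule, which is omitted here, and t starts at 1:

    def nadam_update(param, grad, m, v, t, lr=2e-3, beta1=0.9, beta2=0.999, eps=1e-8):
        m = beta1 * m + (1 - beta1) * grad           # first moment estimate
        v = beta2 * v + (1 - beta2) * grad * grad    # second moment estimate
        m_hat = m / (1 - beta1 ** (t + 1))           # look-ahead bias correction
        g_hat = grad / (1 - beta1 ** t)              # bias-corrected raw gradient
        v_hat = v / (1 - beta2 ** t)
        m_bar = beta1 * m_hat + (1 - beta1) * g_hat  # Nesterov blend
        return param - lr * m_bar / (v_hat ** 0.5 + eps), m, v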
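
On the cosine scheduler: SGDR (arXiv:1608.03983) anneals the LR from its base value down to a minimum along a half cosine, then restarts, optionally stretching each successive cycle. A small sketch of the schedule itself; parameter names are illustrative rather than the class's actual signature, and t_mul >= 1 is assumed:

    import math

    def cosine_restart_lr(step, base_lr, t_initial, lr_min=0.0, t_mul=1.0):
        t, cycle_len = step, t_initial
        while t >= cycle_len:                   # locate position within the current cycle
            t -= cycle_len
            cycle_len = int(cycle_len * t_mul)  # t_mul > 1 lengthens later cycles
        return lr_min + 0.5 * (base_lr - lr_min) * (1 + math.cos(math.pi * t / cycle_len))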
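
On the tanh scheduler: hyperbolic-tangent decay (arXiv:1806.01593) sweeps tanh between a lower and an upper bound, giving a flat start, a steep drop through the middle of training, and a flat tail. A single-cycle sketch; the bound defaults below are illustrative, not necessarily the paper's exact values:

    import math

    def tanh_decay_lr(step, base_lr, t_total, lb=-6.0, ub=4.0, lr_min=0.0):
        progress = step / t_total                  # runs 0 -> 1 over the cycle
        tv = math.tanh(lb + (ub - lb) * progress)  # ~-1 at the start, ~+1 at the end
        return lr_min + 0.5 * (base_lr - lr_min) * (1.0 - tv)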