You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
204 lines
6.5 KiB
204 lines
6.5 KiB
""" Scheduler Factory
|
|
Hacked together by / Copyright 2021 Ross Wightman
|
|
"""
|
|
from typing import List, Union
|
|
|
|
from torch.optim import Optimizer
|
|
|
|
from .cosine_lr import CosineLRScheduler
|
|
from .multistep_lr import MultiStepLRScheduler
|
|
from .plateau_lr import PlateauLRScheduler
|
|
from .poly_lr import PolyLRScheduler
|
|
from .step_lr import StepLRScheduler
|
|
from .tanh_lr import TanhLRScheduler
|
|
|
|
|
|
def scheduler_kwargs(cfg):
|
|
""" cfg/argparse to kwargs helper
|
|
Convert scheduler args in argparse args or cfg (.dot) like object to keyword args.
|
|
"""
|
|
eval_metric = getattr(cfg, 'eval_metric', 'top1')
|
|
plateau_mode = 'min' if 'loss' in eval_metric else 'max'
|
|
kwargs = dict(
|
|
sched=cfg.sched,
|
|
num_epochs=getattr(cfg, 'epochs', 100),
|
|
decay_epochs=getattr(cfg, 'decay_epochs', 30),
|
|
decay_milestones=getattr(cfg, 'decay_milestones', [30, 60]),
|
|
warmup_epochs=getattr(cfg, 'warmup_epochs', 5),
|
|
cooldown_epochs=getattr(cfg, 'cooldown_epochs', 0),
|
|
patience_epochs=getattr(cfg, 'patience_epochs', 10),
|
|
decay_rate=getattr(cfg, 'decay_rate', 0.1),
|
|
min_lr=getattr(cfg, 'min_lr', 0.),
|
|
warmup_lr=getattr(cfg, 'warmup_lr', 1e-5),
|
|
warmup_prefix=getattr(cfg, 'warmup_prefix', False),
|
|
noise=getattr(cfg, 'lr_noise', None),
|
|
noise_pct=getattr(cfg, 'lr_noise_pct', 0.67),
|
|
noise_std=getattr(cfg, 'lr_noise_std', 1.),
|
|
noise_seed=getattr(cfg, 'seed', 42),
|
|
cycle_mul=getattr(cfg, 'lr_cycle_mul', 1.),
|
|
cycle_decay=getattr(cfg, 'lr_cycle_decay', 0.1),
|
|
cycle_limit=getattr(cfg, 'lr_cycle_limit', 1),
|
|
k_decay=getattr(cfg, 'lr_k_decay', 1.0),
|
|
plateau_mode=plateau_mode,
|
|
step_on_epochs=not getattr(cfg, 'sched_on_updates', False),
|
|
)
|
|
return kwargs
|
|
|
|
|
|
def create_scheduler(
|
|
args,
|
|
optimizer: Optimizer,
|
|
updates_per_epoch: int = 0,
|
|
):
|
|
return create_scheduler_v2(
|
|
optimizer=optimizer,
|
|
**scheduler_kwargs(args),
|
|
updates_per_epoch=updates_per_epoch,
|
|
)
|
|
|
|
|
|
def create_scheduler_v2(
|
|
optimizer: Optimizer,
|
|
sched: str = 'cosine',
|
|
num_epochs: int = 300,
|
|
decay_epochs: int = 90,
|
|
decay_milestones: List[int] = (90, 180, 270),
|
|
cooldown_epochs: int = 0,
|
|
patience_epochs: int = 10,
|
|
decay_rate: float = 0.1,
|
|
min_lr: float = 0,
|
|
warmup_lr: float = 1e-5,
|
|
warmup_epochs: int = 0,
|
|
warmup_prefix: bool = False,
|
|
noise: Union[float, List[float]] = None,
|
|
noise_pct: float = 0.67,
|
|
noise_std: float = 1.,
|
|
noise_seed: int = 42,
|
|
cycle_mul: float = 1.,
|
|
cycle_decay: float = 0.1,
|
|
cycle_limit: int = 1,
|
|
k_decay: float = 1.0,
|
|
plateau_mode: str = 'max',
|
|
step_on_epochs: bool = True,
|
|
updates_per_epoch: int = 0,
|
|
):
|
|
t_initial = num_epochs
|
|
warmup_t = warmup_epochs
|
|
decay_t = decay_epochs
|
|
cooldown_t = cooldown_epochs
|
|
|
|
if not step_on_epochs:
|
|
assert updates_per_epoch > 0, 'updates_per_epoch must be set to number of dataloader batches'
|
|
t_initial = t_initial * updates_per_epoch
|
|
warmup_t = warmup_t * updates_per_epoch
|
|
decay_t = decay_t * updates_per_epoch
|
|
decay_milestones = [d * updates_per_epoch for d in decay_milestones]
|
|
cooldown_t = cooldown_t * updates_per_epoch
|
|
|
|
# warmup args
|
|
warmup_args = dict(
|
|
warmup_lr_init=warmup_lr,
|
|
warmup_t=warmup_t,
|
|
warmup_prefix=warmup_prefix,
|
|
)
|
|
|
|
# setup noise args for supporting schedulers
|
|
if noise is not None:
|
|
if isinstance(noise, (list, tuple)):
|
|
noise_range = [n * t_initial for n in noise]
|
|
if len(noise_range) == 1:
|
|
noise_range = noise_range[0]
|
|
else:
|
|
noise_range = noise * t_initial
|
|
else:
|
|
noise_range = None
|
|
noise_args = dict(
|
|
noise_range_t=noise_range,
|
|
noise_pct=noise_pct,
|
|
noise_std=noise_std,
|
|
noise_seed=noise_seed,
|
|
)
|
|
|
|
# setup cycle args for supporting schedulers
|
|
cycle_args = dict(
|
|
cycle_mul=cycle_mul,
|
|
cycle_decay=cycle_decay,
|
|
cycle_limit=cycle_limit,
|
|
)
|
|
|
|
lr_scheduler = None
|
|
if sched == 'cosine':
|
|
lr_scheduler = CosineLRScheduler(
|
|
optimizer,
|
|
t_initial=t_initial,
|
|
lr_min=min_lr,
|
|
t_in_epochs=step_on_epochs,
|
|
**cycle_args,
|
|
**warmup_args,
|
|
**noise_args,
|
|
k_decay=k_decay,
|
|
)
|
|
elif sched == 'tanh':
|
|
lr_scheduler = TanhLRScheduler(
|
|
optimizer,
|
|
t_initial=t_initial,
|
|
lr_min=min_lr,
|
|
t_in_epochs=step_on_epochs,
|
|
**cycle_args,
|
|
**warmup_args,
|
|
**noise_args,
|
|
)
|
|
elif sched == 'step':
|
|
lr_scheduler = StepLRScheduler(
|
|
optimizer,
|
|
decay_t=decay_t,
|
|
decay_rate=decay_rate,
|
|
t_in_epochs=step_on_epochs,
|
|
**warmup_args,
|
|
**noise_args,
|
|
)
|
|
elif sched == 'multistep':
|
|
lr_scheduler = MultiStepLRScheduler(
|
|
optimizer,
|
|
decay_t=decay_milestones,
|
|
decay_rate=decay_rate,
|
|
t_in_epochs=step_on_epochs,
|
|
**warmup_args,
|
|
**noise_args,
|
|
)
|
|
elif sched == 'plateau':
|
|
assert step_on_epochs, 'Plateau LR only supports step per epoch.'
|
|
warmup_args.pop('warmup_prefix', False)
|
|
lr_scheduler = PlateauLRScheduler(
|
|
optimizer,
|
|
decay_rate=decay_rate,
|
|
patience_t=patience_epochs,
|
|
cooldown_t=0,
|
|
**warmup_args,
|
|
lr_min=min_lr,
|
|
mode=plateau_mode,
|
|
**noise_args,
|
|
)
|
|
elif sched == 'poly':
|
|
lr_scheduler = PolyLRScheduler(
|
|
optimizer,
|
|
power=decay_rate, # overloading 'decay_rate' as polynomial power
|
|
t_initial=t_initial,
|
|
lr_min=min_lr,
|
|
t_in_epochs=step_on_epochs,
|
|
k_decay=k_decay,
|
|
**cycle_args,
|
|
**warmup_args,
|
|
**noise_args,
|
|
)
|
|
|
|
if hasattr(lr_scheduler, 'get_cycle_length'):
|
|
# for cycle based schedulers (cosine, tanh, poly) recalculate total epochs w/ cycles & cooldown
|
|
t_with_cycles_and_cooldown = lr_scheduler.get_cycle_length() + cooldown_t
|
|
if step_on_epochs:
|
|
num_epochs = t_with_cycles_and_cooldown
|
|
else:
|
|
num_epochs = t_with_cycles_and_cooldown // updates_per_epoch
|
|
|
|
return lr_scheduler, num_epochs
|