Merge branch 'logger' into features. Change 'logger' to '_logger'.

pull/175/head
Ross Wightman 4 years ago
commit 7995295968

@ -0,0 +1,39 @@
# Workflow that runs the tibdex/autosquash action whenever an event occurs
# that could make a labeled pull request mergeable (or require an update).
name: Autosquash
on:
  check_run:
    types:
      # Check runs completing successfully can unblock the
      # corresponding pull requests and make them mergeable.
      - completed
  pull_request:
    types:
      # A closed pull request makes the checks on the other
      # pull request on the same base outdated.
      - closed
      # Adding the autosquash label to a pull request can
      # trigger an update or a merge.
      - labeled
  pull_request_review:
    types:
      # Review approvals can unblock the pull request and
      # make it mergeable.
      - submitted
  # Success statuses can unblock the corresponding
  # pull requests and make them mergeable.
  status: {}
jobs:
  autosquash:
    name: Autosquash
    # The ubuntu-18.04 hosted runner image has been retired by GitHub, so a
    # job pinned to it can no longer be scheduled; use the maintained label.
    runs-on: ubuntu-latest
    steps:
      - uses: tibdex/autosquash@v2
        with:
          # We can't use the built-in secrets.GITHUB_TOKEN yet because of this limitation:
          # https://github.community/t5/GitHub-Actions/Triggering-a-new-workflow-from-another-workflow/td-p/31676
          # In the meantime, use a token granting write access on the repo:
          # - a GitHub App token
          #   See https://github.com/marketplace/actions/github-app-token.
          # - a personal access token
          #   See https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line.
          github_token: ${{ secrets.AUTOSQUASH_TOKEN }}

@ -17,6 +17,8 @@ from timm.data import Dataset, create_loader, resolve_data_config
from timm.utils import AverageMeter, setup_default_logging from timm.utils import AverageMeter, setup_default_logging
torch.backends.cudnn.benchmark = True torch.backends.cudnn.benchmark = True
_logger = logging.getLogger('inference')
parser = argparse.ArgumentParser(description='PyTorch ImageNet Inference') parser = argparse.ArgumentParser(description='PyTorch ImageNet Inference')
parser.add_argument('data', metavar='DIR', parser.add_argument('data', metavar='DIR',
@ -67,7 +69,7 @@ def main():
pretrained=args.pretrained, pretrained=args.pretrained,
checkpoint_path=args.checkpoint) checkpoint_path=args.checkpoint)
logging.info('Model %s created, param count: %d' % _logger.info('Model %s created, param count: %d' %
(args.model, sum([m.numel() for m in model.parameters()]))) (args.model, sum([m.numel() for m in model.parameters()])))
config = resolve_data_config(vars(args), model=model) config = resolve_data_config(vars(args), model=model)
@ -107,7 +109,7 @@ def main():
end = time.time() end = time.time()
if batch_idx % args.log_freq == 0: if batch_idx % args.log_freq == 0:
logging.info('Predict: [{0}/{1}] Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format( _logger.info('Predict: [{0}/{1}] Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
batch_idx, len(loader), batch_time=batch_time)) batch_idx, len(loader), batch_time=batch_time))
topk_ids = np.concatenate(topk_ids, axis=0).squeeze() topk_ids = np.concatenate(topk_ids, axis=0).squeeze()

@ -2,6 +2,9 @@ import logging
from .constants import * from .constants import *
_logger = logging.getLogger(__name__)
def resolve_data_config(args, default_cfg={}, model=None, verbose=True): def resolve_data_config(args, default_cfg={}, model=None, verbose=True):
new_config = {} new_config = {}
default_cfg = default_cfg default_cfg = default_cfg
@ -65,8 +68,8 @@ def resolve_data_config(args, default_cfg={}, model=None, verbose=True):
new_config['crop_pct'] = default_cfg['crop_pct'] new_config['crop_pct'] = default_cfg['crop_pct']
if verbose: if verbose:
logging.info('Data processing configuration for current model + dataset:') _logger.info('Data processing configuration for current model + dataset:')
for n, v in new_config.items(): for n, v in new_config.items():
logging.info('\t%s: %s' % (n, str(v))) _logger.info('\t%s: %s' % (n, str(v)))
return new_config return new_config

@ -18,10 +18,12 @@ from .layers import CondConv2d, get_condconv_initializer
__all__ = ["EfficientNetBuilder", "decode_arch_def", "efficientnet_init_weights"] __all__ = ["EfficientNetBuilder", "decode_arch_def", "efficientnet_init_weights"]
_logger = logging.getLogger(__name__)
def _log_info_if(msg, condition): def _log_info_if(msg, condition):
if condition: if condition:
logging.info(msg) _logger.info(msg)
def _parse_ksize(ss): def _parse_ksize(ss):
@ -233,7 +235,7 @@ class EfficientNetBuilder:
self.drop_path_rate = drop_path_rate self.drop_path_rate = drop_path_rate
if feature_location == 'depthwise': if feature_location == 'depthwise':
# old 'depthwise' mode renamed 'expansion' to match TF impl, old expansion mode didn't make sense # old 'depthwise' mode renamed 'expansion' to match TF impl, old expansion mode didn't make sense
logging.warning("feature_location=='depthwise' is deprecated, using 'expansion'") _logger.warning("feature_location=='depthwise' is deprecated, using 'expansion'")
feature_location = 'expansion' feature_location = 'expansion'
self.feature_location = feature_location self.feature_location = feature_location
assert feature_location in ('bottleneck', 'expansion', '') assert feature_location in ('bottleneck', 'expansion', '')
@ -291,7 +293,7 @@ class EfficientNetBuilder:
""" Build the blocks """ Build the blocks
Args: Args:
in_chs: Number of input-channels passed to first block in_chs: Number of input-channels passed to first block
model_block_args: A list of lists, outer list defines stacks (block stages), inner model_block_args: A list of lists, outer list defines stages, inner
list contains strings defining block configuration(s) list contains strings defining block configuration(s)
Return: Return:
List of block stacks (each stack wrapped in nn.Sequential) List of block stacks (each stack wrapped in nn.Sequential)

@ -12,6 +12,9 @@ from .features import FeatureListNet, FeatureDictNet, FeatureHookNet
from .layers import Conv2dSame from .layers import Conv2dSame
_logger = logging.getLogger(__name__)
def load_state_dict(checkpoint_path, use_ema=False): def load_state_dict(checkpoint_path, use_ema=False):
if checkpoint_path and os.path.isfile(checkpoint_path): if checkpoint_path and os.path.isfile(checkpoint_path):
checkpoint = torch.load(checkpoint_path, map_location='cpu') checkpoint = torch.load(checkpoint_path, map_location='cpu')
@ -28,10 +31,10 @@ def load_state_dict(checkpoint_path, use_ema=False):
state_dict = new_state_dict state_dict = new_state_dict
else: else:
state_dict = checkpoint state_dict = checkpoint
logging.info("Loaded {} from checkpoint '{}'".format(state_dict_key, checkpoint_path)) _logger.info("Loaded {} from checkpoint '{}'".format(state_dict_key, checkpoint_path))
return state_dict return state_dict
else: else:
logging.error("No checkpoint found at '{}'".format(checkpoint_path)) _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
raise FileNotFoundError() raise FileNotFoundError()
@ -59,13 +62,13 @@ def resume_checkpoint(model, checkpoint_path):
resume_epoch = checkpoint['epoch'] resume_epoch = checkpoint['epoch']
if 'version' in checkpoint and checkpoint['version'] > 1: if 'version' in checkpoint and checkpoint['version'] > 1:
resume_epoch += 1 # start at the next epoch, old checkpoints incremented before save resume_epoch += 1 # start at the next epoch, old checkpoints incremented before save
logging.info("Loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch'])) _logger.info("Loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
else: else:
model.load_state_dict(checkpoint) model.load_state_dict(checkpoint)
logging.info("Loaded checkpoint '{}'".format(checkpoint_path)) _logger.info("Loaded checkpoint '{}'".format(checkpoint_path))
return other_state, resume_epoch return other_state, resume_epoch
else: else:
logging.error("No checkpoint found at '{}'".format(checkpoint_path)) _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
raise FileNotFoundError() raise FileNotFoundError()
@ -73,7 +76,7 @@ def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=Non
if cfg is None: if cfg is None:
cfg = getattr(model, 'default_cfg') cfg = getattr(model, 'default_cfg')
if cfg is None or 'url' not in cfg or not cfg['url']: if cfg is None or 'url' not in cfg or not cfg['url']:
logging.warning("Pretrained model URL is invalid, using random initialization.") _logger.warning("Pretrained model URL is invalid, using random initialization.")
return return
state_dict = model_zoo.load_url(cfg['url'], progress=False, map_location='cpu') state_dict = model_zoo.load_url(cfg['url'], progress=False, map_location='cpu')
@ -83,7 +86,7 @@ def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=Non
if in_chans == 1: if in_chans == 1:
conv1_name = cfg['first_conv'] conv1_name = cfg['first_conv']
logging.info('Converting first conv (%s) from 3 to 1 channel' % conv1_name) _logger.info('Converting first conv (%s) from 3 to 1 channel' % conv1_name)
conv1_weight = state_dict[conv1_name + '.weight'] conv1_weight = state_dict[conv1_name + '.weight']
state_dict[conv1_name + '.weight'] = conv1_weight.sum(dim=1, keepdim=True) state_dict[conv1_name + '.weight'] = conv1_weight.sum(dim=1, keepdim=True)
elif in_chans != 3: elif in_chans != 3:

@ -23,7 +23,7 @@ from .registry import register_model
from .resnet import BasicBlock, Bottleneck # leveraging ResNet blocks w/ additional features like SE from .resnet import BasicBlock, Bottleneck # leveraging ResNet blocks w/ additional features like SE
_BN_MOMENTUM = 0.1 _BN_MOMENTUM = 0.1
logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
def _cfg(url='', **kwargs): def _cfg(url='', **kwargs):
@ -412,7 +412,7 @@ class HighResolutionModule(nn.Module):
elif num_branches != len(num_inchannels): elif num_branches != len(num_inchannels):
error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format(num_branches, len(num_inchannels)) error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format(num_branches, len(num_inchannels))
if error_msg: if error_msg:
logger.error(error_msg) _logger.error(error_msg)
raise ValueError(error_msg) raise ValueError(error_msg)
def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1): def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):

@ -10,6 +10,9 @@ import torch.nn.functional as F
from .adaptive_avgmax_pool import adaptive_avgmax_pool2d from .adaptive_avgmax_pool import adaptive_avgmax_pool2d
_logger = logging.getLogger(__name__)
class TestTimePoolHead(nn.Module): class TestTimePoolHead(nn.Module):
def __init__(self, base, original_pool=7): def __init__(self, base, original_pool=7):
super(TestTimePoolHead, self).__init__() super(TestTimePoolHead, self).__init__()
@ -40,7 +43,7 @@ def apply_test_time_pool(model, config, args):
if not args.no_test_pool and \ if not args.no_test_pool and \
config['input_size'][-1] > model.default_cfg['input_size'][-1] and \ config['input_size'][-1] > model.default_cfg['input_size'][-1] and \
config['input_size'][-2] > model.default_cfg['input_size'][-2]: config['input_size'][-2] > model.default_cfg['input_size'][-2]:
logging.info('Target input size %s > pretrained default %s, using test time pooling' % _logger.info('Target input size %s > pretrained default %s, using test time pooling' %
(str(config['input_size'][-2:]), str(model.default_cfg['input_size'][-2:]))) (str(config['input_size'][-2:]), str(model.default_cfg['input_size'][-2:])))
model = TestTimePoolHead(model, original_pool=model.default_cfg['pool_size']) model = TestTimePoolHead(model, original_pool=model.default_cfg['pool_size'])
test_time_pool = True test_time_pool = True

@ -5,4 +5,6 @@ from .radam import RAdam
from .novograd import NovoGrad from .novograd import NovoGrad
from .nvnovograd import NvNovoGrad from .nvnovograd import NvNovoGrad
from .lookahead import Lookahead from .lookahead import Lookahead
from .adamp import AdamP
from .sgdp import SGDP
from .optim_factory import create_optimizer from .optim_factory import create_optimizer

@ -0,0 +1,107 @@
"""
AdamP Optimizer Implementation copied from https://github.com/clovaai/AdamP/blob/master/adamp/adamp.py
Paper: `Slowing Down the Weight Norm Increase in Momentum-based Optimizers` - https://arxiv.org/abs/2006.08217
Code: https://github.com/clovaai/AdamP
Copyright (c) 2020-present NAVER Corp.
MIT license
"""
import torch
import torch.nn as nn
from torch.optim.optimizer import Optimizer, required
import math
class AdamP(Optimizer):
    """Adam variant that projects the update for scale-invariant weights.

    Implements the AdamP update from `Slowing Down the Weight Norm Increase in
    Momentum-based Optimizers` (https://arxiv.org/abs/2006.08217).

    For parameters with more than one dimension, the Adam update is compared
    with the weights via cosine similarity between gradient and weights
    (first per output channel, then over the whole tensor). When the largest
    similarity is below ``delta / sqrt(dim)``, the component of the update
    parallel to the weights is removed and weight decay is scaled by
    ``wd_ratio``.

    Args:
        params: Iterable of parameters or dicts defining parameter groups.
        lr: Learning rate.
        betas: Coefficients for the running averages of the gradient and of
            its square.
        eps: Term added to denominators for numerical stability.
        weight_decay: Decoupled weight decay coefficient; applied only when
            it is greater than zero.
        delta: Threshold (scaled by ``1 / sqrt(dim)``) that decides whether
            the projection is applied.
        wd_ratio: Multiplier applied to weight decay when the projection is
            applied.
        nesterov: If True, use ``beta1 * exp_avg + (1 - beta1) * grad`` as the
            numerator of the update instead of ``exp_avg``.
    """

    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
                 weight_decay=0, delta=0.1, wd_ratio=0.1, nesterov=False):
        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay,
                        delta=delta, wd_ratio=wd_ratio, nesterov=nesterov)
        super(AdamP, self).__init__(params, defaults)

    def _channel_view(self, x):
        """Flatten ``x`` to 2-D, keeping the first dimension."""
        # reshape (not view) so non-contiguous tensors are supported.
        return x.reshape(x.size(0), -1)

    def _layer_view(self, x):
        """Flatten ``x`` to a single row of shape ``(1, numel)``."""
        # reshape (not view) so non-contiguous tensors are supported.
        return x.reshape(1, -1)

    def _cosine_similarity(self, x, y, eps, view_func):
        """Return the row-wise absolute cosine similarity of ``x`` and ``y``.

        Both tensors are first flattened with ``view_func``; ``eps`` is added
        to each norm to avoid division by zero.
        """
        x = view_func(x)
        y = view_func(y)

        x_norm = x.norm(dim=1).add_(eps)
        y_norm = y.norm(dim=1).add_(eps)
        dot = (x * y).sum(dim=1)

        return dot.abs() / x_norm / y_norm

    def _projection(self, p, grad, perturb, delta, wd_ratio, eps):
        """Remove the weight-parallel component of ``perturb`` when applicable.

        Tries the channel-wise view first, then the layer-wise view; the first
        view whose maximum cosine similarity falls below the threshold wins.

        Returns:
            Tuple ``(perturb, wd)`` where ``perturb`` has been modified in
            place if a projection happened and ``wd`` is ``wd_ratio`` in that
            case, otherwise ``1``.
        """
        wd = 1
        # Shape that broadcasts a per-row scalar back over the parameter.
        expand_size = [-1] + [1] * (len(p.shape) - 1)
        for view_func in [self._channel_view, self._layer_view]:
            cosine_sim = self._cosine_similarity(grad, p.data, eps, view_func)

            if cosine_sim.max() < delta / math.sqrt(view_func(p.data).size(1)):
                # Normalised weights for this view.
                p_n = p.data / view_func(p.data).norm(dim=1).view(expand_size).add_(eps)
                # Subtract the projection of the update onto the weights.
                perturb -= p_n * view_func(p_n * perturb).sum(dim=1).view(expand_size)
                wd = wd_ratio

                return perturb, wd

        return perturb, wd

    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: Optional callable that re-evaluates the model and returns
                the loss.

        Returns:
            The value returned by ``closure``, or ``None`` if none was given.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            for p in group['params']:
                if p.grad is None:
                    continue

                grad = p.grad.data
                beta1, beta2 = group['betas']
                nesterov = group['nesterov']

                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['step'] = 0
                    state['exp_avg'] = torch.zeros_like(p.data)
                    state['exp_avg_sq'] = torch.zeros_like(p.data)

                # Adam
                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']

                state['step'] += 1
                bias_correction1 = 1 - beta1 ** state['step']
                bias_correction2 = 1 - beta2 ** state['step']

                # Keyword forms replace the deprecated add_(scalar, tensor) /
                # addcmul_(scalar, t1, t2) overloads.
                exp_avg.mul_(beta1).add_(grad, alpha=1 - beta1)
                exp_avg_sq.mul_(beta2).addcmul_(grad, grad, value=1 - beta2)

                denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
                step_size = group['lr'] / bias_correction1

                if nesterov:
                    perturb = (beta1 * exp_avg + (1 - beta1) * grad) / denom
                else:
                    perturb = exp_avg / denom

                # Projection (only for parameters with more than one dim)
                wd_ratio = 1
                if len(p.shape) > 1:
                    perturb, wd_ratio = self._projection(
                        p, grad, perturb, group['delta'], group['wd_ratio'], group['eps'])

                # Weight decay (applied directly to the weights)
                if group['weight_decay'] > 0:
                    p.data.mul_(1 - group['lr'] * group['weight_decay'] * wd_ratio)

                # Step
                p.data.add_(perturb, alpha=-step_size)

        return loss

@ -3,7 +3,7 @@ Hacked together by / Copyright 2020 Ross Wightman
""" """
import torch import torch
from torch import optim as optim from torch import optim as optim
from timm.optim import Nadam, RMSpropTF, AdamW, RAdam, NovoGrad, NvNovoGrad, Lookahead from timm.optim import Nadam, RMSpropTF, AdamW, RAdam, NovoGrad, NvNovoGrad, Lookahead, AdamP, SGDP
try: try:
from apex.optimizers import FusedNovoGrad, FusedAdam, FusedLAMB, FusedSGD from apex.optimizers import FusedNovoGrad, FusedAdam, FusedLAMB, FusedSGD
has_apex = True has_apex = True
@ -63,6 +63,14 @@ def create_optimizer(args, model, filter_bias_and_bn=True):
elif opt_lower == 'radam': elif opt_lower == 'radam':
optimizer = RAdam( optimizer = RAdam(
parameters, lr=args.lr, weight_decay=weight_decay, eps=args.opt_eps) parameters, lr=args.lr, weight_decay=weight_decay, eps=args.opt_eps)
elif opt_lower == 'adamp':
optimizer = AdamP(
parameters, lr=args.lr, weight_decay=weight_decay, eps=args.opt_eps,
delta=0.1, wd_ratio=0.01, nesterov=True)
elif opt_lower == 'sgdp':
optimizer = SGDP(
parameters, lr=args.lr, momentum=args.momentum, weight_decay=weight_decay,
eps=args.opt_eps, nesterov=True)
elif opt_lower == 'adadelta': elif opt_lower == 'adadelta':
optimizer = optim.Adadelta( optimizer = optim.Adadelta(
parameters, lr=args.lr, weight_decay=weight_decay, eps=args.opt_eps) parameters, lr=args.lr, weight_decay=weight_decay, eps=args.opt_eps)

@ -0,0 +1,96 @@
"""
SGDP Optimizer Implementation copied from https://github.com/clovaai/AdamP/blob/master/adamp/sgdp.py
Paper: `Slowing Down the Weight Norm Increase in Momentum-based Optimizers` - https://arxiv.org/abs/2006.08217
Code: https://github.com/clovaai/AdamP
Copyright (c) 2020-present NAVER Corp.
MIT license
"""
import torch
import torch.nn as nn
from torch.optim.optimizer import Optimizer, required
import math
class SGDP(Optimizer):
    """SGD-with-momentum variant that projects the update for scale-invariant weights.

    Implements the SGDP update from `Slowing Down the Weight Norm Increase in
    Momentum-based Optimizers` (https://arxiv.org/abs/2006.08217).

    For parameters with more than one dimension, the cosine similarity between
    gradient and weights is checked (first per output channel, then over the
    whole tensor). When the largest similarity is below ``delta / sqrt(dim)``,
    the component of the update parallel to the weights is removed and weight
    decay is scaled by ``wd_ratio``.

    Args:
        params: Iterable of parameters or dicts defining parameter groups.
        lr: Learning rate (required).
        momentum: Momentum factor.
        dampening: Dampening applied to the gradient when updating the
            momentum buffer.
        weight_decay: Weight decay coefficient; applied when non-zero.
        nesterov: If True, use ``grad + momentum * buf`` as the update.
        eps: Term added to norms for numerical stability.
        delta: Threshold (scaled by ``1 / sqrt(dim)``) that decides whether
            the projection is applied.
        wd_ratio: Multiplier applied to weight decay when the projection is
            applied.
    """

    def __init__(self, params, lr=required, momentum=0, dampening=0,
                 weight_decay=0, nesterov=False, eps=1e-8, delta=0.1, wd_ratio=0.1):
        defaults = dict(lr=lr, momentum=momentum, dampening=dampening, weight_decay=weight_decay,
                        nesterov=nesterov, eps=eps, delta=delta, wd_ratio=wd_ratio)
        super(SGDP, self).__init__(params, defaults)

    def _channel_view(self, x):
        """Flatten ``x`` to 2-D, keeping the first dimension."""
        # reshape (not view) so non-contiguous tensors are supported.
        return x.reshape(x.size(0), -1)

    def _layer_view(self, x):
        """Flatten ``x`` to a single row of shape ``(1, numel)``."""
        # reshape (not view) so non-contiguous tensors are supported.
        return x.reshape(1, -1)

    def _cosine_similarity(self, x, y, eps, view_func):
        """Return the row-wise absolute cosine similarity of ``x`` and ``y``.

        Both tensors are first flattened with ``view_func``; ``eps`` is added
        to each norm to avoid division by zero.
        """
        x = view_func(x)
        y = view_func(y)

        x_norm = x.norm(dim=1).add_(eps)
        y_norm = y.norm(dim=1).add_(eps)
        dot = (x * y).sum(dim=1)

        return dot.abs() / x_norm / y_norm

    def _projection(self, p, grad, perturb, delta, wd_ratio, eps):
        """Remove the weight-parallel component of ``perturb`` when applicable.

        Tries the channel-wise view first, then the layer-wise view; the first
        view whose maximum cosine similarity falls below the threshold wins.

        Returns:
            Tuple ``(perturb, wd)`` where ``perturb`` has been modified in
            place if a projection happened and ``wd`` is ``wd_ratio`` in that
            case, otherwise ``1``.
        """
        wd = 1
        # Shape that broadcasts a per-row scalar back over the parameter.
        expand_size = [-1] + [1] * (len(p.shape) - 1)
        for view_func in [self._channel_view, self._layer_view]:
            cosine_sim = self._cosine_similarity(grad, p.data, eps, view_func)

            if cosine_sim.max() < delta / math.sqrt(view_func(p.data).size(1)):
                # Normalised weights for this view.
                p_n = p.data / view_func(p.data).norm(dim=1).view(expand_size).add_(eps)
                # Subtract the projection of the update onto the weights.
                perturb -= p_n * view_func(p_n * perturb).sum(dim=1).view(expand_size)
                wd = wd_ratio

                return perturb, wd

        return perturb, wd

    def step(self, closure=None):
        """Perform a single optimization step.

        Args:
            closure: Optional callable that re-evaluates the model and returns
                the loss.

        Returns:
            The value returned by ``closure``, or ``None`` if none was given.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue
                grad = p.grad.data
                state = self.state[p]

                # State initialization
                if len(state) == 0:
                    state['momentum'] = torch.zeros_like(p.data)

                # SGD
                buf = state['momentum']
                # Keyword form replaces the deprecated add_(scalar, tensor) overload.
                buf.mul_(momentum).add_(grad, alpha=1 - dampening)
                if nesterov:
                    d_p = grad + momentum * buf
                else:
                    # d_p aliases the momentum buffer here, so the in-place
                    # projection below also modifies the stored buffer.
                    # NOTE(review): kept as in the original; confirm this is intended.
                    d_p = buf

                # Projection (only for parameters with more than one dim)
                wd_ratio = 1
                if len(p.shape) > 1:
                    d_p, wd_ratio = self._projection(
                        p, grad, d_p, group['delta'], group['wd_ratio'], group['eps'])

                # Weight decay (applied directly to the weights, scaled by 1 / (1 - momentum))
                if weight_decay != 0:
                    p.data.mul_(1 - group['lr'] * group['weight_decay'] * wd_ratio / (1 - momentum))

                # Step
                p.data.add_(d_p, alpha=-group['lr'])

        return loss

@ -12,7 +12,7 @@ import torch
from .scheduler import Scheduler from .scheduler import Scheduler
logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
class CosineLRScheduler(Scheduler): class CosineLRScheduler(Scheduler):
@ -48,7 +48,7 @@ class CosineLRScheduler(Scheduler):
assert t_initial > 0 assert t_initial > 0
assert lr_min >= 0 assert lr_min >= 0
if t_initial == 1 and t_mul == 1 and decay_rate == 1: if t_initial == 1 and t_mul == 1 and decay_rate == 1:
logger.warning("Cosine annealing scheduler will have no effect on the learning " _logger.warning("Cosine annealing scheduler will have no effect on the learning "
"rate since t_initial = t_mul = eta_mul = 1.") "rate since t_initial = t_mul = eta_mul = 1.")
self.t_initial = t_initial self.t_initial = t_initial
self.t_mul = t_mul self.t_mul = t_mul

@ -12,7 +12,7 @@ import torch
from .scheduler import Scheduler from .scheduler import Scheduler
logger = logging.getLogger(__name__) _logger = logging.getLogger(__name__)
class TanhLRScheduler(Scheduler): class TanhLRScheduler(Scheduler):

@ -14,6 +14,7 @@ import glob
import csv import csv
import operator import operator
import logging import logging
import logging.handlers
import numpy as np import numpy as np
from collections import OrderedDict from collections import OrderedDict
try: try:
@ -26,6 +27,9 @@ except ImportError:
from torch import distributed as dist from torch import distributed as dist
_logger = logging.getLogger(__name__)
def unwrap_model(model): def unwrap_model(model):
if isinstance(model, ModelEma): if isinstance(model, ModelEma):
return unwrap_model(model.ema) return unwrap_model(model.ema)
@ -89,7 +93,7 @@ class CheckpointSaver:
checkpoints_str = "Current checkpoints:\n" checkpoints_str = "Current checkpoints:\n"
for c in self.checkpoint_files: for c in self.checkpoint_files:
checkpoints_str += ' {}\n'.format(c) checkpoints_str += ' {}\n'.format(c)
logging.info(checkpoints_str) _logger.info(checkpoints_str)
if metric is not None and (self.best_metric is None or self.cmp(metric, self.best_metric)): if metric is not None and (self.best_metric is None or self.cmp(metric, self.best_metric)):
self.best_epoch = epoch self.best_epoch = epoch
@ -126,10 +130,10 @@ class CheckpointSaver:
to_delete = self.checkpoint_files[delete_index:] to_delete = self.checkpoint_files[delete_index:]
for d in to_delete: for d in to_delete:
try: try:
logging.debug("Cleaning checkpoint: {}".format(d)) _logger.debug("Cleaning checkpoint: {}".format(d))
os.remove(d[0]) os.remove(d[0])
except Exception as e: except Exception as e:
logging.error("Exception '{}' while deleting checkpoint".format(e)) _logger.error("Exception '{}' while deleting checkpoint".format(e))
self.checkpoint_files = self.checkpoint_files[:delete_index] self.checkpoint_files = self.checkpoint_files[:delete_index]
def save_recovery(self, model, optimizer, args, epoch, model_ema=None, use_amp=False, batch_idx=0): def save_recovery(self, model, optimizer, args, epoch, model_ema=None, use_amp=False, batch_idx=0):
@ -139,10 +143,10 @@ class CheckpointSaver:
self._save(save_path, model, optimizer, args, epoch, model_ema, use_amp=use_amp) self._save(save_path, model, optimizer, args, epoch, model_ema, use_amp=use_amp)
if os.path.exists(self.last_recovery_file): if os.path.exists(self.last_recovery_file):
try: try:
logging.debug("Cleaning recovery: {}".format(self.last_recovery_file)) _logger.debug("Cleaning recovery: {}".format(self.last_recovery_file))
os.remove(self.last_recovery_file) os.remove(self.last_recovery_file)
except Exception as e: except Exception as e:
logging.error("Exception '{}' while removing {}".format(e, self.last_recovery_file)) _logger.error("Exception '{}' while removing {}".format(e, self.last_recovery_file))
self.last_recovery_file = self.curr_recovery_file self.last_recovery_file = self.curr_recovery_file
self.curr_recovery_file = save_path self.curr_recovery_file = save_path
@ -284,9 +288,9 @@ class ModelEma:
name = k name = k
new_state_dict[name] = v new_state_dict[name] = v
self.ema.load_state_dict(new_state_dict) self.ema.load_state_dict(new_state_dict)
logging.info("Loaded state_dict_ema") _logger.info("Loaded state_dict_ema")
else: else:
logging.warning("Failed to find state_dict_ema, starting from loaded model weights") _logger.warning("Failed to find state_dict_ema, starting from loaded model weights")
def update(self, model): def update(self, model):
# correct a mismatch in state dict keys # correct a mismatch in state dict keys
@ -312,8 +316,13 @@ class FormatterNoInfo(logging.Formatter):
return logging.Formatter.format(self, record) return logging.Formatter.format(self, record)
def setup_default_logging(default_level=logging.INFO): def setup_default_logging(default_level=logging.INFO, log_path=''):
console_handler = logging.StreamHandler() console_handler = logging.StreamHandler()
console_handler.setFormatter(FormatterNoInfo()) console_handler.setFormatter(FormatterNoInfo())
logging.root.addHandler(console_handler) logging.root.addHandler(console_handler)
logging.root.setLevel(default_level) logging.root.setLevel(default_level)
if log_path:
file_handler = logging.handlers.RotatingFileHandler(log_path, maxBytes=(1024 ** 2 * 2), backupCount=3)
file_formatter = logging.Formatter("%(asctime)s - %(name)20s: [%(levelname)8s] - %(message)s")
file_handler.setFormatter(file_formatter)
logging.root.addHandler(file_handler)

@ -40,6 +40,7 @@ import torch.nn as nn
import torchvision.utils import torchvision.utils
torch.backends.cudnn.benchmark = True torch.backends.cudnn.benchmark = True
_logger = logging.getLogger('train')
# The first arg parser parses out only the --config argument, this argument is used to # The first arg parser parses out only the --config argument, this argument is used to
@ -232,7 +233,7 @@ def main():
if 'WORLD_SIZE' in os.environ: if 'WORLD_SIZE' in os.environ:
args.distributed = int(os.environ['WORLD_SIZE']) > 1 args.distributed = int(os.environ['WORLD_SIZE']) > 1
if args.distributed and args.num_gpu > 1: if args.distributed and args.num_gpu > 1:
logging.warning('Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.') _logger.warning('Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.')
args.num_gpu = 1 args.num_gpu = 1
args.device = 'cuda:0' args.device = 'cuda:0'
@ -248,10 +249,10 @@ def main():
assert args.rank >= 0 assert args.rank >= 0
if args.distributed: if args.distributed:
logging.info('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.' _logger.info('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
% (args.rank, args.world_size)) % (args.rank, args.world_size))
else: else:
logging.info('Training with a single process on %d GPUs.' % args.num_gpu) _logger.info('Training with a single process on %d GPUs.' % args.num_gpu)
torch.manual_seed(args.seed + args.rank) torch.manual_seed(args.seed + args.rank)
@ -270,7 +271,7 @@ def main():
checkpoint_path=args.initial_checkpoint) checkpoint_path=args.initial_checkpoint)
if args.local_rank == 0: if args.local_rank == 0:
logging.info('Model %s created, param count: %d' % _logger.info('Model %s created, param count: %d' %
(args.model, sum([m.numel() for m in model.parameters()]))) (args.model, sum([m.numel() for m in model.parameters()])))
data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0) data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0)
@ -286,7 +287,7 @@ def main():
if args.num_gpu > 1: if args.num_gpu > 1:
if args.amp: if args.amp:
logging.warning( _logger.warning(
'AMP does not work well with nn.DataParallel, disabling. Use distributed mode for multi-GPU AMP.') 'AMP does not work well with nn.DataParallel, disabling. Use distributed mode for multi-GPU AMP.')
args.amp = False args.amp = False
model = nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda() model = nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()
@ -300,7 +301,7 @@ def main():
model, optimizer = amp.initialize(model, optimizer, opt_level='O1') model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
use_amp = True use_amp = True
if args.local_rank == 0: if args.local_rank == 0:
logging.info('NVIDIA APEX {}. AMP {}.'.format( _logger.info('NVIDIA APEX {}. AMP {}.'.format(
'installed' if has_apex else 'not installed', 'on' if use_amp else 'off')) 'installed' if has_apex else 'not installed', 'on' if use_amp else 'off'))
# optionally resume from a checkpoint # optionally resume from a checkpoint
@ -311,11 +312,11 @@ def main():
if resume_state and not args.no_resume_opt: if resume_state and not args.no_resume_opt:
if 'optimizer' in resume_state: if 'optimizer' in resume_state:
if args.local_rank == 0: if args.local_rank == 0:
logging.info('Restoring Optimizer state from checkpoint') _logger.info('Restoring Optimizer state from checkpoint')
optimizer.load_state_dict(resume_state['optimizer']) optimizer.load_state_dict(resume_state['optimizer'])
if use_amp and 'amp' in resume_state and 'load_state_dict' in amp.__dict__: if use_amp and 'amp' in resume_state and 'load_state_dict' in amp.__dict__:
if args.local_rank == 0: if args.local_rank == 0:
logging.info('Restoring NVIDIA AMP state from checkpoint') _logger.info('Restoring NVIDIA AMP state from checkpoint')
amp.load_state_dict(resume_state['amp']) amp.load_state_dict(resume_state['amp'])
del resume_state del resume_state
@ -337,16 +338,16 @@ def main():
else: else:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model) model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
if args.local_rank == 0: if args.local_rank == 0:
logging.info( _logger.info(
'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using ' 'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using '
'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.') 'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.')
except Exception as e: except Exception as e:
logging.error('Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1') _logger.error('Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1')
if has_apex: if has_apex:
model = DDP(model, delay_allreduce=True) model = DDP(model, delay_allreduce=True)
else: else:
if args.local_rank == 0: if args.local_rank == 0:
logging.info("Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP.") _logger.info("Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP.")
model = DDP(model, device_ids=[args.local_rank]) # can use device str in Torch >= 1.1 model = DDP(model, device_ids=[args.local_rank]) # can use device str in Torch >= 1.1
# NOTE: EMA model does not need to be wrapped by DDP # NOTE: EMA model does not need to be wrapped by DDP
@ -361,11 +362,11 @@ def main():
lr_scheduler.step(start_epoch) lr_scheduler.step(start_epoch)
if args.local_rank == 0: if args.local_rank == 0:
logging.info('Scheduled epochs: {}'.format(num_epochs)) _logger.info('Scheduled epochs: {}'.format(num_epochs))
train_dir = os.path.join(args.data, 'train') train_dir = os.path.join(args.data, 'train')
if not os.path.exists(train_dir): if not os.path.exists(train_dir):
logging.error('Training folder does not exist at: {}'.format(train_dir)) _logger.error('Training folder does not exist at: {}'.format(train_dir))
exit(1) exit(1)
dataset_train = Dataset(train_dir) dataset_train = Dataset(train_dir)
@ -404,7 +405,7 @@ def main():
if not os.path.isdir(eval_dir): if not os.path.isdir(eval_dir):
eval_dir = os.path.join(args.data, 'validation') eval_dir = os.path.join(args.data, 'validation')
if not os.path.isdir(eval_dir): if not os.path.isdir(eval_dir):
logging.error('Validation folder does not exist at: {}'.format(eval_dir)) _logger.error('Validation folder does not exist at: {}'.format(eval_dir))
exit(1) exit(1)
dataset_eval = Dataset(eval_dir) dataset_eval = Dataset(eval_dir)
@ -468,7 +469,7 @@ def main():
if args.distributed and args.dist_bn in ('broadcast', 'reduce'): if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
if args.local_rank == 0: if args.local_rank == 0:
logging.info("Distributing BatchNorm running means and vars") _logger.info("Distributing BatchNorm running means and vars")
distribute_bn(model, args.world_size, args.dist_bn == 'reduce') distribute_bn(model, args.world_size, args.dist_bn == 'reduce')
eval_metrics = validate(model, loader_eval, validate_loss_fn, args) eval_metrics = validate(model, loader_eval, validate_loss_fn, args)
@ -499,7 +500,7 @@ def main():
except KeyboardInterrupt: except KeyboardInterrupt:
pass pass
if best_metric is not None: if best_metric is not None:
logging.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch)) _logger.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))
def train_epoch( def train_epoch(
@ -559,7 +560,7 @@ def train_epoch(
losses_m.update(reduced_loss.item(), input.size(0)) losses_m.update(reduced_loss.item(), input.size(0))
if args.local_rank == 0: if args.local_rank == 0:
logging.info( _logger.info(
'Train: {} [{:>4d}/{} ({:>3.0f}%)] ' 'Train: {} [{:>4d}/{} ({:>3.0f}%)] '
'Loss: {loss.val:>9.6f} ({loss.avg:>6.4f}) ' 'Loss: {loss.val:>9.6f} ({loss.avg:>6.4f}) '
'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s ' 'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s '
@ -647,7 +648,7 @@ def validate(model, loader, loss_fn, args, log_suffix=''):
end = time.time() end = time.time()
if args.local_rank == 0 and (last_batch or batch_idx % args.log_interval == 0): if args.local_rank == 0 and (last_batch or batch_idx % args.log_interval == 0):
log_name = 'Test' + log_suffix log_name = 'Test' + log_suffix
logging.info( _logger.info(
'{0}: [{1:>4d}/{2}] ' '{0}: [{1:>4d}/{2}] '
'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) ' 'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) ' 'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) '

@ -5,7 +5,7 @@ This is intended to be a lean and easily modifiable ImageNet validation script f
models or training checkpoints against ImageNet or similarly organized image datasets. It prioritizes models or training checkpoints against ImageNet or similarly organized image datasets. It prioritizes
canonical PyTorch, standard Python style, and good performance. Repurpose as you see fit. canonical PyTorch, standard Python style, and good performance. Repurpose as you see fit.
Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman) Hacked together by Ross Wightman (https://github.com/rwightman)
""" """
import argparse import argparse
import os import os
@ -29,6 +29,8 @@ from timm.data import Dataset, DatasetTar, create_loader, resolve_data_config, R
from timm.utils import accuracy, AverageMeter, natural_key, setup_default_logging from timm.utils import accuracy, AverageMeter, natural_key, setup_default_logging
torch.backends.cudnn.benchmark = True torch.backends.cudnn.benchmark = True
_logger = logging.getLogger('validate')
parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation') parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation')
parser.add_argument('data', metavar='DIR', parser.add_argument('data', metavar='DIR',
@ -115,7 +117,7 @@ def validate(args):
load_checkpoint(model, args.checkpoint, args.use_ema) load_checkpoint(model, args.checkpoint, args.use_ema)
param_count = sum([m.numel() for m in model.parameters()]) param_count = sum([m.numel() for m in model.parameters()])
logging.info('Model %s created, param count: %d' % (args.model, param_count)) _logger.info('Model %s created, param count: %d' % (args.model, param_count))
data_config = resolve_data_config(vars(args), model=model) data_config = resolve_data_config(vars(args), model=model)
model, test_time_pool = apply_test_time_pool(model, data_config, args) model, test_time_pool = apply_test_time_pool(model, data_config, args)
@ -194,7 +196,7 @@ def validate(args):
end = time.time() end = time.time()
if batch_idx % args.log_freq == 0: if batch_idx % args.log_freq == 0:
logging.info( _logger.info(
'Test: [{0:>4d}/{1}] ' 'Test: [{0:>4d}/{1}] '
'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) ' 'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) '
'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) ' 'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) '
@ -220,9 +222,9 @@ def validate(args):
param_count=round(param_count / 1e6, 2), param_count=round(param_count / 1e6, 2),
img_size=data_config['input_size'][-1], img_size=data_config['input_size'][-1],
cropt_pct=crop_pct, cropt_pct=crop_pct,
interpolation=data_config['interpolation'] interpolation=data_config['interpolation']))
))
logging.info(' * Acc@1 {:.3f} ({:.3f}) Acc@5 {:.3f} ({:.3f})'.format( _logger.info(' * Acc@1 {:.3f} ({:.3f}) Acc@5 {:.3f} ({:.3f})'.format(
results['top1'], results['top1_err'], results['top5'], results['top5_err'])) results['top1'], results['top1_err'], results['top5'], results['top5_err']))
return results return results
@ -252,7 +254,7 @@ def main():
if len(model_cfgs): if len(model_cfgs):
results_file = args.results_file or './results-all.csv' results_file = args.results_file or './results-all.csv'
logging.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names))) _logger.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names)))
results = [] results = []
try: try:
start_batch_size = args.batch_size start_batch_size = args.batch_size

Loading…
Cancel
Save