diff --git a/.github/workflows/autosquash.yml b/.github/workflows/autosquash.yml
new file mode 100644
index 00000000..a898991c
--- /dev/null
+++ b/.github/workflows/autosquash.yml
@@ -0,0 +1,39 @@
+name: Autosquash
+on:
+  check_run:
+    types:
+      # Check runs completing successfully can unblock the
+      # corresponding pull requests and make them mergeable.
+      - completed
+  pull_request:
+    types:
+      # A closed pull request makes the checks on the other
+      # pull request on the same base outdated.
+      - closed
+      # Adding the autosquash label to a pull request can
+      # trigger an update or a merge.
+      - labeled
+  pull_request_review:
+    types:
+      # Review approvals can unblock the pull request and
+      # make it mergeable.
+      - submitted
+  # Success statuses can unblock the corresponding
+  # pull requests and make them mergeable.
+  status: {}
+
+jobs:
+  autosquash:
+    name: Autosquash
+    runs-on: ubuntu-18.04
+    steps:
+      - uses: tibdex/autosquash@v2
+        with:
+          # We can't use the built-in secrets.GITHUB_TOKEN yet because of this limitation:
+          # https://github.community/t5/GitHub-Actions/Triggering-a-new-workflow-from-another-workflow/td-p/31676
+          # In the meantime, use a token granting write access on the repo:
+          # - a GitHub App token
+          #   See https://github.com/marketplace/actions/github-app-token.
+          # - a personal access token
+          #   See https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line.
+          github_token: ${{ secrets.AUTOSQUASH_TOKEN }}
diff --git a/inference.py b/inference.py
index 3d89c71f..a5f9f569 100755
--- a/inference.py
+++ b/inference.py
@@ -17,6 +17,8 @@ from timm.data import Dataset, create_loader, resolve_data_config
 from timm.utils import AverageMeter, setup_default_logging
 
 torch.backends.cudnn.benchmark = True
+_logger = logging.getLogger('inference')
+
 
 parser = argparse.ArgumentParser(description='PyTorch ImageNet Inference')
 parser.add_argument('data', metavar='DIR',
@@ -67,7 +69,7 @@ def main():
         pretrained=args.pretrained,
         checkpoint_path=args.checkpoint)
 
-    logging.info('Model %s created, param count: %d' %
+    _logger.info('Model %s created, param count: %d' %
                  (args.model, sum([m.numel() for m in model.parameters()])))
 
     config = resolve_data_config(vars(args), model=model)
@@ -107,7 +109,7 @@ def main():
             end = time.time()
 
             if batch_idx % args.log_freq == 0:
-                logging.info('Predict: [{0}/{1}] Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
+                _logger.info('Predict: [{0}/{1}] Time {batch_time.val:.3f} ({batch_time.avg:.3f})'.format(
                     batch_idx, len(loader), batch_time=batch_time))
 
     topk_ids = np.concatenate(topk_ids, axis=0).squeeze()
diff --git a/timm/data/config.py b/timm/data/config.py
index dbae0da7..9cb4bda8 100644
--- a/timm/data/config.py
+++ b/timm/data/config.py
@@ -2,6 +2,9 @@ import logging
 
 from .constants import *
 
+_logger = logging.getLogger(__name__)
+
+
 def resolve_data_config(args, default_cfg={}, model=None, verbose=True):
     new_config = {}
     default_cfg = default_cfg
@@ -65,8 +68,8 @@ def resolve_data_config(args, default_cfg={}, model=None, verbose=True):
         new_config['crop_pct'] = default_cfg['crop_pct']
 
     if verbose:
-        logging.info('Data processing configuration for current model + dataset:')
+        _logger.info('Data processing configuration for current model + dataset:')
         for n, v in new_config.items():
-            logging.info('\t%s: %s' % (n, str(v)))
+            _logger.info('\t%s: %s' % (n, str(v)))
 
     return new_config
diff --git a/timm/models/efficientnet_builder.py b/timm/models/efficientnet_builder.py
index e7e47ce8..f670aa6c 100644
--- a/timm/models/efficientnet_builder.py
+++ b/timm/models/efficientnet_builder.py
@@ -18,10 +18,12 @@ from .layers import CondConv2d, get_condconv_initializer
 
 __all__ = ["EfficientNetBuilder", "decode_arch_def", "efficientnet_init_weights"]
 
+_logger = logging.getLogger(__name__)
+
 
 def _log_info_if(msg, condition):
     if condition:
-        logging.info(msg)
+        _logger.info(msg)
 
 
 def _parse_ksize(ss):
@@ -233,7 +235,7 @@ class EfficientNetBuilder:
         self.drop_path_rate = drop_path_rate
         if feature_location == 'depthwise':
             # old 'depthwise' mode renamed 'expansion' to match TF impl, old expansion mode didn't make sense
-            logging.warning("feature_location=='depthwise' is deprecated, using 'expansion'")
+            _logger.warning("feature_location=='depthwise' is deprecated, using 'expansion'")
             feature_location = 'expansion'
         self.feature_location = feature_location
         assert feature_location in ('bottleneck', 'expansion', '')
@@ -291,7 +293,7 @@ class EfficientNetBuilder:
         """ Build the blocks
         Args:
             in_chs: Number of input-channels passed to first block
-            model_block_args: A list of lists, outer list defines stacks (block stages), inner
+            model_block_args: A list of lists, outer list defines stages, inner
                 list contains strings defining block configuration(s)
         Return:
             List of block stacks (each stack wrapped in nn.Sequential)
diff --git a/timm/models/helpers.py b/timm/models/helpers.py
index a34593ce..20247f49 100644
--- a/timm/models/helpers.py
+++ b/timm/models/helpers.py
@@ -12,6 +12,9 @@ from .features import FeatureListNet, FeatureDictNet, FeatureHookNet
 from .layers import Conv2dSame
 
 
+_logger = logging.getLogger(__name__)
+
+
 def load_state_dict(checkpoint_path, use_ema=False):
     if checkpoint_path and os.path.isfile(checkpoint_path):
         checkpoint = torch.load(checkpoint_path, map_location='cpu')
@@ -28,10 +31,10 @@ def load_state_dict(checkpoint_path, use_ema=False):
             state_dict = new_state_dict
         else:
             state_dict = checkpoint
-        logging.info("Loaded {} from checkpoint '{}'".format(state_dict_key, checkpoint_path))
+        _logger.info("Loaded {} from checkpoint '{}'".format(state_dict_key, checkpoint_path))
         return state_dict
     else:
-        logging.error("No checkpoint found at '{}'".format(checkpoint_path))
+        _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
         raise FileNotFoundError()
 
 
@@ -59,13 +62,13 @@ def resume_checkpoint(model, checkpoint_path):
                 resume_epoch = checkpoint['epoch']
                 if 'version' in checkpoint and checkpoint['version'] > 1:
                     resume_epoch += 1  # start at the next epoch, old checkpoints incremented before save
-            logging.info("Loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
+            _logger.info("Loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
         else:
             model.load_state_dict(checkpoint)
-            logging.info("Loaded checkpoint '{}'".format(checkpoint_path))
+            _logger.info("Loaded checkpoint '{}'".format(checkpoint_path))
         return other_state, resume_epoch
     else:
-        logging.error("No checkpoint found at '{}'".format(checkpoint_path))
+        _logger.error("No checkpoint found at '{}'".format(checkpoint_path))
         raise FileNotFoundError()
 
 
@@ -73,7 +76,7 @@ def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=Non
     if cfg is None:
         cfg = getattr(model, 'default_cfg')
     if cfg is None or 'url' not in cfg or not cfg['url']:
-        logging.warning("Pretrained model URL is invalid, using random initialization.")
+        _logger.warning("Pretrained model URL is invalid, using random initialization.")
         return
 
     state_dict = model_zoo.load_url(cfg['url'], progress=False, map_location='cpu')
@@ -83,7 +86,7 @@ def load_pretrained(model, cfg=None, num_classes=1000, in_chans=3, filter_fn=Non
 
     if in_chans == 1:
         conv1_name = cfg['first_conv']
-        logging.info('Converting first conv (%s) from 3 to 1 channel' % conv1_name)
+        _logger.info('Converting first conv (%s) from 3 to 1 channel' % conv1_name)
         conv1_weight = state_dict[conv1_name + '.weight']
         state_dict[conv1_name + '.weight'] = conv1_weight.sum(dim=1, keepdim=True)
     elif in_chans != 3:
diff --git a/timm/models/hrnet.py b/timm/models/hrnet.py
index 7796b8a4..61c051b1 100644
--- a/timm/models/hrnet.py
+++ b/timm/models/hrnet.py
@@ -23,7 +23,7 @@ from .registry import register_model
 from .resnet import BasicBlock, Bottleneck  # leveraging ResNet blocks w/ additional features like SE
 
 _BN_MOMENTUM = 0.1
-logger = logging.getLogger(__name__)
+_logger = logging.getLogger(__name__)
 
 
 def _cfg(url='', **kwargs):
@@ -412,7 +412,7 @@ class HighResolutionModule(nn.Module):
         elif num_branches != len(num_inchannels):
             error_msg = 'NUM_BRANCHES({}) <> NUM_INCHANNELS({})'.format(num_branches, len(num_inchannels))
         if error_msg:
-            logger.error(error_msg)
+            _logger.error(error_msg)
             raise ValueError(error_msg)
 
     def _make_one_branch(self, branch_index, block, num_blocks, num_channels, stride=1):
diff --git a/timm/models/layers/test_time_pool.py b/timm/models/layers/test_time_pool.py
index 27c1099d..642f4e4b 100644
--- a/timm/models/layers/test_time_pool.py
+++ b/timm/models/layers/test_time_pool.py
@@ -10,6 +10,9 @@ import torch.nn.functional as F
 
 from .adaptive_avgmax_pool import adaptive_avgmax_pool2d
 
+_logger = logging.getLogger(__name__)
+
+
 class TestTimePoolHead(nn.Module):
     def __init__(self, base, original_pool=7):
         super(TestTimePoolHead, self).__init__()
@@ -40,7 +43,7 @@ def apply_test_time_pool(model, config, args):
     if not args.no_test_pool and \
             config['input_size'][-1] > model.default_cfg['input_size'][-1] and \
             config['input_size'][-2] > model.default_cfg['input_size'][-2]:
-        logging.info('Target input size %s > pretrained default %s, using test time pooling' %
+        _logger.info('Target input size %s > pretrained default %s, using test time pooling' %
                      (str(config['input_size'][-2:]), str(model.default_cfg['input_size'][-2:])))
         model = TestTimePoolHead(model, original_pool=model.default_cfg['pool_size'])
         test_time_pool = True
diff --git a/timm/optim/__init__.py b/timm/optim/__init__.py
index 994b36d2..ef4a0aec 100644
--- a/timm/optim/__init__.py
+++ b/timm/optim/__init__.py
@@ -5,4 +5,6 @@ from .radam import RAdam
 from .novograd import NovoGrad
 from .nvnovograd import NvNovoGrad
 from .lookahead import Lookahead
+from .adamp import AdamP
+from .sgdp import SGDP
 from .optim_factory import create_optimizer
diff --git a/timm/optim/adamp.py b/timm/optim/adamp.py
new file mode 100644
index 00000000..468c3e86
--- /dev/null
+++ b/timm/optim/adamp.py
@@ -0,0 +1,107 @@
+"""
+AdamP Optimizer Implementation copied from https://github.com/clovaai/AdamP/blob/master/adamp/adamp.py
+
+Paper: `Slowing Down the Weight Norm Increase in Momentum-based Optimizers` - https://arxiv.org/abs/2006.08217
+Code: https://github.com/clovaai/AdamP
+
+Copyright (c) 2020-present NAVER Corp.
+MIT license
+"""
+
+import torch
+import torch.nn as nn
+from torch.optim.optimizer import Optimizer, required
+import math
+
+class AdamP(Optimizer):
+    def __init__(self, params, lr=1e-3, betas=(0.9, 0.999), eps=1e-8,
+                 weight_decay=0, delta=0.1, wd_ratio=0.1, nesterov=False):
+        defaults = dict(lr=lr, betas=betas, eps=eps, weight_decay=weight_decay,
+                        delta=delta, wd_ratio=wd_ratio, nesterov=nesterov)
+        super(AdamP, self).__init__(params, defaults)
+
+    def _channel_view(self, x):
+        return x.view(x.size(0), -1)
+
+    def _layer_view(self, x):
+        return x.view(1, -1)
+
+    def _cosine_similarity(self, x, y, eps, view_func):
+        x = view_func(x)
+        y = view_func(y)
+
+        x_norm = x.norm(dim=1).add_(eps)
+        y_norm = y.norm(dim=1).add_(eps)
+        dot = (x * y).sum(dim=1)
+
+        return dot.abs() / x_norm / y_norm
+
+    def _projection(self, p, grad, perturb, delta, wd_ratio, eps):
+        wd = 1
+        expand_size = [-1] + [1] * (len(p.shape) - 1)
+        for view_func in [self._channel_view, self._layer_view]:
+
+            cosine_sim = self._cosine_similarity(grad, p.data, eps, view_func)
+
+            if cosine_sim.max() < delta / math.sqrt(view_func(p.data).size(1)):
+                p_n = p.data / view_func(p.data).norm(dim=1).view(expand_size).add_(eps)
+                perturb -= p_n * view_func(p_n * perturb).sum(dim=1).view(expand_size)
+                wd = wd_ratio
+
+                return perturb, wd
+
+        return perturb, wd
+
+    def step(self, closure=None):
+        loss = None
+        if closure is not None:
+            loss = closure()
+
+        for group in self.param_groups:
+            for p in group['params']:
+                if p.grad is None:
+                    continue
+
+                grad = p.grad.data
+                beta1, beta2 = group['betas']
+                nesterov = group['nesterov']
+
+                state = self.state[p]
+
+                # State initialization
+                if len(state) == 0:
+                    state['step'] = 0
+                    state['exp_avg'] = torch.zeros_like(p.data)
+                    state['exp_avg_sq'] = torch.zeros_like(p.data)
+
+                # Adam
+                exp_avg, exp_avg_sq = state['exp_avg'], state['exp_avg_sq']
+
+                state['step'] += 1
+                bias_correction1 = 1 - beta1 ** state['step']
+                bias_correction2 = 1 - beta2 ** state['step']
+
+                exp_avg.mul_(beta1).add_(1 - beta1, grad)
+                exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
+
+                denom = (exp_avg_sq.sqrt() / math.sqrt(bias_correction2)).add_(group['eps'])
+                step_size = group['lr'] / bias_correction1
+
+                if nesterov:
+                    perturb = (beta1 * exp_avg + (1 - beta1) * grad) / denom
+                else:
+                    perturb = exp_avg / denom
+
+                # Projection
+                wd_ratio = 1
+                if len(p.shape) > 1:
+                    perturb, wd_ratio = self._projection(p, grad, perturb, group['delta'], group['wd_ratio'], group['eps'])
+
+                # Weight decay
+                if group['weight_decay'] > 0:
+                    p.data.mul_(1 - group['lr'] * group['weight_decay'] * wd_ratio)
+
+                # Step
+                p.data.add_(-step_size, perturb)
+
+        return loss
diff --git a/timm/optim/optim_factory.py b/timm/optim/optim_factory.py
index 397f136b..7ae85120 100644
--- a/timm/optim/optim_factory.py
+++ b/timm/optim/optim_factory.py
@@ -3,7 +3,7 @@ Hacked together by / Copyright 2020 Ross Wightman
 """
 import torch
 from torch import optim as optim
-from timm.optim import Nadam, RMSpropTF, AdamW, RAdam, NovoGrad, NvNovoGrad, Lookahead
+from timm.optim import Nadam, RMSpropTF, AdamW, RAdam, NovoGrad, NvNovoGrad, Lookahead, AdamP, SGDP
 try:
     from apex.optimizers import FusedNovoGrad, FusedAdam, FusedLAMB, FusedSGD
     has_apex = True
@@ -63,6 +63,14 @@ def create_optimizer(args, model, filter_bias_and_bn=True):
     elif opt_lower == 'radam':
         optimizer = RAdam(
             parameters, lr=args.lr, weight_decay=weight_decay, eps=args.opt_eps)
+    elif opt_lower == 'adamp':
+        optimizer = AdamP(
+            parameters, lr=args.lr, weight_decay=weight_decay, eps=args.opt_eps,
+            delta=0.1, wd_ratio=0.01, nesterov=True)
+    elif opt_lower == 'sgdp':
+        optimizer = SGDP(
+            parameters, lr=args.lr, momentum=args.momentum, weight_decay=weight_decay,
+            eps=args.opt_eps, nesterov=True)
     elif opt_lower == 'adadelta':
         optimizer = optim.Adadelta(
             parameters, lr=args.lr, weight_decay=weight_decay, eps=args.opt_eps)
diff --git a/timm/optim/sgdp.py b/timm/optim/sgdp.py
new file mode 100644
index 00000000..f4a94aa3
--- /dev/null
+++ b/timm/optim/sgdp.py
@@ -0,0 +1,96 @@
+"""
+SGDP Optimizer Implementation copied from https://github.com/clovaai/AdamP/blob/master/adamp/sgdp.py
+
+Paper: `Slowing Down the Weight Norm Increase in Momentum-based Optimizers` - https://arxiv.org/abs/2006.08217
+Code: https://github.com/clovaai/AdamP
+
+Copyright (c) 2020-present NAVER Corp.
+MIT license
+"""
+
+import torch
+import torch.nn as nn
+from torch.optim.optimizer import Optimizer, required
+import math
+
+class SGDP(Optimizer):
+    def __init__(self, params, lr=required, momentum=0, dampening=0,
+                 weight_decay=0, nesterov=False, eps=1e-8, delta=0.1, wd_ratio=0.1):
+        defaults = dict(lr=lr, momentum=momentum, dampening=dampening, weight_decay=weight_decay,
+                        nesterov=nesterov, eps=eps, delta=delta, wd_ratio=wd_ratio)
+        super(SGDP, self).__init__(params, defaults)
+
+    def _channel_view(self, x):
+        return x.view(x.size(0), -1)
+
+    def _layer_view(self, x):
+        return x.view(1, -1)
+
+    def _cosine_similarity(self, x, y, eps, view_func):
+        x = view_func(x)
+        y = view_func(y)
+
+        x_norm = x.norm(dim=1).add_(eps)
+        y_norm = y.norm(dim=1).add_(eps)
+        dot = (x * y).sum(dim=1)
+
+        return dot.abs() / x_norm / y_norm
+
+    def _projection(self, p, grad, perturb, delta, wd_ratio, eps):
+        wd = 1
+        expand_size = [-1] + [1] * (len(p.shape) - 1)
+        for view_func in [self._channel_view, self._layer_view]:
+
+            cosine_sim = self._cosine_similarity(grad, p.data, eps, view_func)
+
+            if cosine_sim.max() < delta / math.sqrt(view_func(p.data).size(1)):
+                p_n = p.data / view_func(p.data).norm(dim=1).view(expand_size).add_(eps)
+                perturb -= p_n * view_func(p_n * perturb).sum(dim=1).view(expand_size)
+                wd = wd_ratio
+
+                return perturb, wd
+
+        return perturb, wd
+
+    def step(self, closure=None):
+        loss = None
+        if closure is not None:
+            loss = closure()
+
+        for group in self.param_groups:
+            weight_decay = group['weight_decay']
+            momentum = group['momentum']
+            dampening = group['dampening']
+            nesterov = group['nesterov']
+
+            for p in group['params']:
+                if p.grad is None:
+                    continue
+                grad = p.grad.data
+                state = self.state[p]
+
+                # State initialization
+                if len(state) == 0:
+                    state['momentum'] = torch.zeros_like(p.data)
+
+                # SGD
+                buf = state['momentum']
+                buf.mul_(momentum).add_(1 - dampening, grad)
+                if nesterov:
+                    d_p = grad + momentum * buf
+                else:
+                    d_p = buf
+
+                # Projection
+                wd_ratio = 1
+                if len(p.shape) > 1:
+                    d_p, wd_ratio = self._projection(p, grad, d_p, group['delta'], group['wd_ratio'], group['eps'])
+
+                # Weight decay
+                if weight_decay != 0:
+                    p.data.mul_(1 - group['lr'] * group['weight_decay'] * wd_ratio / (1-momentum))
+
+                # Step
+                p.data.add_(-group['lr'], d_p)
+
+        return loss
diff --git a/timm/scheduler/cosine_lr.py b/timm/scheduler/cosine_lr.py
index 15da5757..1532f092 100644
--- a/timm/scheduler/cosine_lr.py
+++ b/timm/scheduler/cosine_lr.py
@@ -12,7 +12,7 @@ import torch
 
 from .scheduler import Scheduler
 
-logger = logging.getLogger(__name__)
+_logger = logging.getLogger(__name__)
 
 
 class CosineLRScheduler(Scheduler):
@@ -48,7 +48,7 @@ class CosineLRScheduler(Scheduler):
         assert t_initial > 0
         assert lr_min >= 0
         if t_initial == 1 and t_mul == 1 and decay_rate == 1:
-            logger.warning("Cosine annealing scheduler will have no effect on the learning "
+            _logger.warning("Cosine annealing scheduler will have no effect on the learning "
                            "rate since t_initial = t_mul = eta_mul = 1.")
         self.t_initial = t_initial
         self.t_mul = t_mul
diff --git a/timm/scheduler/tanh_lr.py b/timm/scheduler/tanh_lr.py
index e32cb644..8cc338bb 100644
--- a/timm/scheduler/tanh_lr.py
+++ b/timm/scheduler/tanh_lr.py
@@ -12,7 +12,7 @@ import torch
 
 from .scheduler import Scheduler
 
-logger = logging.getLogger(__name__)
+_logger = logging.getLogger(__name__)
 
 
 class TanhLRScheduler(Scheduler):
diff --git a/timm/utils.py b/timm/utils.py
index 7afdca05..4739064f 100644
--- a/timm/utils.py
+++ b/timm/utils.py
@@ -14,6 +14,7 @@ import glob
 import csv
 import operator
 import logging
+import logging.handlers
 import numpy as np
 from collections import OrderedDict
 try:
@@ -26,6 +27,9 @@ except ImportError:
 from torch import distributed as dist
 
 
+_logger = logging.getLogger(__name__)
+
+
 def unwrap_model(model):
     if isinstance(model, ModelEma):
         return unwrap_model(model.ema)
@@ -89,7 +93,7 @@ class CheckpointSaver:
             checkpoints_str = "Current checkpoints:\n"
             for c in self.checkpoint_files:
                 checkpoints_str += ' {}\n'.format(c)
-            logging.info(checkpoints_str)
+            _logger.info(checkpoints_str)
 
         if metric is not None and (self.best_metric is None or self.cmp(metric, self.best_metric)):
             self.best_epoch = epoch
@@ -126,10 +130,10 @@ class CheckpointSaver:
         to_delete = self.checkpoint_files[delete_index:]
         for d in to_delete:
             try:
-                logging.debug("Cleaning checkpoint: {}".format(d))
+                _logger.debug("Cleaning checkpoint: {}".format(d))
                 os.remove(d[0])
             except Exception as e:
-                logging.error("Exception '{}' while deleting checkpoint".format(e))
+                _logger.error("Exception '{}' while deleting checkpoint".format(e))
         self.checkpoint_files = self.checkpoint_files[:delete_index]
 
     def save_recovery(self, model, optimizer, args, epoch, model_ema=None, use_amp=False, batch_idx=0):
@@ -139,10 +143,10 @@ class CheckpointSaver:
         self._save(save_path, model, optimizer, args, epoch, model_ema, use_amp=use_amp)
         if os.path.exists(self.last_recovery_file):
             try:
-                logging.debug("Cleaning recovery: {}".format(self.last_recovery_file))
+                _logger.debug("Cleaning recovery: {}".format(self.last_recovery_file))
                 os.remove(self.last_recovery_file)
             except Exception as e:
-                logging.error("Exception '{}' while removing {}".format(e, self.last_recovery_file))
+                _logger.error("Exception '{}' while removing {}".format(e, self.last_recovery_file))
         self.last_recovery_file = self.curr_recovery_file
         self.curr_recovery_file = save_path
 
@@ -284,9 +288,9 @@ class ModelEma:
                     name = k
                 new_state_dict[name] = v
             self.ema.load_state_dict(new_state_dict)
-            logging.info("Loaded state_dict_ema")
+            _logger.info("Loaded state_dict_ema")
         else:
-            logging.warning("Failed to find state_dict_ema, starting from loaded model weights")
+            _logger.warning("Failed to find state_dict_ema, starting from loaded model weights")
 
     def update(self, model):
         # correct a mismatch in state dict keys
@@ -312,8 +316,13 @@ class FormatterNoInfo(logging.Formatter):
         return logging.Formatter.format(self, record)
 
 
-def setup_default_logging(default_level=logging.INFO):
+def setup_default_logging(default_level=logging.INFO, log_path=''):
     console_handler = logging.StreamHandler()
     console_handler.setFormatter(FormatterNoInfo())
     logging.root.addHandler(console_handler)
     logging.root.setLevel(default_level)
+    if log_path:
+        file_handler = logging.handlers.RotatingFileHandler(log_path, maxBytes=(1024 ** 2 * 2), backupCount=3)
+        file_formatter = logging.Formatter("%(asctime)s - %(name)20s: [%(levelname)8s] - %(message)s")
+        file_handler.setFormatter(file_formatter)
+        logging.root.addHandler(file_handler)
diff --git a/train.py b/train.py
index 126bd5da..bc856f34 100755
--- a/train.py
+++ b/train.py
@@ -40,6 +40,7 @@ import torch.nn as nn
 import torchvision.utils
 
 torch.backends.cudnn.benchmark = True
+_logger = logging.getLogger('train')
 
 
 # The first arg parser parses out only the --config argument, this argument is used to
@@ -232,7 +233,7 @@ def main():
     if 'WORLD_SIZE' in os.environ:
         args.distributed = int(os.environ['WORLD_SIZE']) > 1
         if args.distributed and args.num_gpu > 1:
-            logging.warning('Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.')
+            _logger.warning('Using more than one GPU per process in distributed mode is not allowed. Setting num_gpu to 1.')
             args.num_gpu = 1
 
     args.device = 'cuda:0'
@@ -248,10 +249,10 @@ def main():
         assert args.rank >= 0
 
     if args.distributed:
-        logging.info('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
+        _logger.info('Training in distributed mode with multiple processes, 1 GPU per process. Process %d, total %d.'
                      % (args.rank, args.world_size))
     else:
-        logging.info('Training with a single process on %d GPUs.' % args.num_gpu)
+        _logger.info('Training with a single process on %d GPUs.' % args.num_gpu)
 
     torch.manual_seed(args.seed + args.rank)
 
@@ -270,7 +271,7 @@ def main():
         checkpoint_path=args.initial_checkpoint)
 
     if args.local_rank == 0:
-        logging.info('Model %s created, param count: %d' %
+        _logger.info('Model %s created, param count: %d' %
                      (args.model, sum([m.numel() for m in model.parameters()])))
 
     data_config = resolve_data_config(vars(args), model=model, verbose=args.local_rank == 0)
@@ -286,7 +287,7 @@ def main():
 
     if args.num_gpu > 1:
         if args.amp:
-            logging.warning(
+            _logger.warning(
                 'AMP does not work well with nn.DataParallel, disabling. Use distributed mode for multi-GPU AMP.')
             args.amp = False
         model = nn.DataParallel(model, device_ids=list(range(args.num_gpu))).cuda()
@@ -300,7 +301,7 @@ def main():
         model, optimizer = amp.initialize(model, optimizer, opt_level='O1')
         use_amp = True
     if args.local_rank == 0:
-        logging.info('NVIDIA APEX {}. AMP {}.'.format(
+        _logger.info('NVIDIA APEX {}. AMP {}.'.format(
             'installed' if has_apex else 'not installed', 'on' if use_amp else 'off'))
 
     # optionally resume from a checkpoint
@@ -311,11 +312,11 @@ def main():
         if resume_state and not args.no_resume_opt:
             if 'optimizer' in resume_state:
                 if args.local_rank == 0:
-                    logging.info('Restoring Optimizer state from checkpoint')
+                    _logger.info('Restoring Optimizer state from checkpoint')
                 optimizer.load_state_dict(resume_state['optimizer'])
             if use_amp and 'amp' in resume_state and 'load_state_dict' in amp.__dict__:
                 if args.local_rank == 0:
-                    logging.info('Restoring NVIDIA AMP state from checkpoint')
+                    _logger.info('Restoring NVIDIA AMP state from checkpoint')
                 amp.load_state_dict(resume_state['amp'])
     del resume_state
 
@@ -337,16 +338,16 @@ def main():
             else:
                 model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model)
             if args.local_rank == 0:
-                logging.info(
+                _logger.info(
                     'Converted model to use Synchronized BatchNorm. WARNING: You may have issues if using '
                     'zero initialized BN layers (enabled by default for ResNets) while sync-bn enabled.')
         except Exception as e:
-            logging.error('Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1')
+            _logger.error('Failed to enable Synchronized BatchNorm. Install Apex or Torch >= 1.1')
         if has_apex:
             model = DDP(model, delay_allreduce=True)
         else:
             if args.local_rank == 0:
-                logging.info("Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP.")
+                _logger.info("Using torch DistributedDataParallel. Install NVIDIA Apex for Apex DDP.")
             model = DDP(model, device_ids=[args.local_rank])  # can use device str in Torch >= 1.1
     # NOTE: EMA model does not need to be wrapped by DDP
 
@@ -361,11 +362,11 @@ def main():
         lr_scheduler.step(start_epoch)
 
     if args.local_rank == 0:
-        logging.info('Scheduled epochs: {}'.format(num_epochs))
+        _logger.info('Scheduled epochs: {}'.format(num_epochs))
 
     train_dir = os.path.join(args.data, 'train')
     if not os.path.exists(train_dir):
-        logging.error('Training folder does not exist at: {}'.format(train_dir))
+        _logger.error('Training folder does not exist at: {}'.format(train_dir))
         exit(1)
     dataset_train = Dataset(train_dir)
 
@@ -404,7 +405,7 @@ def main():
         if not os.path.isdir(eval_dir):
             eval_dir = os.path.join(args.data, 'validation')
             if not os.path.isdir(eval_dir):
-                logging.error('Validation folder does not exist at: {}'.format(eval_dir))
+                _logger.error('Validation folder does not exist at: {}'.format(eval_dir))
                 exit(1)
     dataset_eval = Dataset(eval_dir)
 
@@ -468,7 +469,7 @@ def main():
 
             if args.distributed and args.dist_bn in ('broadcast', 'reduce'):
                 if args.local_rank == 0:
-                    logging.info("Distributing BatchNorm running means and vars")
+                    _logger.info("Distributing BatchNorm running means and vars")
                 distribute_bn(model, args.world_size, args.dist_bn == 'reduce')
 
             eval_metrics = validate(model, loader_eval, validate_loss_fn, args)
@@ -499,7 +500,7 @@ def main():
     except KeyboardInterrupt:
         pass
    if best_metric is not None:
-        logging.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))
+        _logger.info('*** Best metric: {0} (epoch {1})'.format(best_metric, best_epoch))
 
 
 def train_epoch(
@@ -559,7 +560,7 @@ def train_epoch(
             losses_m.update(reduced_loss.item(), input.size(0))
 
             if args.local_rank == 0:
-                logging.info(
+                _logger.info(
                     'Train: {} [{:>4d}/{} ({:>3.0f}%)] '
                     'Loss: {loss.val:>9.6f} ({loss.avg:>6.4f}) '
                     'Time: {batch_time.val:.3f}s, {rate:>7.2f}/s '
@@ -647,7 +648,7 @@ def validate(model, loader, loss_fn, args, log_suffix=''):
             end = time.time()
             if args.local_rank == 0 and (last_batch or batch_idx % args.log_interval == 0):
                 log_name = 'Test' + log_suffix
-                logging.info(
+                _logger.info(
                     '{0}: [{1:>4d}/{2}] '
                     'Time: {batch_time.val:.3f} ({batch_time.avg:.3f}) '
                     'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) '
diff --git a/validate.py b/validate.py
index 8d893dba..2e5c89f9 100755
--- a/validate.py
+++ b/validate.py
@@ -5,7 +5,7 @@ This is intended to be a lean and easily modifiable ImageNet validation script f
 models or training checkpoints against ImageNet or similarly organized image datasets. It prioritizes
 canonical PyTorch, standard Python style, and good performance. Repurpose as you see fit.
 
-Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
+Hacked together by Ross Wightman (https://github.com/rwightman)
 """
 import argparse
 import os
@@ -29,6 +29,8 @@ from timm.data import Dataset, DatasetTar, create_loader, resolve_data_config, R
 from timm.utils import accuracy, AverageMeter, natural_key, setup_default_logging
 
 torch.backends.cudnn.benchmark = True
+_logger = logging.getLogger('validate')
+
 
 parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation')
 parser.add_argument('data', metavar='DIR',
@@ -115,7 +117,7 @@ def validate(args):
         load_checkpoint(model, args.checkpoint, args.use_ema)
 
     param_count = sum([m.numel() for m in model.parameters()])
-    logging.info('Model %s created, param count: %d' % (args.model, param_count))
+    _logger.info('Model %s created, param count: %d' % (args.model, param_count))
 
     data_config = resolve_data_config(vars(args), model=model)
     model, test_time_pool = apply_test_time_pool(model, data_config, args)
@@ -194,7 +196,7 @@ def validate(args):
             end = time.time()
 
             if batch_idx % args.log_freq == 0:
-                logging.info(
+                _logger.info(
                     'Test: [{0:>4d}/{1}] '
                     'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) '
                     'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) '
@@ -220,9 +222,9 @@ def validate(args):
         param_count=round(param_count / 1e6, 2),
         img_size=data_config['input_size'][-1],
         cropt_pct=crop_pct,
-        interpolation=data_config['interpolation']
-    ))
-    logging.info(' * Acc@1 {:.3f} ({:.3f}) Acc@5 {:.3f} ({:.3f})'.format(
+        interpolation=data_config['interpolation']))
+
+    _logger.info(' * Acc@1 {:.3f} ({:.3f}) Acc@5 {:.3f} ({:.3f})'.format(
         results['top1'], results['top1_err'], results['top5'], results['top5_err']))
 
     return results
@@ -252,7 +254,7 @@ def main():
 
     if len(model_cfgs):
         results_file = args.results_file or './results-all.csv'
-        logging.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names)))
+        _logger.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names)))
         results = []
         try:
             start_batch_size = args.batch_size
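
Usage sketch (reviewer note, not part of the diff): the AdamP and SGDP optimizers added above can be selected through create_optimizer with --opt adamp / --opt sgdp, or constructed directly from timm.optim. The snippet below is only an illustration; the toy model, data, and hyper-parameter values (mirroring the defaults wired into optim_factory.py) are assumptions for the example, and it targets the PyTorch API in use at the time of this change.

import torch
import torch.nn as nn
from timm.optim import AdamP, SGDP

model = nn.Linear(10, 2)

# Same settings as the new 'adamp' / 'sgdp' branches in create_optimizer.
adamp = AdamP(model.parameters(), lr=1e-3, weight_decay=1e-4,
              eps=1e-8, delta=0.1, wd_ratio=0.01, nesterov=True)
sgdp = SGDP(model.parameters(), lr=0.1, momentum=0.9, weight_decay=1e-4,
            eps=1e-8, nesterov=True)

x, y = torch.randn(4, 10), torch.randn(4, 2)
loss = ((model(x) - y) ** 2).mean()
loss.backward()
adamp.step()       # the projection only applies to parameters with more than one dim, e.g. the Linear weight
adamp.zero_grad()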
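
Usage sketch (reviewer note, not part of the diff): setup_default_logging now accepts an optional log_path that attaches a ~2 MB rotating file handler alongside the console handler, and the scripts log through named loggers instead of the root logger. A minimal illustration; the log file path here is an arbitrary example.

import logging
from timm.utils import setup_default_logging

_logger = logging.getLogger('train')

# Console output as before, plus a rotating log file (path is illustrative only).
setup_default_logging(default_level=logging.INFO, log_path='./train.log')
_logger.info('messages go to the console and to ./train.log')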