#!/usr/bin/env python """ ImageNet Validation Script This is intended to be a lean and easily modifiable ImageNet validation script for evaluating pretrained models or training checkpoints against ImageNet or similarly organized image datasets. It prioritizes canonical PyTorch, standard Python style, and good performance. Repurpose as you see fit. Hacked together by Ross Wightman (https://github.com/rwightman) """ import os import csv import glob import time import yaml import logging import torch import torch.nn as nn import torch.nn.parallel from collections import OrderedDict from contextlib import suppress from fire import Fire from addict import Dict from timm.models import create_model, apply_test_time_pool, load_checkpoint, is_model, list_models from timm.data import Dataset, DatasetTar, create_loader, resolve_data_config, RealLabelsImagenet from timm.utils import accuracy, AverageMeter, natural_key, setup_default_logging, set_jit_legacy has_apex = False try: from apex import amp has_apex = True except ImportError: pass has_native_amp = False try: if getattr(torch.cuda.amp, 'autocast') is not None: has_native_amp = True except AttributeError: pass torch.backends.cudnn.benchmark = True _logger = logging.getLogger('validate') def _update_config(config, params): for k, v in params.items(): *path, key = k.split(".") config.update({k: v}) print(f"Overwriting {k} = {v} (was {config.get(key)})") return config def _fit(**kwargs): with open('configs/validate.yaml') as stream: base_config = yaml.safe_load(stream) if "config" in kwargs.keys(): cfg_path = kwargs["config"] with open(cfg_path) as cfg: cfg_yaml = yaml.load(cfg, Loader=yaml.FullLoader) merged_cfg = _update_config(base_config, cfg_yaml) else: merged_cfg = base_config update_cfg = _update_config(merged_cfg, kwargs) return update_cfg def _parse_args(): args = Dict(Fire(_fit)) # Cache the args as a text string to save them in the output dir later args_text = yaml.safe_dump(args.__dict__, default_flow_style=False) return args, args_text def validate(args): # might as well try to validate something args.pretrained = args.pretrained or not args.checkpoint args.prefetcher = not args.no_prefetcher amp_autocast = suppress # do nothing if args.amp: if has_apex: args.apex_amp = True elif has_native_amp: args.native_amp = True else: _logger.warning("Neither APEX or Native Torch AMP is available, using FP32.") assert not args.apex_amp or not args.native_amp, "Only one AMP mode should be set." if args.native_amp: amp_autocast = torch.cuda.amp.autocast if args.legacy_jit: set_jit_legacy() # create model model = create_model( args.model, pretrained=args.pretrained, num_classes=args.num_classes, in_chans=3, global_pool=args.gp, scriptable=args.torchscript) if args.checkpoint: load_checkpoint(model, args.checkpoint, args.use_ema) param_count = sum([m.numel() for m in model.parameters()]) _logger.info('Model %s created, param count: %d' % (args.model, param_count)) data_config = resolve_data_config(vars(args), model=model) model, test_time_pool = (model, False) if args.no_test_pool else apply_test_time_pool(model, data_config) if args.torchscript: torch.jit.optimized_execution(True) model = torch.jit.script(model) model = model.cuda() if args.apex_amp: model = amp.initialize(model, opt_level='O1') if args.channels_last: model = model.to(memory_format=torch.channels_last) if args.num_gpu > 1: model = torch.nn.DataParallel(model, device_ids=list(range(args.num_gpu))) criterion = nn.CrossEntropyLoss().cuda() if os.path.splitext(args.data)[1] == '.tar' and os.path.isfile(args.data): dataset = DatasetTar(args.data, load_bytes=args.tf_preprocessing, class_map=args.class_map) else: dataset = Dataset(args.data, load_bytes=args.tf_preprocessing, class_map=args.class_map) if args.valid_labels: with open(args.valid_labels, 'r') as f: valid_labels = {int(line.rstrip()) for line in f} valid_labels = [i in valid_labels for i in range(args.num_classes)] else: valid_labels = None if args.real_labels: real_labels = RealLabelsImagenet(dataset.filenames(basename=True), real_json=args.real_labels) else: real_labels = None crop_pct = 1.0 if test_time_pool else data_config['crop_pct'] loader = create_loader( dataset, input_size=data_config['input_size'], batch_size=args.batch_size, use_prefetcher=args.prefetcher, interpolation=data_config['interpolation'], mean=data_config['mean'], std=data_config['std'], num_workers=args.workers, crop_pct=crop_pct, pin_memory=args.pin_mem, tf_preprocessing=args.tf_preprocessing) batch_time = AverageMeter() losses = AverageMeter() top1 = AverageMeter() top5 = AverageMeter() model.eval() with torch.no_grad(): # warmup, reduce variability of first batch time, especially for comparing torchscript vs non input = torch.randn((args.batch_size,) + data_config['input_size']).cuda() if args.channels_last: input = input.contiguous(memory_format=torch.channels_last) model(input) end = time.time() for batch_idx, (input, target) in enumerate(loader): if args.no_prefetcher: target = target.cuda() input = input.cuda() if args.channels_last: input = input.contiguous(memory_format=torch.channels_last) # compute output with amp_autocast(): output = model(input) if valid_labels is not None: output = output[:, valid_labels] loss = criterion(output, target) if real_labels is not None: real_labels.add_result(output) # measure accuracy and record loss acc1, acc5 = accuracy(output.detach(), target, topk=(1, 5)) losses.update(loss.item(), input.size(0)) top1.update(acc1.item(), input.size(0)) top5.update(acc5.item(), input.size(0)) # measure elapsed time batch_time.update(time.time() - end) end = time.time() if batch_idx % args.log_freq == 0: _logger.info( 'Test: [{0:>4d}/{1}] ' 'Time: {batch_time.val:.3f}s ({batch_time.avg:.3f}s, {rate_avg:>7.2f}/s) ' 'Loss: {loss.val:>7.4f} ({loss.avg:>6.4f}) ' 'Acc@1: {top1.val:>7.3f} ({top1.avg:>7.3f}) ' 'Acc@5: {top5.val:>7.3f} ({top5.avg:>7.3f})'.format( batch_idx, len(loader), batch_time=batch_time, rate_avg=input.size(0) / batch_time.avg, loss=losses, top1=top1, top5=top5)) if real_labels is not None: # real labels mode replaces topk values at the end top1a, top5a = real_labels.get_accuracy(k=1), real_labels.get_accuracy(k=5) else: top1a, top5a = top1.avg, top5.avg results = OrderedDict( top1=round(top1a, 4), top1_err=round(100 - top1a, 4), top5=round(top5a, 4), top5_err=round(100 - top5a, 4), param_count=round(param_count / 1e6, 2), img_size=data_config['input_size'][-1], cropt_pct=crop_pct, interpolation=data_config['interpolation']) _logger.info(' * Acc@1 {:.3f} ({:.3f}) Acc@5 {:.3f} ({:.3f})'.format( results['top1'], results['top1_err'], results['top5'], results['top5_err'])) return results def main(): setup_default_logging() args, args_text = _parse_args() model_cfgs = [] model_names = [] if os.path.isdir(args.checkpoint): # validate all checkpoints in a path with same model checkpoints = glob.glob(args.checkpoint + '/*.pth.tar') checkpoints += glob.glob(args.checkpoint + '/*.pth') model_names = list_models(args.model) model_cfgs = [(args.model, c) for c in sorted(checkpoints, key=natural_key)] else: if args.model == 'all': # validate all models in a list of names with pretrained checkpoints args.pretrained = True model_names = list_models(pretrained=True) model_cfgs = [(n, '') for n in model_names] elif not is_model(args.model): # model name doesn't exist, try as wildcard filter model_names = list_models(args.model) model_cfgs = [(n, '') for n in model_names] if len(model_cfgs): results_file = args.results_file or './results-all.csv' _logger.info('Running bulk validation on these pretrained models: {}'.format(', '.join(model_names))) results = [] try: start_batch_size = args.batch_size for m, c in model_cfgs: batch_size = start_batch_size args.model = m args.checkpoint = c result = OrderedDict(model=args.model) r = {} while not r and batch_size >= args.num_gpu: torch.cuda.empty_cache() try: args.batch_size = batch_size print('Validating with batch size: %d' % args.batch_size) r = validate(args) except RuntimeError as e: if batch_size <= args.num_gpu: print("Validation failed with no ability to reduce batch size. Exiting.") raise e batch_size = max(batch_size // 2, args.num_gpu) print("Validation failed, reducing batch size by 50%") result.update(r) if args.checkpoint: result['checkpoint'] = args.checkpoint results.append(result) except KeyboardInterrupt as e: pass results = sorted(results, key=lambda x: x['top1'], reverse=True) if len(results): write_results(results_file, results) else: validate(args) def write_results(results_file, results): with open(results_file, mode='w') as cf: dw = csv.DictWriter(cf, fieldnames=results[0].keys()) dw.writeheader() for r in results: dw.writerow(r) cf.flush() if __name__ == '__main__': main()