diff --git a/configs/eval.yaml b/configs/eval.yaml
new file mode 100644
index 00000000..aef709fc
--- /dev/null
+++ b/configs/eval.yaml
@@ -0,0 +1,8 @@
+model: 'tf_efficientnet_b0' # model architecture (default: dpn92)
+img_size: 224 # Input image dimension
+mean: null # Override mean pixel value of dataset
+std: null # Override std deviation of dataset
+num_classes: 2 # Number of classes in dataset
+checkpoint: 'output/train/20201124-182940-tf_efficientnet_b0-224/model_best.pth.tar' # path to latest checkpoint (default: none)
+pretrained: False # use pre-trained model
+num_gpu: 1 # Number of GPUs to use
diff --git a/configs/inference.yaml b/configs/inference.yaml
new file mode 100644
index 00000000..89325d3d
--- /dev/null
+++ b/configs/inference.yaml
@@ -0,0 +1,16 @@
+data: 'dataset/test' # path to dataset
+output_dir: 'output/' # path to output files
+model: 'tf_efficientnet_b5' # model architecture (default: dpn92)
+workers: 4 # number of data loading workers (default: 2)
+batch_size: 256 # mini-batch size (default: 256)
+img_size: 224 # Input image dimension
+mean: null # Override mean pixel value of dataset
+std: null # Override std deviation of dataset
+interpolation: '' # Image resize interpolation type (overrides model)
+num_classes: 1000 # Number of classes in dataset
+log_freq: 10 # batch logging frequency (default: 10)
+checkpoint: '' # path to latest checkpoint (default: none)
+pretrained: True # use pre-trained model
+num_gpu: 1 # Number of GPUs to use
+no_test_pool: False # disable test time pool
+topk: 5 # Top-k to output to CSV
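Both new configs leave mean and std as null; eval.py below resolves that to the ImageNet statistics shipped with timm. A minimal sketch of the fallback logic (resolve_norm is a hypothetical helper for illustration; the constants are timm's real values):

    from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD

    def resolve_norm(mean=None, std=None):
        # YAML null loads as None and falls back to the ImageNet stats,
        # (0.485, 0.456, 0.406) / (0.229, 0.224, 0.225).
        return (mean if mean is not None else IMAGENET_DEFAULT_MEAN,
                std if std is not None else IMAGENET_DEFAULT_STD)

    print(resolve_norm())  # ImageNet defaults
    print(resolve_norm(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]))  # overrides win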
diff --git a/configs/train.yaml b/configs/train.yaml
new file mode 100644
index 00000000..61824683
--- /dev/null
+++ b/configs/train.yaml
@@ -0,0 +1,102 @@
+# Dataset / Model parameters
+data: 'dataset/splitted' # path to dataset
+model: 'tf_efficientnet_b0' # Name of model to train (default: "countception")
+pretrained: True # Start with pretrained version of specified network (if avail)
+initial_checkpoint: '' # Initialize model from this checkpoint (default: none)
+resume: '' # Resume full model and optimizer state from checkpoint (default: none)
+no_resume_opt: False # prevent resume of optimizer state when resuming model
+num_classes: 2 # number of label classes (default: 1000)
+gp: null # Global pool type, one of (fast, avg, max, avgmax, avgmaxc). Model default if None.
+img_size: 224 # Image patch size (default: None => model default)
+crop_pct: null # Input image center crop percent (for validation only)
+mean: null # Override mean pixel value of dataset
+std: null # Override std deviation of dataset
+interpolation: '' # Image resize interpolation type (overrides model)
+batch_size: 16 # input batch size for training (default: 32)
+validation_batch_size_multiplier: 1 # ratio of validation batch size to training batch size (default: 1)
+
+# Optimizer parameters
+opt: 'Adam' # Optimizer (default: "sgd")
+opt_eps: null # Optimizer Epsilon (default: None, use opt default)
+opt_betas: null # Optimizer Betas (default: None, use opt default)
+momentum: 0.9 # Optimizer momentum (default: 0.9)
+weight_decay: 0.0 # weight decay (default: 0.0001)
+clip_grad: null # Clip gradient norm (default: None, no clipping)
+
+# Learning rate schedule parameters
+sched: 'plateau' # LR scheduler (default: "step")
+lr: 0.0001 # learning rate (default: 0.01)
+lr_noise: null # learning rate noise on/off epoch percentages
+lr_noise_pct: 0.67 # learning rate noise limit percent (default: 0.67)
+lr_noise_std: 1.0 # learning rate noise std-dev (default: 1.0)
+lr_cycle_mul: 1.0 # learning rate cycle len multiplier (default: 1.0)
+lr_cycle_limit: 1 # learning rate cycle limit
+warmup_lr: 0.0001 # warmup learning rate (default: 0.0001)
+min_lr: 0.00001 # lower lr bound for cyclic schedulers that hit 0 (1e-5)
+epochs: 30 # number of epochs to train (default: 200)
+start_epoch: null # manual epoch number (useful on restarts)
+decay_epochs: 5 # epoch interval to decay LR
+warmup_epochs: 10 # epochs to warmup LR, if scheduler supports
+cooldown_epochs: 0 # epochs to cooldown LR at min_lr, after cyclic schedule ends
+patience_epochs: 5 # patience epochs for Plateau LR scheduler (default: 10)
+decay_rate: 0.1 # LR decay rate (default: 0.1)
+
+# Augmentation & regularization parameters
+no_aug: False # Disable all training augmentation, override other train aug args
+scale: [1, 1] # Random resize scale (default: 0.08 1.0)
+ratio: [0.8, 1.2] # Random resize aspect ratio (default: 0.75 1.33)
+hflip: 0.5 # Horizontal flip training aug probability
+vflip: 0.0 # Vertical flip training aug probability
+color_jitter: 0.1 # Color jitter factor (default: 0.4)
+aa: null # Use AutoAugment policy. "v0" or "original". (default: None)
+aug_splits: 0 # Number of augmentation splits (default: 0, valid: 0 or >=2)
+jsd: False # Enable Jensen-Shannon Divergence + CE loss. Use with aug_splits.
+reprob: 0.0 # Random erase prob (default: 0.)
+remode: 'const' # Random erase mode (default: "const")
+recount: 1 # Random erase count (default: 1)
+resplit: False # Do not random erase first (clean) augmentation split
+mixup: 0.0 # mixup alpha, mixup enabled if > 0. (default: 0.)
+cutmix: 0.0 # cutmix alpha, cutmix enabled if > 0. (default: 0.)
+cutmix_minmax: null # cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None)
+mixup_prob: 1.0 # Probability of performing mixup or cutmix when either/both is enabled
+mixup_switch_prob: 0.5 # Probability of switching to cutmix when both mixup and cutmix enabled
+mixup_mode: 'batch' # How to apply mixup/cutmix params. Per "batch", "pair", or "elem"
+mixup_off_epoch: 0 # Turn off mixup after this epoch, disabled if 0 (default: 0)
+smoothing: 0.0 # Label smoothing (default: 0.1)
+train_interpolation: 'random' # Training interpolation (random, bilinear, bicubic; default: "random")
+drop: 0.0 # Dropout rate (default: 0.)
+drop_connect: null # Drop connect rate, DEPRECATED, use drop-path (default: None)
+drop_path: null # Drop path rate (default: None)
+drop_block: null # Drop block rate (default: None)
+
+# Batch norm parameters (only works with gen_efficientnet based models currently)
+bn_tf: False # Use Tensorflow BatchNorm defaults for models that support it (default: False)
+bn_momentum: null # BatchNorm momentum override (if not None)
+bn_eps: null # BatchNorm epsilon override (if not None)
+sync_bn: False # Enable NVIDIA Apex or Torch synchronized BatchNorm.
+dist_bn: '' # Distribute BatchNorm stats between nodes after each epoch ("broadcast", "reduce", or "")
+split_bn: False # Enable separate BN layers per augmentation split.
+
+# Model Exponential Moving Average
+model_ema: False # Enable tracking moving average of model weights
+model_ema_force_cpu: False # Force ema to be tracked on CPU, rank=0 node only. Disables EMA validation.
+model_ema_decay: 0.9998 # decay factor for model weights moving average (default: 0.9998)
+
+# Misc
+seed: 42 # random seed (default: 42)
+log_interval: 50 # how many batches to wait before logging training status
+recovery_interval: 0 # how many batches to wait before writing recovery checkpoint
+workers: 1 # how many training processes to use (default: 1)
+num_gpu: 1 # Number of GPUs to use
+save_images: False # save images of input batches every log interval for debugging
+amp: False # use NVIDIA Apex AMP or Native AMP for mixed precision training
+apex_amp: False # Use NVIDIA Apex AMP mixed precision
+native_amp: False # Use Native Torch AMP mixed precision
+channels_last: False # Use channels_last memory layout
+pin_mem: False # Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.
+no_prefetcher: False # disable fast prefetcher
+output: '' # path to output folder (default: none, current dir)
+eval_metric: 'top1' # Best metric (default: "top1")
+tta: 0 # Test/inference time augmentation (oversampling) factor. 0=None (default: 0)
+local_rank: 0 # local rank for distributed training
+use_multi_epochs_loader: False # use the multi-epochs-loader to save time at the beginning of every epoch
\ No newline at end of file
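The schedule keys above (sched 'plateau', decay_rate 0.1, patience_epochs 5, min_lr 1e-5, eval_metric 'top1') amount to a reduce-on-plateau policy. A rough torch-only sketch of what they control, assuming timm's plateau scheduler behaves like torch's ReduceLROnPlateau (it wraps the same idea; this is not the exact timm code):

    import torch

    model = torch.nn.Linear(8, 2)
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)  # opt / lr
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        mode='max',      # eval_metric 'top1': higher is better
        factor=0.1,      # decay_rate
        patience=5,      # patience_epochs
        min_lr=1e-5)     # min_lr

    for epoch in range(30):   # epochs
        top1 = 0.0            # stand-in for the real validation metric
        scheduler.step(top1)  # LR shrinks by 10x after 5 stagnant epochs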
diff --git a/configs/validate.yaml b/configs/validate.yaml
new file mode 100644
index 00000000..cf55a513
--- /dev/null
+++ b/configs/validate.yaml
@@ -0,0 +1,34 @@
+data: 'dataset/splitted/val' # path to dataset
+model: 'tf_efficientnet_b0' # Name of model to validate (default: "countception")
+# path to latest checkpoint (default: none)
+checkpoint: 'output/train/tf_efficientnet_b0-224/model_best.pth.tar'
+
+workers: 4 # number of data loading workers (default: 2)
+batch_size: 16 # mini-batch size (default: 256)
+img_size: 224 # Input image dimension, uses model default if empty
+crop_pct: null # Input image center crop pct
+mean: null # Override mean pixel value of dataset
+std: null # Override std deviation of dataset
+interpolation: '' # Image resize interpolation type (overrides model)
+num_classes: 2 # Number of classes in dataset
+class_map: '' # path to class to idx mapping file (default: "")
+gp: null # Global pool type, one of (fast, avg, max, avgmax, avgmaxc). Model default if None.
+log_freq: 10 # batch logging frequency (default: 10)
+pretrained: False # use pre-trained model
+num_gpu: 1 # Number of GPUs to use
+no_test_pool: False # disable test time pool
+no_prefetcher: False # disable fast prefetcher
+pin_mem: False # Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU
+channels_last: False # Use channels_last memory layout
+amp: False # Use AMP mixed precision. Defaults to Apex, fallback to native Torch AMP.
+apex_amp: False # Use NVIDIA Apex AMP mixed precision
+native_amp: False # Use Native Torch AMP mixed precision
+tf_preprocessing: False # Use Tensorflow preprocessing pipeline (requires CPU TF installed)
+use_ema: False # use ema version of weights if present
+torchscript: False # convert model to torchscript for inference
+legacy_jit: False # use legacy jit mode for pytorch 1.5/1.5.1/1.6 to get back fusion performance
+results_file: '' # Output csv file for validation results (summary)
+real_labels: '' # Real labels JSON file for imagenet evaluation
+valid_labels: '' # Valid label indices txt file for validation of partial label space
+
+
diff --git a/eval.py b/eval.py
new file mode 100644
index 00000000..db0fa825
--- /dev/null
+++ b/eval.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python
+"""PyTorch Evaluation Script
+
+An example script that wraps a trained classifier for single-image evaluation.
+
+Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
+
+--- Usage: ---
+
+from PIL import Image
+
+model = ClassificationModel()
+img = Image.open("image.jpg")
+out = model.eval(img)
+print(out)
+
+"""
+import yaml
+from fire import Fire
+from addict import Dict
+
+import torch
+from torchvision import transforms
+
+from timm.models import create_model
+from timm.data.constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
+
+torch.backends.cudnn.benchmark = True
+
+
+def _update_config(config, params):
+    # Flat merge, last writer wins; dotted keys are stored verbatim, not
+    # expanded into nested sections. Read the old value before overwriting
+    # so the log line reports the real previous value.
+    for k, v in params.items():
+        old = config.get(k)
+        config[k] = v
+        print(f"Overwriting {k} = {v} (was {old})")
+    return config
+
+
+def _fit(**kwargs):
+    with open('configs/eval.yaml') as stream:
+        base_config = yaml.safe_load(stream)
+
+    if "config" in kwargs:
+        # Pop the meta-key so it does not end up in the merged config.
+        cfg_path = kwargs.pop("config")
+        with open(cfg_path) as cfg:
+            cfg_yaml = yaml.safe_load(cfg)
+
+        merged_cfg = _update_config(base_config, cfg_yaml)
+    else:
+        merged_cfg = base_config
+
+    return _update_config(merged_cfg, kwargs)
+
+
+def _parse_args():
+    args = Dict(Fire(_fit))
+
+    # Cache the args as a text string to save them in the output dir later.
+    # addict.Dict keeps values in the mapping itself, not in __dict__.
+    args_text = yaml.safe_dump(args.to_dict(), default_flow_style=False)
+    return args, args_text
+
+
+class ClassificationModel:
+    def __init__(self):
+        self.args, self.args_text = _parse_args()
+
+        # might as well try to do something useful...
+        self.args.pretrained = self.args.pretrained or not self.args.checkpoint
+
+        # create model
+        self.model = create_model(
+            self.args.model,
+            num_classes=self.args.num_classes,
+            in_chans=3,
+            pretrained=self.args.pretrained,
+            checkpoint_path=self.args.checkpoint)
+        self.softmax = torch.nn.Softmax(dim=1)
+
+        mean = self.args.mean if self.args.mean is not None else IMAGENET_DEFAULT_MEAN
+        std = self.args.std if self.args.std is not None else IMAGENET_DEFAULT_STD
+        self.loader = transforms.Compose([
+            # Resize the PIL image before ToTensor: tensor inputs to Resize
+            # require a newer torchvision than the requirements pin guarantees.
+            transforms.Resize(self.args.img_size),
+            transforms.ToTensor(),
+            transforms.Normalize(
+                mean=torch.tensor(mean),
+                std=torch.tensor(std)),
+        ])
+
+        if self.args.num_gpu > 1:
+            self.model = torch.nn.DataParallel(self.model, device_ids=list(range(self.args.num_gpu))).cuda()
+        else:
+            self.model = self.model.cuda()
+            # self.model = self.model.cpu()
+        self.model.eval()
+
+    def eval(self, input):
+        with torch.no_grad():
+            # for OpenCV input
+            # input = Image.fromarray(np.uint8(input)).convert('RGB')
+            input = self.loader(input).float()
+            input = input.cuda()
+
+            probs = self.model(input[None, ...])  # add a batch dimension
+            probs = self.softmax(probs)           # logits -> class probabilities
+            probs = probs.cpu()
+        return probs.numpy()
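A fuller version of the docstring usage, spelling out the PIL import (the image path is illustrative, and a CUDA device is required since the model is moved to the GPU; note the constructor routes through Fire, so stray command-line arguments can interfere when this class is used as a library):

    from PIL import Image

    model = ClassificationModel()                 # reads configs/eval.yaml
    img = Image.open('image.jpg').convert('RGB')  # force 3 channels, matching in_chans=3
    probs = model.eval(img)                       # (1, num_classes) softmax scores
    print(probs.argmax(axis=1))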
diff --git a/inference.py b/inference.py
index 16d19944..c15c7b5c 100755
--- a/inference.py
+++ b/inference.py
@@ -7,8 +7,11 @@ Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
 """
 import os
 import time
-import argparse
 import logging
+import yaml
+from fire import Fire
+from addict import Dict
+
 import numpy as np
 import torch
@@ -20,44 +23,42 @@
 torch.backends.cudnn.benchmark = True
 _logger = logging.getLogger('inference')
 
-parser = argparse.ArgumentParser(description='PyTorch ImageNet Inference')
-parser.add_argument('data', metavar='DIR',
-                    help='path to dataset')
-parser.add_argument('--output_dir', metavar='DIR', default='./',
-                    help='path to output files')
-parser.add_argument('--model', '-m', metavar='MODEL', default='dpn92',
-                    help='model architecture (default: dpn92)')
-parser.add_argument('-j', '--workers', default=2, type=int, metavar='N',
-                    help='number of data loading workers (default: 2)')
-parser.add_argument('-b', '--batch-size', default=256, type=int,
-                    metavar='N', help='mini-batch size (default: 256)')
-parser.add_argument('--img-size', default=None, type=int,
-                    metavar='N', help='Input image dimension')
-parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN',
-                    help='Override mean pixel value of dataset')
-parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD',
-                    help='Override std deviation of of dataset')
-parser.add_argument('--interpolation', default='', type=str, metavar='NAME',
-                    help='Image resize interpolation type (overrides model)')
-parser.add_argument('--num-classes', type=int, default=1000,
                    help='Number classes in dataset')
-parser.add_argument('--log-freq', default=10, type=int,
-                    metavar='N', help='batch logging frequency (default: 10)')
-parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
-                    help='path to latest checkpoint (default: none)')
-parser.add_argument('--pretrained', dest='pretrained', action='store_true',
-                    help='use pre-trained model')
-parser.add_argument('--num-gpu', type=int, default=1,
-                    help='Number of GPUS to use')
-parser.add_argument('--no-test-pool', dest='no_test_pool', action='store_true',
-                    help='disable test time pool')
-parser.add_argument('--topk', default=5, type=int,
-                    metavar='N', help='Top-k to output to CSV')
+def _update_config(config, params):
+    # Flat merge, last writer wins; dotted keys are stored verbatim, not
+    # expanded into nested sections. Read the old value before overwriting
+    # so the log line reports the real previous value.
+    for k, v in params.items():
+        old = config.get(k)
+        config[k] = v
+        print(f"Overwriting {k} = {v} (was {old})")
+    return config
+
+
+def _fit(**kwargs):
+    with open('configs/inference.yaml') as stream:
+        base_config = yaml.safe_load(stream)
+
+    if "config" in kwargs:
+        # Pop the meta-key so it does not end up in the merged config.
+        cfg_path = kwargs.pop("config")
+        with open(cfg_path) as cfg:
+            cfg_yaml = yaml.safe_load(cfg)
+
+        merged_cfg = _update_config(base_config, cfg_yaml)
+    else:
+        merged_cfg = base_config
+
+    return _update_config(merged_cfg, kwargs)
+
+
+def _parse_args():
+    args = Dict(Fire(_fit))
+
+    # Cache the args as a text string to save them in the output dir later.
+    # addict.Dict keeps values in the mapping itself, not in __dict__.
+    args_text = yaml.safe_dump(args.to_dict(), default_flow_style=False)
+    return args, args_text
 
 
 def main():
     setup_default_logging()
-    args = parser.parse_args()
+    args, args_text = _parse_args()
 
     # might as well try to do something useful...
     args.pretrained = args.pretrained or not args.checkpoint
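Note that _update_config performs a flat, last-writer-wins merge; dotted keys are stored verbatim rather than expanded into nested sections. A quick illustration of the semantics (standalone, using the helper as defined above):

    cfg = {'batch_size': 256, 'topk': 5}
    _update_config(cfg, {'batch_size': 64})       # prints: Overwriting batch_size = 64 (was 256)
    _update_config(cfg, {'model.img_size': 192})  # kept under the literal key 'model.img_size'
    print(cfg)  # {'batch_size': 64, 'topk': 5, 'model.img_size': 192}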
diff --git a/requirements.txt b/requirements.txt
index 2d29a27c..540195d8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,5 @@
 torch>=1.4.0
 torchvision>=0.5.0
 pyyaml
+fire
+addict
diff --git a/train.py b/train.py
index ef3adf85..4e90a0bc 100755
--- a/train.py
+++ b/train.py
@@ -14,7 +14,6 @@ NVIDIA CUDA specific speedups adopted from NVIDIA Apex examples
 Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
 """
-import argparse
 import time
 import yaml
 import os
@@ -22,6 +21,10 @@ import logging
 from collections import OrderedDict
 from contextlib import suppress
 from datetime import datetime
+from fire import Fire
+from addict import Dict
+import numpy as np
+import random
 
 import torch
 import torch.nn as nn
@@ -54,229 +57,54 @@ except AttributeError:
 torch.backends.cudnn.benchmark = True
 _logger = logging.getLogger('train')
 
-# The first arg parser parses out only the --config argument, this argument is used to
-# load a yaml file containing key-values that override the defaults for the main parser below
-config_parser = parser = argparse.ArgumentParser(description='Training Config', add_help=False)
-parser.add_argument('-c', '--config', default='', type=str, metavar='FILE',
-                    help='YAML config file specifying default arguments')
-
-
-parser = argparse.ArgumentParser(description='PyTorch ImageNet Training')
-
-# Dataset / Model parameters
-parser.add_argument('data', metavar='DIR',
-                    help='path to dataset')
-parser.add_argument('--model', default='resnet101', type=str, metavar='MODEL',
-                    help='Name of model to train (default: "countception"')
-parser.add_argument('--pretrained', action='store_true', default=False,
-                    help='Start with pretrained version of specified network (if avail)')
-parser.add_argument('--initial-checkpoint', default='', type=str, metavar='PATH',
-                    help='Initialize model from this checkpoint (default: none)')
-parser.add_argument('--resume', default='', type=str, metavar='PATH',
-                    help='Resume full model and optimizer state from checkpoint (default: none)')
-parser.add_argument('--no-resume-opt', action='store_true', default=False,
-                    help='prevent resume of optimizer state when resuming model')
-parser.add_argument('--num-classes', type=int, default=1000, metavar='N',
-                    help='number of label classes (default: 1000)')
-parser.add_argument('--gp', default=None, type=str, metavar='POOL',
-                    help='Global pool type, one of (fast, avg, max, avgmax, avgmaxc). Model default if None.')
-parser.add_argument('--img-size', type=int, default=None, metavar='N',
-                    help='Image patch size (default: None => model default)')
-parser.add_argument('--crop-pct', default=None, type=float,
-                    metavar='N', help='Input image center crop percent (for validation only)')
-parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN',
-                    help='Override mean pixel value of dataset')
-parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD',
-                    help='Override std deviation of of dataset')
-parser.add_argument('--interpolation', default='', type=str, metavar='NAME',
-                    help='Image resize interpolation type (overrides model)')
-parser.add_argument('-b', '--batch-size', type=int, default=32, metavar='N',
-                    help='input batch size for training (default: 32)')
-parser.add_argument('-vb', '--validation-batch-size-multiplier', type=int, default=1, metavar='N',
-                    help='ratio of validation batch size to training batch size (default: 1)')
-
-# Optimizer parameters
-parser.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER',
-                    help='Optimizer (default: "sgd"')
-parser.add_argument('--opt-eps', default=None, type=float, metavar='EPSILON',
-                    help='Optimizer Epsilon (default: None, use opt default)')
-parser.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar='BETA',
-                    help='Optimizer Betas (default: None, use opt default)')
-parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
-                    help='Optimizer momentum (default: 0.9)')
-parser.add_argument('--weight-decay', type=float, default=0.0001,
-                    help='weight decay (default: 0.0001)')
-parser.add_argument('--clip-grad', type=float, default=None, metavar='NORM',
-                    help='Clip gradient norm (default: None, no clipping)')
-
-
-
-# Learning rate schedule parameters
-parser.add_argument('--sched', default='step', type=str, metavar='SCHEDULER',
-                    help='LR scheduler (default: "step"')
-parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
-                    help='learning rate (default: 0.01)')
-parser.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct',
-                    help='learning rate noise on/off epoch percentages')
-parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT',
-                    help='learning rate noise limit percent (default: 0.67)')
-parser.add_argument('--lr-noise-std', type=float, default=1.0, metavar='STDDEV',
-                    help='learning rate noise std-dev (default: 1.0)')
-parser.add_argument('--lr-cycle-mul', type=float, default=1.0, metavar='MULT',
-                    help='learning rate cycle len multiplier (default: 1.0)')
-parser.add_argument('--lr-cycle-limit', type=int, default=1, metavar='N',
-                    help='learning rate cycle limit')
-parser.add_argument('--warmup-lr', type=float, default=0.0001, metavar='LR',
-                    help='warmup learning rate (default: 0.0001)')
-parser.add_argument('--min-lr', type=float, default=1e-5, metavar='LR',
-                    help='lower lr bound for cyclic schedulers that hit 0 (1e-5)')
-parser.add_argument('--epochs', type=int, default=200, metavar='N',
-                    help='number of epochs to train (default: 2)')
-parser.add_argument('--start-epoch', default=None, type=int, metavar='N',
-                    help='manual epoch number (useful on restarts)')
-parser.add_argument('--decay-epochs', type=float, default=30, metavar='N',
-                    help='epoch interval to decay LR')
-parser.add_argument('--warmup-epochs', type=int, default=3, metavar='N',
-                    help='epochs to warmup LR, if scheduler supports')
-parser.add_argument('--cooldown-epochs', type=int, default=10, metavar='N',
-                    help='epochs to cooldown LR at min_lr, after cyclic schedule ends')
-parser.add_argument('--patience-epochs', type=int, default=10, metavar='N',
-                    help='patience epochs for Plateau LR scheduler (default: 10')
-parser.add_argument('--decay-rate', '--dr', type=float, default=0.1, metavar='RATE',
-                    help='LR decay rate (default: 0.1)')
-
-# Augmentation & regularization parameters
-parser.add_argument('--no-aug', action='store_true', default=False,
-                    help='Disable all training augmentation, override other train aug args')
-parser.add_argument('--scale', type=float, nargs='+', default=[0.08, 1.0], metavar='PCT',
-                    help='Random resize scale (default: 0.08 1.0)')
-parser.add_argument('--ratio', type=float, nargs='+', default=[3./4., 4./3.], metavar='RATIO',
-                    help='Random resize aspect ratio (default: 0.75 1.33)')
-parser.add_argument('--hflip', type=float, default=0.5,
-                    help='Horizontal flip training aug probability')
-parser.add_argument('--vflip', type=float, default=0.,
-                    help='Vertical flip training aug probability')
-parser.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT',
-                    help='Color jitter factor (default: 0.4)')
-parser.add_argument('--aa', type=str, default=None, metavar='NAME',
-                    help='Use AutoAugment policy. "v0" or "original". (default: None)'),
-parser.add_argument('--aug-splits', type=int, default=0,
-                    help='Number of augmentation splits (default: 0, valid: 0 or >=2)')
-parser.add_argument('--jsd', action='store_true', default=False,
-                    help='Enable Jensen-Shannon Divergence + CE loss. Use with `--aug-splits`.')
-parser.add_argument('--reprob', type=float, default=0., metavar='PCT',
-                    help='Random erase prob (default: 0.)')
-parser.add_argument('--remode', type=str, default='const',
-                    help='Random erase mode (default: "const")')
-parser.add_argument('--recount', type=int, default=1,
-                    help='Random erase count (default: 1)')
-parser.add_argument('--resplit', action='store_true', default=False,
-                    help='Do not random erase first (clean) augmentation split')
-parser.add_argument('--mixup', type=float, default=0.0,
-                    help='mixup alpha, mixup enabled if > 0. (default: 0.)')
-parser.add_argument('--cutmix', type=float, default=0.0,
-                    help='cutmix alpha, cutmix enabled if > 0. (default: 0.)')
-parser.add_argument('--cutmix-minmax', type=float, nargs='+', default=None,
-                    help='cutmix min/max ratio, overrides alpha and enables cutmix if set (default: None)')
-parser.add_argument('--mixup-prob', type=float, default=1.0,
-                    help='Probability of performing mixup or cutmix when either/both is enabled')
-parser.add_argument('--mixup-switch-prob', type=float, default=0.5,
-                    help='Probability of switching to cutmix when both mixup and cutmix enabled')
-parser.add_argument('--mixup-mode', type=str, default='batch',
Per "batch", "pair", or "elem"') -parser.add_argument('--mixup-off-epoch', default=0, type=int, metavar='N', - help='Turn off mixup after this epoch, disabled if 0 (default: 0)') -parser.add_argument('--smoothing', type=float, default=0.1, - help='Label smoothing (default: 0.1)') -parser.add_argument('--train-interpolation', type=str, default='random', - help='Training interpolation (random, bilinear, bicubic default: "random")') -parser.add_argument('--drop', type=float, default=0.0, metavar='PCT', - help='Dropout rate (default: 0.)') -parser.add_argument('--drop-connect', type=float, default=None, metavar='PCT', - help='Drop connect rate, DEPRECATED, use drop-path (default: None)') -parser.add_argument('--drop-path', type=float, default=None, metavar='PCT', - help='Drop path rate (default: None)') -parser.add_argument('--drop-block', type=float, default=None, metavar='PCT', - help='Drop block rate (default: None)') - -# Batch norm parameters (only works with gen_efficientnet based models currently) -parser.add_argument('--bn-tf', action='store_true', default=False, - help='Use Tensorflow BatchNorm defaults for models that support it (default: False)') -parser.add_argument('--bn-momentum', type=float, default=None, - help='BatchNorm momentum override (if not None)') -parser.add_argument('--bn-eps', type=float, default=None, - help='BatchNorm epsilon override (if not None)') -parser.add_argument('--sync-bn', action='store_true', - help='Enable NVIDIA Apex or Torch synchronized BatchNorm.') -parser.add_argument('--dist-bn', type=str, default='', - help='Distribute BatchNorm stats between nodes after each epoch ("broadcast", "reduce", or "")') -parser.add_argument('--split-bn', action='store_true', - help='Enable separate BN layers per augmentation split.') - -# Model Exponential Moving Average -parser.add_argument('--model-ema', action='store_true', default=False, - help='Enable tracking moving average of model weights') -parser.add_argument('--model-ema-force-cpu', action='store_true', default=False, - help='Force ema to be tracked on CPU, rank=0 node only. 
-                    help='Force ema to be tracked on CPU, rank=0 node only. Disables EMA validation.')
-parser.add_argument('--model-ema-decay', type=float, default=0.9998,
-                    help='decay factor for model weights moving average (default: 0.9998)')
-
-# Misc
-parser.add_argument('--seed', type=int, default=42, metavar='S',
-                    help='random seed (default: 42)')
-parser.add_argument('--log-interval', type=int, default=50, metavar='N',
-                    help='how many batches to wait before logging training status')
-parser.add_argument('--recovery-interval', type=int, default=0, metavar='N',
-                    help='how many batches to wait before writing recovery checkpoint')
-parser.add_argument('-j', '--workers', type=int, default=4, metavar='N',
-                    help='how many training processes to use (default: 1)')
-parser.add_argument('--num-gpu', type=int, default=1,
-                    help='Number of GPUS to use')
-parser.add_argument('--save-images', action='store_true', default=False,
-                    help='save images of input bathes every log interval for debugging')
-parser.add_argument('--amp', action='store_true', default=False,
-                    help='use NVIDIA Apex AMP or Native AMP for mixed precision training')
-parser.add_argument('--apex-amp', action='store_true', default=False,
-                    help='Use NVIDIA Apex AMP mixed precision')
-parser.add_argument('--native-amp', action='store_true', default=False,
-                    help='Use Native Torch AMP mixed precision')
-parser.add_argument('--channels-last', action='store_true', default=False,
-                    help='Use channels_last memory layout')
-parser.add_argument('--pin-mem', action='store_true', default=False,
-                    help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.')
-parser.add_argument('--no-prefetcher', action='store_true', default=False,
-                    help='disable fast prefetcher')
-parser.add_argument('--output', default='', type=str, metavar='PATH',
-                    help='path to output folder (default: none, current dir)')
-parser.add_argument('--eval-metric', default='top1', type=str, metavar='EVAL_METRIC',
-                    help='Best metric (default: "top1"')
-parser.add_argument('--tta', type=int, default=0, metavar='N',
-                    help='Test/inference time augmentation (oversampling) factor. 0=None (default: 0)')
-parser.add_argument("--local_rank", default=0, type=int)
-parser.add_argument('--use-multi-epochs-loader', action='store_true', default=False,
-                    help='use the multi-epochs-loader to save time at the beginning of every epoch')
+def _update_config(config, params):
+    # Flat merge, last writer wins; dotted keys are stored verbatim, not
+    # expanded into nested sections. Read the old value before overwriting
+    # so the log line reports the real previous value.
+    for k, v in params.items():
+        old = config.get(k)
+        config[k] = v
+        print(f"Overwriting {k} = {v} (was {old})")
+    return config
+
+
-def _parse_args():
-    # Do we have a config file to parse?
-    args_config, remaining = config_parser.parse_known_args()
-    if args_config.config:
-        with open(args_config.config, 'r') as f:
-            cfg = yaml.safe_load(f)
-            parser.set_defaults(**cfg)
-
-    # The main arg parser parses the rest of the args, the usual
-    # defaults will have been overridden if config file specified.
-    args = parser.parse_args(remaining)
+def _fit(**kwargs):
+    with open('configs/train.yaml') as stream:
+        base_config = yaml.safe_load(stream)
+
+    if "config" in kwargs:
+        # Pop the meta-key so it does not end up in the merged config.
+        cfg_path = kwargs.pop("config")
+        with open(cfg_path) as cfg:
+            cfg_yaml = yaml.safe_load(cfg)
+
+        merged_cfg = _update_config(base_config, cfg_yaml)
+    else:
+        merged_cfg = base_config
+
+    return _update_config(merged_cfg, kwargs)
+
+
+def _parse_args():
+    args = Dict(Fire(_fit))
 
     # Cache the args as a text string to save them in the output dir later
-    args_text = yaml.safe_dump(args.__dict__, default_flow_style=False)
+    # addict.Dict keeps values in the mapping itself, not in __dict__.
+    args_text = yaml.safe_dump(args.to_dict(), default_flow_style=False)
     return args, args_text
 
 
+def set_deterministic(seed=42, precision=13):
+    np.random.seed(seed)
+    random.seed(seed)
+    # torch.backends.cudnn.benchmark = False
+    # torch.backends.cudnn.deterministic = True
+    torch.cuda.manual_seed_all(seed)
+    torch.manual_seed(seed)
+    torch.set_printoptions(precision=precision)
+
+
 def main():
     setup_default_logging()
     args, args_text = _parse_args()
+    set_deterministic(args.seed)
 
     args.prefetcher = not args.no_prefetcher
     args.distributed = False
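set_deterministic seeds Python, NumPy, and Torch but deliberately leaves cudnn.benchmark = True (set at module import) for speed, so runs are repeatable only up to cuDNN's autotuned kernel selection. If bit-exact reproducibility matters more than throughput, the standard PyTorch switches the script keeps commented out would look like this (a sketch, not part of this PR):

    import random

    import numpy as np
    import torch

    def set_fully_deterministic(seed=42):
        # Same seeding as set_deterministic(), plus the cuDNN switches.
        random.seed(seed)
        np.random.seed(seed)
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        torch.backends.cudnn.benchmark = False
        torch.backends.cudnn.deterministic = True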
diff --git a/validate.py b/validate.py
index 5a0d388c..968d0e5d 100755
--- a/validate.py
+++ b/validate.py
@@ -7,17 +7,19 @@ canonical PyTorch, standard Python style, and good performance. Repurpose as you see fit.
 Hacked together by Ross Wightman (https://github.com/rwightman)
 """
-import argparse
 import os
 import csv
 import glob
 import time
+import yaml
 import logging
 import torch
 import torch.nn as nn
 import torch.nn.parallel
 from collections import OrderedDict
 from contextlib import suppress
+from fire import Fire
+from addict import Dict
 
 from timm.models import create_model, apply_test_time_pool, load_checkpoint, is_model, list_models
 from timm.data import Dataset, DatasetTar, create_loader, resolve_data_config, RealLabelsImagenet
@@ -41,67 +43,37 @@
 torch.backends.cudnn.benchmark = True
 _logger = logging.getLogger('validate')
 
-parser = argparse.ArgumentParser(description='PyTorch ImageNet Validation')
-parser.add_argument('data', metavar='DIR',
-                    help='path to dataset')
-parser.add_argument('--model', '-m', metavar='MODEL', default='dpn92',
-                    help='model architecture (default: dpn92)')
-parser.add_argument('-j', '--workers', default=4, type=int, metavar='N',
-                    help='number of data loading workers (default: 2)')
-parser.add_argument('-b', '--batch-size', default=256, type=int,
-                    metavar='N', help='mini-batch size (default: 256)')
-parser.add_argument('--img-size', default=None, type=int,
-                    metavar='N', help='Input image dimension, uses model default if empty')
-parser.add_argument('--crop-pct', default=None, type=float,
-                    metavar='N', help='Input image center crop pct')
-parser.add_argument('--mean', type=float, nargs='+', default=None, metavar='MEAN',
-                    help='Override mean pixel value of dataset')
-parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD',
-                    help='Override std deviation of of dataset')
-parser.add_argument('--interpolation', default='', type=str, metavar='NAME',
-                    help='Image resize interpolation type (overrides model)')
-parser.add_argument('--num-classes', type=int, default=1000,
-                    help='Number classes in dataset')
-parser.add_argument('--class-map', default='', type=str, metavar='FILENAME',
-                    help='path to class to idx mapping file (default: "")')
-parser.add_argument('--gp', default=None, type=str, metavar='POOL',
-                    help='Global pool type, one of (fast, avg, max, avgmax, avgmaxc). Model default if None.')
-parser.add_argument('--log-freq', default=10, type=int,
-                    metavar='N', help='batch logging frequency (default: 10)')
-parser.add_argument('--checkpoint', default='', type=str, metavar='PATH',
-                    help='path to latest checkpoint (default: none)')
-parser.add_argument('--pretrained', dest='pretrained', action='store_true',
-                    help='use pre-trained model')
-parser.add_argument('--num-gpu', type=int, default=1,
-                    help='Number of GPUS to use')
-parser.add_argument('--no-test-pool', dest='no_test_pool', action='store_true',
-                    help='disable test time pool')
-parser.add_argument('--no-prefetcher', action='store_true', default=False,
-                    help='disable fast prefetcher')
-parser.add_argument('--pin-mem', action='store_true', default=False,
-                    help='Pin CPU memory in DataLoader for more efficient (sometimes) transfer to GPU.')
-parser.add_argument('--channels-last', action='store_true', default=False,
-                    help='Use channels_last memory layout')
-parser.add_argument('--amp', action='store_true', default=False,
-                    help='Use AMP mixed precision. Defaults to Apex, fallback to native Torch AMP.')
-parser.add_argument('--apex-amp', action='store_true', default=False,
-                    help='Use NVIDIA Apex AMP mixed precision')
-parser.add_argument('--native-amp', action='store_true', default=False,
-                    help='Use Native Torch AMP mixed precision')
-parser.add_argument('--tf-preprocessing', action='store_true', default=False,
-                    help='Use Tensorflow preprocessing pipeline (require CPU TF installed')
-parser.add_argument('--use-ema', dest='use_ema', action='store_true',
-                    help='use ema version of weights if present')
-parser.add_argument('--torchscript', dest='torchscript', action='store_true',
-                    help='convert model torchscript for inference')
-parser.add_argument('--legacy-jit', dest='legacy_jit', action='store_true',
-                    help='use legacy jit mode for pytorch 1.5/1.5.1/1.6 to get back fusion performance')
-parser.add_argument('--results-file', default='', type=str, metavar='FILENAME',
-                    help='Output csv file for validation results (summary)')
-parser.add_argument('--real-labels', default='', type=str, metavar='FILENAME',
-                    help='Real labels JSON file for imagenet evaluation')
-parser.add_argument('--valid-labels', default='', type=str, metavar='FILENAME',
-                    help='Valid label indices txt file for validation of partial label space')
+def _update_config(config, params):
+    # Flat merge, last writer wins; dotted keys are stored verbatim, not
+    # expanded into nested sections. Read the old value before overwriting
+    # so the log line reports the real previous value.
+    for k, v in params.items():
+        old = config.get(k)
+        config[k] = v
+        print(f"Overwriting {k} = {v} (was {old})")
+    return config
+
+
+def _fit(**kwargs):
+    with open('configs/validate.yaml') as stream:
+        base_config = yaml.safe_load(stream)
+
+    if "config" in kwargs:
+        # Pop the meta-key so it does not end up in the merged config.
+        cfg_path = kwargs.pop("config")
+        with open(cfg_path) as cfg:
+            cfg_yaml = yaml.safe_load(cfg)
+
+        merged_cfg = _update_config(base_config, cfg_yaml)
+    else:
+        merged_cfg = base_config
+
+    return _update_config(merged_cfg, kwargs)
+
+
+def _parse_args():
+    args = Dict(Fire(_fit))
+
+    # Cache the args as a text string to save them in the output dir later.
+    # addict.Dict keeps values in the mapping itself, not in __dict__.
+    args_text = yaml.safe_dump(args.to_dict(), default_flow_style=False)
+    return args, args_text
 
 
 def validate(args):
@@ -254,14 +226,15 @@
         interpolation=data_config['interpolation'])
 
     _logger.info(' * Acc@1 {:.3f} ({:.3f}) Acc@5 {:.3f} ({:.3f})'.format(
-       results['top1'], results['top1_err'], results['top5'], results['top5_err']))
+        results['top1'], results['top1_err'], results['top5'], results['top5_err']))
 
     return results
 
 
 def main():
     setup_default_logging()
-    args = parser.parse_args()
+    args, args_text = _parse_args()
+
     model_cfgs = []
     model_names = []
     if os.path.isdir(args.checkpoint):