@@ -79,8 +79,8 @@ parser.add_argument('--train-split', metavar='NAME', default='train',
                     help='dataset train split (default: train)')
 parser.add_argument('--val-split', metavar='NAME', default='validation',
                     help='dataset validation split (default: validation)')
-parser.add_argument('--model', default='resnet101', type=str, metavar='MODEL',
-                    help='Name of model to train (default: "countception"')
+parser.add_argument('--model', default='resnet50', type=str, metavar='MODEL',
+                    help='Name of model to train (default: "resnet50"')
 parser.add_argument('--pretrained', action='store_true', default=False,
                     help='Start with pretrained version of specified network (if avail)')
 parser.add_argument('--initial-checkpoint', default='', type=str, metavar='PATH',
@@ -105,10 +105,10 @@ parser.add_argument('--std', type=float, nargs='+', default=None, metavar='STD',
                     help='Override std deviation of of dataset')
 parser.add_argument('--interpolation', default='', type=str, metavar='NAME',
                     help='Image resize interpolation type (overrides model)')
-parser.add_argument('-b', '--batch-size', type=int, default=32, metavar='N',
-                    help='input batch size for training (default: 32)')
-parser.add_argument('-vb', '--validation-batch-size-multiplier', type=int, default=1, metavar='N',
-                    help='ratio of validation batch size to training batch size (default: 1)')
+parser.add_argument('-b', '--batch-size', type=int, default=128, metavar='N',
+                    help='input batch size for training (default: 128)')
+parser.add_argument('-vb', '--validation-batch-size', type=int, default=None, metavar='N',
+                    help='validation batch size override (default: None)')
 
 # Optimizer parameters
 parser.add_argument('--opt', default='sgd', type=str, metavar='OPTIMIZER',
@@ -119,8 +119,8 @@ parser.add_argument('--opt-betas', default=None, type=float, nargs='+', metavar=
                     help='Optimizer Betas (default: None, use opt default)')
 parser.add_argument('--momentum', type=float, default=0.9, metavar='M',
                     help='Optimizer momentum (default: 0.9)')
-parser.add_argument('--weight-decay', type=float, default=0.0001,
-                    help='weight decay (default: 0.0001)')
+parser.add_argument('--weight-decay', type=float, default=2e-5,
+                    help='weight decay (default: 2e-5)')
 parser.add_argument('--clip-grad', type=float, default=None, metavar='NORM',
                     help='Clip gradient norm (default: None, no clipping)')
 parser.add_argument('--clip-mode', type=str, default='norm',
@@ -128,10 +128,10 @@ parser.add_argument('--clip-mode', type=str, default='norm',
 
 
 # Learning rate schedule parameters
-parser.add_argument('--sched', default='step', type=str, metavar='SCHEDULER',
+parser.add_argument('--sched', default='cosine', type=str, metavar='SCHEDULER',
                     help='LR scheduler (default: "step"')
-parser.add_argument('--lr', type=float, default=0.01, metavar='LR',
-                    help='learning rate (default: 0.01)')
+parser.add_argument('--lr', type=float, default=0.05, metavar='LR',
+                    help='learning rate (default: 0.05)')
 parser.add_argument('--lr-noise', type=float, nargs='+', default=None, metavar='pct, pct',
                     help='learning rate noise on/off epoch percentages')
 parser.add_argument('--lr-noise-pct', type=float, default=0.67, metavar='PERCENT',
@@ -148,15 +148,15 @@ parser.add_argument('--lr-k-decay', type=float, default=1.0,
                     help='learning rate k-decay for cosine/poly (default: 1.0)')
 parser.add_argument('--warmup-lr', type=float, default=0.0001, metavar='LR',
                     help='warmup learning rate (default: 0.0001)')
-parser.add_argument('--min-lr', type=float, default=1e-5, metavar='LR',
+parser.add_argument('--min-lr', type=float, default=1e-6, metavar='LR',
                     help='lower lr bound for cyclic schedulers that hit 0 (1e-5)')
-parser.add_argument('--epochs', type=int, default=200, metavar='N',
-                    help='number of epochs to train (default: 2)')
+parser.add_argument('--epochs', type=int, default=300, metavar='N',
+                    help='number of epochs to train (default: 300)')
 parser.add_argument('--epoch-repeats', type=float, default=0., metavar='N',
                     help='epoch repeat multiplier (number of times to repeat dataset epoch per train epoch).')
 parser.add_argument('--start-epoch', default=None, type=int, metavar='N',
                     help='manual epoch number (useful on restarts)')
-parser.add_argument('--decay-epochs', type=float, default=30, metavar='N',
+parser.add_argument('--decay-epochs', type=float, default=100, metavar='N',
                     help='epoch interval to decay LR')
 parser.add_argument('--warmup-epochs', type=int, default=3, metavar='N',
                     help='epochs to warmup LR, if scheduler supports')
@@ -182,7 +182,7 @@ parser.add_argument('--color-jitter', type=float, default=0.4, metavar='PCT',
                     help='Color jitter factor (default: 0.4)')
 parser.add_argument('--aa', type=str, default=None, metavar='NAME',
                     help='Use AutoAugment policy. "v0" or "original". (default: None)'),
-parser.add_argument('--aug-repeat', type=int, default=0,
+parser.add_argument('--aug-repeats', type=int, default=0,
                     help='Number of augmentation repetitions (distributed training only) (default: 0)')
 parser.add_argument('--aug-splits', type=int, default=0,
                     help='Number of augmentation splits (default: 0, valid: 0 or >=2)')
@@ -192,8 +192,8 @@ parser.add_argument('--bce-loss', action='store_true', default=False,
                     help='Enable BCE loss w/ Mixup/CutMix use.')
 parser.add_argument('--reprob', type=float, default=0., metavar='PCT',
                     help='Random erase prob (default: 0.)')
-parser.add_argument('--remode', type=str, default='const',
-                    help='Random erase mode (default: "const")')
+parser.add_argument('--remode', type=str, default='pixel',
+                    help='Random erase mode (default: "pixel")')
 parser.add_argument('--recount', type=int, default=1,
                     help='Random erase count (default: 1)')
 parser.add_argument('--resplit', action='store_true', default=False,
@@ -234,7 +234,7 @@ parser.add_argument('--bn-eps', type=float, default=None,
                     help='BatchNorm epsilon override (if not None)')
 parser.add_argument('--sync-bn', action='store_true',
                     help='Enable NVIDIA Apex or Torch synchronized BatchNorm.')
-parser.add_argument('--dist-bn', type=str, default='',
+parser.add_argument('--dist-bn', type=str, default='reduce',
                     help='Distribute BatchNorm stats between nodes after each epoch ("broadcast", "reduce", or "")')
 parser.add_argument('--split-bn', action='store_true',
                     help='Enable separate BN layers per augmentation split.')
@@ -257,7 +257,7 @@ parser.add_argument('--recovery-interval', type=int, default=0, metavar='N',
 parser.add_argument('--checkpoint-hist', type=int, default=10, metavar='N',
                     help='number of checkpoints to keep (default: 10)')
 parser.add_argument('-j', '--workers', type=int, default=4, metavar='N',
-                    help='how many training processes to use (default: 1)')
+                    help='how many training processes to use (default: 4)')
 parser.add_argument('--save-images', action='store_true', default=False,
                     help='save images of input bathes every log interval for debugging')
 parser.add_argument('--amp', action='store_true', default=False,
@@ -539,7 +539,7 @@ def main():
     loader_eval = create_loader(
         dataset_eval,
         input_size=data_config['input_size'],
-        batch_size=args.validation_batch_size_multiplier * args.batch_size,
+        batch_size=args.validation_batch_size or args.batch_size,
         is_training=False,
         use_prefetcher=args.prefetcher,
         interpolation=data_config['interpolation'],