diff --git a/timm/data/auto_augment.py b/timm/data/auto_augment.py index 1b51ccb4..e461f67c 100644 --- a/timm/data/auto_augment.py +++ b/timm/data/auto_augment.py @@ -1,4 +1,4 @@ -""" AutoAugment, RandAugment, and AugMix for PyTorch +""" AutoAugment, RandAugment, AugMix, and 3-Augment for PyTorch This code implements the searched ImageNet policies with various tweaks and improvements and does not include any of the search code. @@ -9,18 +9,24 @@ AA and RA Implementation adapted from: AugMix adapted from: https://github.com/google-research/augmix +3-Augment based on: https://github.com/facebookresearch/deit/blob/main/README_revenge.md + Papers: AutoAugment: Learning Augmentation Policies from Data - https://arxiv.org/abs/1805.09501 Learning Data Augmentation Strategies for Object Detection - https://arxiv.org/abs/1906.11172 RandAugment: Practical automated data augmentation... - https://arxiv.org/abs/1909.13719 AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty - https://arxiv.org/abs/1912.02781 + 3-Augment: DeiT III: Revenge of the ViT - https://arxiv.org/abs/2204.07118 Hacked together by / Copyright 2019, Ross Wightman """ import random import math import re -from PIL import Image, ImageOps, ImageEnhance, ImageChops +from functools import partial +from typing import Dict, List, Optional, Union + +from PIL import Image, ImageOps, ImageEnhance, ImageChops, ImageFilter import PIL import numpy as np @@ -175,6 +181,24 @@ def sharpness(img, factor, **__): return ImageEnhance.Sharpness(img).enhance(factor) +def gaussian_blur(img, factor, **__): + img = img.filter(ImageFilter.GaussianBlur(radius=factor)) + return img + + +def gaussian_blur_rand(img, factor, **__): + radius_min = 0.1 + radius_max = 2.0 + img = img.filter(ImageFilter.GaussianBlur(radius=random.uniform(radius_min, radius_max * factor))) + return img + + +def desaturate(img, factor, **_): + factor = min(1., max(0., 1. 
- factor))
+    # enhance factor 0 = grayscale, 1.0 = no-change
+    return ImageEnhance.Color(img).enhance(factor)
+
+
 def _randomly_negate(v):
     """With 50% prob, negate the value"""
     return -v if random.random() > 0.5 else v
@@ -200,6 +224,14 @@ def _enhance_increasing_level_to_arg(level, _hparams):
     return level,
 
 
+def _minmax_level_to_arg(level, _hparams, min_val=0., max_val=1.0, clamp=True):
+    # Map level from [0, _LEVEL_DENOM] to [min_val, max_val], clamping into range.
+    level = (level / _LEVEL_DENOM)
+    level = min_val + (max_val - min_val) * level
+    if clamp:
+        level = max(min_val, min(max_val, level))
+    return level,
+
+
 def _shear_level_to_arg(level, _hparams):
     # range [-0.3, 0.3]
     level = (level / _LEVEL_DENOM) * 0.3
@@ -246,7 +278,7 @@ def _posterize_original_level_to_arg(level, _hparams):
 def _solarize_level_to_arg(level, _hparams):
     # range [0, 256]
     # intensity/severity of augmentation decreases with level
-    return int((level / _LEVEL_DENOM) * 256),
+    return min(256, int((level / _LEVEL_DENOM) * 256)),
 
 
 def _solarize_increasing_level_to_arg(level, _hparams):
@@ -257,7 +289,7 @@ def _solarize_increasing_level_to_arg(level, _hparams):
 
 def _solarize_add_level_to_arg(level, _hparams):
     # range [0, 110]
-    return int((level / _LEVEL_DENOM) * 110),
+    return min(128, int((level / _LEVEL_DENOM) * 110)),
 
 
 LEVEL_TO_ARG = {
@@ -286,6 +318,9 @@ LEVEL_TO_ARG = {
     'TranslateY': translate_y_abs,
     'TranslateXRel': translate_x_rel,
     'TranslateYRel': translate_y_rel,
+    'Desaturate': partial(_minmax_level_to_arg, min_val=0.5, max_val=1.0),
+    'GaussianBlur': partial(_minmax_level_to_arg, min_val=0.1, max_val=2.0),
+    'GaussianBlurRand': _minmax_level_to_arg,
 }
 
 
@@ -314,6 +349,9 @@ NAME_TO_OP = {
     'TranslateY': translate_y_abs,
     'TranslateXRel': translate_x_rel,
     'TranslateYRel': translate_y_rel,
+    'Desaturate': desaturate,
+    'GaussianBlur': gaussian_blur,
+    'GaussianBlurRand': gaussian_blur_rand,
 }
 
 
@@ -347,6 +385,7 @@ class AugmentOp:
         if self.magnitude_std > 0:
             # magnitude randomization enabled
             if self.magnitude_std == float('inf'):
+                # inf == 
uniform sampling magnitude = random.uniform(0, magnitude) elif self.magnitude_std > 0: magnitude = random.gauss(magnitude, self.magnitude_std) @@ -499,6 +538,16 @@ def auto_augment_policy_originalr(hparams): return pc +def auto_augment_policy_3a(hparams): + policy = [ + [('Solarize', 1.0, 5)], # 128 solarize threshold @ 5 magnitude + [('Desaturate', 1.0, 10)], # grayscale at 10 magnitude + [('GaussianBlurRand', 1.0, 10)], + ] + pc = [[AugmentOp(*a, hparams=hparams) for a in sp] for sp in policy] + return pc + + def auto_augment_policy(name='v0', hparams=None): hparams = hparams or _HPARAMS_DEFAULT if name == 'original': @@ -509,6 +558,8 @@ def auto_augment_policy(name='v0', hparams=None): return auto_augment_policy_v0(hparams) elif name == 'v0r': return auto_augment_policy_v0r(hparams) + elif name == '3a': + return auto_augment_policy_3a(hparams) else: assert False, 'Unknown AA policy (%s)' % name @@ -534,19 +585,23 @@ class AutoAugment: return fs -def auto_augment_transform(config_str, hparams): +def auto_augment_transform(config_str: str, hparams: Optional[Dict] = None): """ Create a AutoAugment transform - :param config_str: String defining configuration of auto augmentation. Consists of multiple sections separated by - dashes ('-'). The first section defines the AutoAugment policy (one of 'v0', 'v0r', 'original', 'originalr'). - The remaining sections, not order sepecific determine - 'mstd' - float std deviation of magnitude noise applied - Ex 'original-mstd0.5' results in AutoAugment with original policy, magnitude_std 0.5 + Args: + config_str: String defining configuration of auto augmentation. Consists of multiple sections separated by + dashes ('-'). + The first section defines the AutoAugment policy (one of 'v0', 'v0r', 'original', 'originalr'). 
- :param hparams: Other hparams (kwargs) for the AutoAugmentation scheme + The remaining sections: + 'mstd' - float std deviation of magnitude noise applied + Ex 'original-mstd0.5' results in AutoAugment with original policy, magnitude_std 0.5 - :return: A PyTorch compatible Transform + hparams: Other hparams (kwargs) for the AutoAugmentation scheme + + Returns: + A PyTorch compatible Transform """ config = config_str.split('-') policy_name = config[0] @@ -605,42 +660,80 @@ _RAND_INCREASING_TRANSFORMS = [ ] +_RAND_3A = [ + 'SolarizeIncreasing', + 'Desaturate', + 'GaussianBlur', +] + + +_RAND_CHOICE_3A = { + 'SolarizeIncreasing': 6, + 'Desaturate': 6, + 'GaussianBlur': 6, + 'Rotate': 3, + 'ShearX': 2, + 'ShearY': 2, + 'PosterizeIncreasing': 1, + 'AutoContrast': 1, + 'ColorIncreasing': 1, + 'SharpnessIncreasing': 1, + 'ContrastIncreasing': 1, + 'BrightnessIncreasing': 1, + 'Equalize': 1, + 'Invert': 1, +} + # These experimental weights are based loosely on the relative improvements mentioned in paper. # They may not result in increased performance, but could likely be tuned to so. 
_RAND_CHOICE_WEIGHTS_0 = { - 'Rotate': 0.3, - 'ShearX': 0.2, - 'ShearY': 0.2, - 'TranslateXRel': 0.1, - 'TranslateYRel': 0.1, - 'Color': .025, - 'Sharpness': 0.025, - 'AutoContrast': 0.025, - 'Solarize': .005, - 'SolarizeAdd': .005, - 'Contrast': .005, - 'Brightness': .005, - 'Equalize': .005, - 'Posterize': 0, - 'Invert': 0, + 'Rotate': 3, + 'ShearX': 2, + 'ShearY': 2, + 'TranslateXRel': 1, + 'TranslateYRel': 1, + 'ColorIncreasing': .25, + 'SharpnessIncreasing': 0.25, + 'AutoContrast': 0.25, + 'SolarizeIncreasing': .05, + 'SolarizeAdd': .05, + 'ContrastIncreasing': .05, + 'BrightnessIncreasing': .05, + 'Equalize': .05, + 'PosterizeIncreasing': 0.05, + 'Invert': 0.05, } -def _select_rand_weights(weight_idx=0, transforms=None): - transforms = transforms or _RAND_TRANSFORMS - assert weight_idx == 0 # only one set of weights currently - rand_weights = _RAND_CHOICE_WEIGHTS_0 - probs = [rand_weights[k] for k in transforms] - probs /= np.sum(probs) - return probs +def _get_weighted_transforms(transforms: Dict): + transforms, probs = list(zip(*transforms.items())) + probs = np.array(probs) + probs = probs / np.sum(probs) + return transforms, probs + +def rand_augment_choices(name: str, increasing=True): + if name == 'weights': + return _RAND_CHOICE_WEIGHTS_0 + elif name == '3aw': + return _RAND_CHOICE_3A + elif name == '3a': + return _RAND_3A + else: + return _RAND_INCREASING_TRANSFORMS if increasing else _RAND_TRANSFORMS -def rand_augment_ops(magnitude=10, hparams=None, transforms=None): + +def rand_augment_ops( + magnitude: Union[int, float] = 10, + prob: float = 0.5, + hparams: Optional[Dict] = None, + transforms: Optional[Union[Dict, List]] = None, +): hparams = hparams or _HPARAMS_DEFAULT transforms = transforms or _RAND_TRANSFORMS return [AugmentOp( - name, prob=0.5, magnitude=magnitude, hparams=hparams) for name in transforms] + name, prob=prob, magnitude=magnitude, hparams=hparams) for name in transforms] class RandAugment: @@ -648,11 +741,16 @@ class RandAugment: 
self.ops = ops self.num_layers = num_layers self.choice_weights = choice_weights + print(self.ops, self.choice_weights) def __call__(self, img): # no replacement when using weighted choice ops = np.random.choice( - self.ops, self.num_layers, replace=self.choice_weights is None, p=self.choice_weights) + self.ops, + self.num_layers, + replace=self.choice_weights is None, + p=self.choice_weights, + ) for op in ops: img = op(img) return img @@ -665,61 +763,84 @@ class RandAugment: return fs -def rand_augment_transform(config_str, hparams): +def rand_augment_transform( + config_str: str, + hparams: Optional[Dict] = None, + transforms: Optional[Union[str, Dict, List]] = None, +): """ Create a RandAugment transform - :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by - dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). The remaining - sections, not order sepecific determine - 'm' - integer magnitude of rand augment - 'n' - integer num layers (number of transform ops selected per image) - 'w' - integer probabiliy weight index (index of a set of weights to influence choice of op) - 'mstd' - float std deviation of magnitude noise applied, or uniform sampling if infinity (or > 100) - 'mmax' - set upper bound for magnitude to something other than default of _LEVEL_DENOM (10) - 'inc' - integer (bool), use augmentations that increase in severity with magnitude (default: 0) - Ex 'rand-m9-n3-mstd0.5' results in RandAugment with magnitude 9, num_layers 3, magnitude_std 0.5 - 'rand-mstd1-w0' results in magnitude_std 1.0, weights 0, default magnitude of 10 and num_layers 2 - - :param hparams: Other hparams (kwargs) for the RandAugmentation scheme - - :return: A PyTorch compatible Transform + Args: + config_str (str): String defining configuration of random augmentation. Consists of multiple sections separated + by dashes ('-'). 
The first section defines the specific variant of rand augment (currently only 'rand'). + The remaining sections, not order sepecific determine + 'm' - integer magnitude of rand augment + 'n' - integer num layers (number of transform ops selected per image) + 'p' - float probability of applying each layer (default 0.5) + 'mstd' - float std deviation of magnitude noise applied, or uniform sampling if infinity (or > 100) + 'mmax' - set upper bound for magnitude to something other than default of _LEVEL_DENOM (10) + 'inc' - integer (bool), use augmentations that increase in severity with magnitude (default: 0) + 't' - str name of transform set to use + Ex 'rand-m9-n3-mstd0.5' results in RandAugment with magnitude 9, num_layers 3, magnitude_std 0.5 + 'rand-mstd1-tweights' results in mag std 1.0, weighted transforms, default mag of 10 and num_layers 2 + + hparams (dict): Other hparams (kwargs) for the RandAugmentation scheme + + Returns: + A PyTorch compatible Transform """ magnitude = _LEVEL_DENOM # default to _LEVEL_DENOM for magnitude (currently 10) num_layers = 2 # default to 2 ops per image - weight_idx = None # default to no probability weights for op choice - transforms = _RAND_TRANSFORMS + increasing = False + prob = 0.5 config = config_str.split('-') assert config[0] == 'rand' config = config[1:] for c in config: - cs = re.split(r'(\d.*)', c) - if len(cs) < 2: - continue - key, val = cs[:2] - if key == 'mstd': - # noise param / randomization of magnitude values - mstd = float(val) - if mstd > 100: - # use uniform sampling in 0 to magnitude if mstd is > 100 - mstd = float('inf') - hparams.setdefault('magnitude_std', mstd) - elif key == 'mmax': - # clip magnitude between [0, mmax] instead of default [0, _LEVEL_DENOM] - hparams.setdefault('magnitude_max', int(val)) - elif key == 'inc': - if bool(val): - transforms = _RAND_INCREASING_TRANSFORMS - elif key == 'm': - magnitude = int(val) - elif key == 'n': - num_layers = int(val) - elif key == 'w': - weight_idx = 
int(val) + if c.startswith('t'): + # NOTE old 'w' key was removed, 'w0' is not equivalent to 'tweights' + val = str(c[1:]) + if transforms is None: + transforms = val else: - assert False, 'Unknown RandAugment config section' - ra_ops = rand_augment_ops(magnitude=magnitude, hparams=hparams, transforms=transforms) - choice_weights = None if weight_idx is None else _select_rand_weights(weight_idx) + # numeric options + cs = re.split(r'(\d.*)', c) + if len(cs) < 2: + continue + key, val = cs[:2] + if key == 'mstd': + # noise param / randomization of magnitude values + mstd = float(val) + if mstd > 100: + # use uniform sampling in 0 to magnitude if mstd is > 100 + mstd = float('inf') + hparams.setdefault('magnitude_std', mstd) + elif key == 'mmax': + # clip magnitude between [0, mmax] instead of default [0, _LEVEL_DENOM] + hparams.setdefault('magnitude_max', int(val)) + elif key == 'inc': + if bool(val): + increasing = True + elif key == 'm': + magnitude = int(val) + elif key == 'n': + num_layers = int(val) + elif key == 'p': + prob = float(val) + else: + assert False, 'Unknown RandAugment config section' + + if isinstance(transforms, str): + transforms = rand_augment_choices(transforms, increasing=increasing) + elif transforms is None: + transforms = _RAND_INCREASING_TRANSFORMS if increasing else _RAND_TRANSFORMS + + choice_weights = None + if isinstance(transforms, Dict): + transforms, choice_weights = _get_weighted_transforms(transforms) + + ra_ops = rand_augment_ops(magnitude=magnitude, prob=prob, hparams=hparams, transforms=transforms) return RandAugment(ra_ops, num_layers, choice_weights=choice_weights) @@ -740,11 +861,19 @@ _AUGMIX_TRANSFORMS = [ ] -def augmix_ops(magnitude=10, hparams=None, transforms=None): +def augmix_ops( + magnitude: Union[int, float] = 10, + hparams: Optional[Dict] = None, + transforms: Optional[Union[str, Dict, List]] = None, +): hparams = hparams or _HPARAMS_DEFAULT transforms = transforms or _AUGMIX_TRANSFORMS return [AugmentOp( - name, 
prob=1.0, magnitude=magnitude, hparams=hparams) for name in transforms] + name, + prob=1.0, + magnitude=magnitude, + hparams=hparams + ) for name in transforms] class AugMixAugment: @@ -820,22 +949,24 @@ class AugMixAugment: return fs -def augment_and_mix_transform(config_str, hparams): +def augment_and_mix_transform(config_str: str, hparams: Optional[Dict] = None): """ Create AugMix PyTorch transform - :param config_str: String defining configuration of random augmentation. Consists of multiple sections separated by - dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). The remaining - sections, not order sepecific determine - 'm' - integer magnitude (severity) of augmentation mix (default: 3) - 'w' - integer width of augmentation chain (default: 3) - 'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1) - 'b' - integer (bool), blend each branch of chain into end result without a final blend, less CPU (default: 0) - 'mstd' - float std deviation of magnitude noise applied (default: 0) - Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2 - - :param hparams: Other hparams (kwargs) for the Augmentation transforms - - :return: A PyTorch compatible Transform + Args: + config_str (str): String defining configuration of random augmentation. Consists of multiple sections separated + by dashes ('-'). The first section defines the specific variant of rand augment (currently only 'rand'). 
+ The remaining sections, not order sepecific determine + 'm' - integer magnitude (severity) of augmentation mix (default: 3) + 'w' - integer width of augmentation chain (default: 3) + 'd' - integer depth of augmentation chain (-1 is random [1, 3], default: -1) + 'b' - integer (bool), blend each branch of chain into end result without a final blend, less CPU (default: 0) + 'mstd' - float std deviation of magnitude noise applied (default: 0) + Ex 'augmix-m5-w4-d2' results in AugMix with severity 5, chain width 4, chain depth 2 + + hparams: Other hparams (kwargs) for the Augmentation transforms + + Returns: + A PyTorch compatible Transform """ magnitude = 3 width = 3 diff --git a/timm/data/transforms_factory.py b/timm/data/transforms_factory.py index 6c28383a..7749b206 100644 --- a/timm/data/transforms_factory.py +++ b/timm/data/transforms_factory.py @@ -59,6 +59,7 @@ def transforms_imagenet_train( re_count=1, re_num_splits=0, separate=False, + force_color_jitter=False, ): """ If separate==True, the transforms are returned as a tuple of 3 separate transforms @@ -77,8 +78,12 @@ def transforms_imagenet_train( primary_tfl += [transforms.RandomVerticalFlip(p=vflip)] secondary_tfl = [] + disable_color_jitter = False if auto_augment: assert isinstance(auto_augment, str) + # color jitter is typically disabled if AA/RA on, + # this allows override without breaking old hparm cfgs + disable_color_jitter = not (force_color_jitter or '3a' in auto_augment) if isinstance(img_size, (tuple, list)): img_size_min = min(img_size) else: @@ -96,8 +101,9 @@ def transforms_imagenet_train( secondary_tfl += [augment_and_mix_transform(auto_augment, aa_params)] else: secondary_tfl += [auto_augment_transform(auto_augment, aa_params)] - elif color_jitter is not None: - # color jitter is enabled when not using AA + + if color_jitter is not None and not disable_color_jitter: + # color jitter is enabled when not using AA or when forced if isinstance(color_jitter, (list, tuple)): # color jitter 
should be a 3-tuple/list if spec brightness/contrast/saturation # or 4 if also augmenting hue