pytorch-image-models/timm/data/transforms.py

import torch
from torchvision import transforms
import torchvision.transforms.functional as F
from PIL import Image
import warnings
import math
import random
import numpy as np

from .constants import DEFAULT_CROP_PCT, IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
from .random_erasing import RandomErasing


class ToNumpy:

    def __call__(self, pil_img):
        np_img = np.array(pil_img, dtype=np.uint8)
        if np_img.ndim < 3:
            np_img = np.expand_dims(np_img, axis=-1)
        np_img = np.rollaxis(np_img, 2)  # HWC to CHW
        return np_img


class ToTensor:

    def __init__(self, dtype=torch.float32):
        self.dtype = dtype

    def __call__(self, pil_img):
        np_img = np.array(pil_img, dtype=np.uint8)
        if np_img.ndim < 3:
            np_img = np.expand_dims(np_img, axis=-1)
        np_img = np.rollaxis(np_img, 2)  # HWC to CHW
        return torch.from_numpy(np_img).to(dtype=self.dtype)


_pil_interpolation_to_str = {
    Image.NEAREST: 'PIL.Image.NEAREST',
    Image.BILINEAR: 'PIL.Image.BILINEAR',
    Image.BICUBIC: 'PIL.Image.BICUBIC',
    Image.LANCZOS: 'PIL.Image.LANCZOS',
    Image.HAMMING: 'PIL.Image.HAMMING',
    Image.BOX: 'PIL.Image.BOX',
}


def _pil_interp(method):
    if method == 'bicubic':
        return Image.BICUBIC
    elif method == 'lanczos':
        return Image.LANCZOS
    elif method == 'hamming':
        return Image.HAMMING
    else:
        # default bilinear, do we want to allow nearest?
        return Image.BILINEAR


RANDOM_INTERPOLATION = (Image.BILINEAR, Image.BICUBIC)


class RandomResizedCropAndInterpolation(object):
    """Crop the given PIL Image to random size and aspect ratio with random interpolation.

    A crop of random size (default: of 0.08 to 1.0) of the original size and a random
    aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop
    is finally resized to given size.
    This is popularly used to train the Inception networks.

    Args:
        size: expected output size of each edge
        scale: range of size of the origin size cropped
        ratio: range of aspect ratio of the origin aspect ratio cropped
        interpolation: Default: PIL.Image.BILINEAR
    """

    def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),
                 interpolation='bilinear'):
        if isinstance(size, tuple):
            self.size = size
        else:
            self.size = (size, size)
        if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):
            warnings.warn("range should be of kind (min, max)")

        if interpolation == 'random':
            self.interpolation = RANDOM_INTERPOLATION
        else:
            self.interpolation = _pil_interp(interpolation)
        self.scale = scale
        self.ratio = ratio

    @staticmethod
    def get_params(img, scale, ratio):
        """Get parameters for ``crop`` for a random sized crop.

        Args:
            img (PIL Image): Image to be cropped.
            scale (tuple): range of size of the origin size cropped
            ratio (tuple): range of aspect ratio of the origin aspect ratio cropped

        Returns:
            tuple: params (i, j, h, w) to be passed to ``crop`` for a random
                sized crop.
        """
        area = img.size[0] * img.size[1]

        for attempt in range(10):
            target_area = random.uniform(*scale) * area
            aspect_ratio = random.uniform(*ratio)

            w = int(round(math.sqrt(target_area * aspect_ratio)))
            h = int(round(math.sqrt(target_area / aspect_ratio)))

            if random.random() < 0.5 and min(ratio) <= (h / w) <= max(ratio):
                w, h = h, w

            if w <= img.size[0] and h <= img.size[1]:
                i = random.randint(0, img.size[1] - h)
                j = random.randint(0, img.size[0] - w)
                return i, j, h, w

        # Fallback
        w = min(img.size[0], img.size[1])
        i = (img.size[1] - w) // 2
        j = (img.size[0] - w) // 2
        return i, j, w, w

    def __call__(self, img):
        """
        Args:
            img (PIL Image): Image to be cropped and resized.

        Returns:
            PIL Image: Randomly cropped and resized image.
        """
        i, j, h, w = self.get_params(img, self.scale, self.ratio)
        if isinstance(self.interpolation, (tuple, list)):
            interpolation = random.choice(self.interpolation)
        else:
            interpolation = self.interpolation
        return F.resized_crop(img, i, j, h, w, self.size, interpolation)

    def __repr__(self):
        if isinstance(self.interpolation, (tuple, list)):
            interpolate_str = ' '.join([_pil_interpolation_to_str[x] for x in self.interpolation])
        else:
            interpolate_str = _pil_interpolation_to_str[self.interpolation]
        format_string = self.__class__.__name__ + '(size={0}'.format(self.size)
        format_string += ', scale={0}'.format(tuple(round(s, 4) for s in self.scale))
        format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio))
        format_string += ', interpolation={0})'.format(interpolate_str)
        return format_string


def transforms_imagenet_train(
        img_size=224,
        scale=(0.08, 1.0),
        color_jitter=(0.4, 0.4, 0.4),
        interpolation='random',
        random_erasing=0.4,
        random_erasing_mode='const',
        use_prefetcher=False,
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD
):

    tfl = [
        RandomResizedCropAndInterpolation(
            img_size, scale=scale, interpolation=interpolation),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(*color_jitter),
    ]

    if use_prefetcher:
        # prefetcher and collate will handle tensor conversion and norm
        tfl += [ToNumpy()]
    else:
        tfl += [
            transforms.ToTensor(),
            transforms.Normalize(
                mean=torch.tensor(mean),
                std=torch.tensor(std))
        ]
        if random_erasing > 0.:
            tfl.append(RandomErasing(random_erasing, mode=random_erasing_mode, device='cpu'))
    return transforms.Compose(tfl)


def transforms_imagenet_eval(
        img_size=224,
        crop_pct=None,
        interpolation='bilinear',
        use_prefetcher=False,
        mean=IMAGENET_DEFAULT_MEAN,
        std=IMAGENET_DEFAULT_STD):
    crop_pct = crop_pct or DEFAULT_CROP_PCT

    if isinstance(img_size, tuple):
        assert len(img_size) == 2
        if img_size[-1] == img_size[-2]:
            # fall-back to older behaviour so Resize scales to shortest edge if target is square
            scale_size = int(math.floor(img_size[0] / crop_pct))
        else:
            scale_size = tuple([int(x / crop_pct) for x in img_size])
    else:
        scale_size = int(math.floor(img_size / crop_pct))

    tfl = [
        transforms.Resize(scale_size, _pil_interp(interpolation)),
        transforms.CenterCrop(img_size),
    ]
    if use_prefetcher:
        # prefetcher and collate will handle tensor conversion and norm
        tfl += [ToNumpy()]
    else:
        tfl += [
            transforms.ToTensor(),
            transforms.Normalize(
                     mean=torch.tensor(mean),
                     std=torch.tensor(std))
        ]

    return transforms.Compose(tfl)
Add some Nvidia performance enhancements (prefetch loader, fast collate), and refactor some of training and model fact/transforms 6 years ago			`import torch`
			`from torchvision import transforms`
Some transform/data/loader refactoring, hopefully didn't break things * factor out data related constants to own file * move data related config helpers to own file * add a variant of RandomResizeCrop that randomizes interpolation method * remove old Numpy version of RandomErasing * cleanup torch version of RandomErasing and use it in either GPU loader batch mode or single image cpu Transform 6 years ago			`import torchvision.transforms.functional as F`
Add some Nvidia performance enhancements (prefetch loader, fast collate), and refactor some of training and model fact/transforms 6 years ago			`from PIL import Image`
Some transform/data/loader refactoring, hopefully didn't break things * factor out data related constants to own file * move data related config helpers to own file * add a variant of RandomResizeCrop that randomizes interpolation method * remove old Numpy version of RandomErasing * cleanup torch version of RandomErasing and use it in either GPU loader batch mode or single image cpu Transform 6 years ago			`import warnings`
Add some Nvidia performance enhancements (prefetch loader, fast collate), and refactor some of training and model fact/transforms 6 years ago			`import math`
Some transform/data/loader refactoring, hopefully didn't break things * factor out data related constants to own file * move data related config helpers to own file * add a variant of RandomResizeCrop that randomizes interpolation method * remove old Numpy version of RandomErasing * cleanup torch version of RandomErasing and use it in either GPU loader batch mode or single image cpu Transform 6 years ago			`import random`
Add some Nvidia performance enhancements (prefetch loader, fast collate), and refactor some of training and model fact/transforms 6 years ago			`import numpy as np`

Big re-org, working towards making pip/module as 'timm' 5 years ago			`from .constants import DEFAULT_CROP_PCT, IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD`
			`from .random_erasing import RandomErasing`
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago

			`class ToNumpy:`
Add some Nvidia performance enhancements (prefetch loader, fast collate), and refactor some of training and model fact/transforms 6 years ago
			`def __call__(self, pil_img):`
			`np_img = np.array(pil_img, dtype=np.uint8)`
			`if np_img.ndim < 3:`
			`np_img = np.expand_dims(np_img, axis=-1)`
			`np_img = np.rollaxis(np_img, 2) # HWC to CHW`
			`return np_img`


Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago			`class ToTensor:`

			`def __init__(self, dtype=torch.float32):`
			`self.dtype = dtype`

			`def __call__(self, pil_img):`
			`np_img = np.array(pil_img, dtype=np.uint8)`
			`if np_img.ndim < 3:`
			`np_img = np.expand_dims(np_img, axis=-1)`
			`np_img = np.rollaxis(np_img, 2) # HWC to CHW`
			`return torch.from_numpy(np_img).to(dtype=self.dtype)`


Some transform/data/loader refactoring, hopefully didn't break things * factor out data related constants to own file * move data related config helpers to own file * add a variant of RandomResizeCrop that randomizes interpolation method * remove old Numpy version of RandomErasing * cleanup torch version of RandomErasing and use it in either GPU loader batch mode or single image cpu Transform 6 years ago			`_pil_interpolation_to_str = {`
			`Image.NEAREST: 'PIL.Image.NEAREST',`
			`Image.BILINEAR: 'PIL.Image.BILINEAR',`
			`Image.BICUBIC: 'PIL.Image.BICUBIC',`
			`Image.LANCZOS: 'PIL.Image.LANCZOS',`
			`Image.HAMMING: 'PIL.Image.HAMMING',`
			`Image.BOX: 'PIL.Image.BOX',`
			`}`


Add seresnet26_32x4d cfg and weights + interpolation str->PIL enum fn 6 years ago			`def _pil_interp(method):`
			`if method == 'bicubic':`
			`return Image.BICUBIC`
			`elif method == 'lanczos':`
			`return Image.LANCZOS`
			`elif method == 'hamming':`
			`return Image.HAMMING`
			`else:`
			`# default bilinear, do we want to allow nearest?`
			`return Image.BILINEAR`


Some transform/data/loader refactoring, hopefully didn't break things * factor out data related constants to own file * move data related config helpers to own file * add a variant of RandomResizeCrop that randomizes interpolation method * remove old Numpy version of RandomErasing * cleanup torch version of RandomErasing and use it in either GPU loader batch mode or single image cpu Transform 6 years ago			`RANDOM_INTERPOLATION = (Image.BILINEAR, Image.BICUBIC)`


			`class RandomResizedCropAndInterpolation(object):`
			`"""Crop the given PIL Image to random size and aspect ratio with random interpolation.`

			`A crop of random size (default: of 0.08 to 1.0) of the original size and a random`
			`aspect ratio (default: of 3/4 to 4/3) of the original aspect ratio is made. This crop`
			`is finally resized to given size.`
			`This is popularly used to train the Inception networks.`

			`Args:`
			`size: expected output size of each edge`
			`scale: range of size of the origin size cropped`
			`ratio: range of aspect ratio of the origin aspect ratio cropped`
			`interpolation: Default: PIL.Image.BILINEAR`
			`"""`

			`def __init__(self, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.),`
			`interpolation='bilinear'):`
			`if isinstance(size, tuple):`
			`self.size = size`
			`else:`
			`self.size = (size, size)`
			`if (scale[0] > scale[1]) or (ratio[0] > ratio[1]):`
			`warnings.warn("range should be of kind (min, max)")`

			`if interpolation == 'random':`
			`self.interpolation = RANDOM_INTERPOLATION`
			`else:`
			`self.interpolation = _pil_interp(interpolation)`
			`self.scale = scale`
			`self.ratio = ratio`

			`@staticmethod`
			`def get_params(img, scale, ratio):`
			"""Get parameters for ``crop`` for a random sized crop.

			`Args:`
			`img (PIL Image): Image to be cropped.`
			`scale (tuple): range of size of the origin size cropped`
			`ratio (tuple): range of aspect ratio of the origin aspect ratio cropped`

			`Returns:`
			tuple: params (i, j, h, w) to be passed to ``crop`` for a random
			`sized crop.`
			`"""`
			`area = img.size[0] * img.size[1]`

			`for attempt in range(10):`
			`target_area = random.uniform(scale) area`
			`aspect_ratio = random.uniform(*ratio)`

			`w = int(round(math.sqrt(target_area * aspect_ratio)))`
			`h = int(round(math.sqrt(target_area / aspect_ratio)))`

			`if random.random() < 0.5 and min(ratio) <= (h / w) <= max(ratio):`
			`w, h = h, w`

			`if w <= img.size[0] and h <= img.size[1]:`
			`i = random.randint(0, img.size[1] - h)`
			`j = random.randint(0, img.size[0] - w)`
			`return i, j, h, w`

			`# Fallback`
			`w = min(img.size[0], img.size[1])`
			`i = (img.size[1] - w) // 2`
			`j = (img.size[0] - w) // 2`
			`return i, j, w, w`

			`def __call__(self, img):`
			`"""`
			`Args:`
			`img (PIL Image): Image to be cropped and resized.`

			`Returns:`
			`PIL Image: Randomly cropped and resized image.`
			`"""`
			`i, j, h, w = self.get_params(img, self.scale, self.ratio)`
			`if isinstance(self.interpolation, (tuple, list)):`
			`interpolation = random.choice(self.interpolation)`
			`else:`
			`interpolation = self.interpolation`
			`return F.resized_crop(img, i, j, h, w, self.size, interpolation)`

			`def __repr__(self):`
			`if isinstance(self.interpolation, (tuple, list)):`
			`interpolate_str = ' '.join([_pil_interpolation_to_str[x] for x in self.interpolation])`
			`else:`
			`interpolate_str = _pil_interpolation_to_str[self.interpolation]`
			`format_string = self.__class__.__name__ + '(size={0}'.format(self.size)`
			`format_string += ', scale={0}'.format(tuple(round(s, 4) for s in self.scale))`
			`format_string += ', ratio={0}'.format(tuple(round(r, 4) for r in self.ratio))`
			`format_string += ', interpolation={0})'.format(interpolate_str)`
			`return format_string`


Add some Nvidia performance enhancements (prefetch loader, fast collate), and refactor some of training and model fact/transforms 6 years ago			`def transforms_imagenet_train(`
			`img_size=224,`
Some transform/data/loader refactoring, hopefully didn't break things * factor out data related constants to own file * move data related config helpers to own file * add a variant of RandomResizeCrop that randomizes interpolation method * remove old Numpy version of RandomErasing * cleanup torch version of RandomErasing and use it in either GPU loader batch mode or single image cpu Transform 6 years ago			`scale=(0.08, 1.0),`
Add some Nvidia performance enhancements (prefetch loader, fast collate), and refactor some of training and model fact/transforms 6 years ago			`color_jitter=(0.4, 0.4, 0.4),`
Some transform/data/loader refactoring, hopefully didn't break things * factor out data related constants to own file * move data related config helpers to own file * add a variant of RandomResizeCrop that randomizes interpolation method * remove old Numpy version of RandomErasing * cleanup torch version of RandomErasing and use it in either GPU loader batch mode or single image cpu Transform 6 years ago			`interpolation='random',`
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago			`random_erasing=0.4,`
Mixup and prefetcher improvements * Do mixup in custom collate fn if prefetcher enabled, reduces performance impact * Move mixup code to own file * Add arg to disable prefetcher * Fix no cuda transfer when prefetcher off * Random erasing when prefetcher off wasn't changed to match new args, fixed * Default random erasing to off (prob = 0.) for train 6 years ago			`random_erasing_mode='const',`
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago			`use_prefetcher=False,`
			`mean=IMAGENET_DEFAULT_MEAN,`
			`std=IMAGENET_DEFAULT_STD`
			`):`
Add some Nvidia performance enhancements (prefetch loader, fast collate), and refactor some of training and model fact/transforms 6 years ago
			`tfl = [`
Some transform/data/loader refactoring, hopefully didn't break things * factor out data related constants to own file * move data related config helpers to own file * add a variant of RandomResizeCrop that randomizes interpolation method * remove old Numpy version of RandomErasing * cleanup torch version of RandomErasing and use it in either GPU loader batch mode or single image cpu Transform 6 years ago			`RandomResizedCropAndInterpolation(`
			`img_size, scale=scale, interpolation=interpolation),`
Add some Nvidia performance enhancements (prefetch loader, fast collate), and refactor some of training and model fact/transforms 6 years ago			`transforms.RandomHorizontalFlip(),`
			`transforms.ColorJitter(*color_jitter),`
			`]`
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago
			`if use_prefetcher:`
			`# prefetcher and collate will handle tensor conversion and norm`
			`tfl += [ToNumpy()]`
			`else:`
			`tfl += [`
Some transform/data/loader refactoring, hopefully didn't break things * factor out data related constants to own file * move data related config helpers to own file * add a variant of RandomResizeCrop that randomizes interpolation method * remove old Numpy version of RandomErasing * cleanup torch version of RandomErasing and use it in either GPU loader batch mode or single image cpu Transform 6 years ago			`transforms.ToTensor(),`
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago			`transforms.Normalize(`
Uniform pretrained model handling. * All models have 'default_cfgs' dict * load/resume/pretrained helpers factored out * pretrained load operates on state_dict based on default_cfg * test all models in validate * schedule, optim factor factored out * test time pool wrapper applied based on default_cfg 6 years ago			`mean=torch.tensor(mean),`
			`std=torch.tensor(std))`
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago			`]`
			`if random_erasing > 0.:`
Mixup and prefetcher improvements * Do mixup in custom collate fn if prefetcher enabled, reduces performance impact * Move mixup code to own file * Add arg to disable prefetcher * Fix no cuda transfer when prefetcher off * Random erasing when prefetcher off wasn't changed to match new args, fixed * Default random erasing to off (prob = 0.) for train 6 years ago			`tfl.append(RandomErasing(random_erasing, mode=random_erasing_mode, device='cpu'))`
Add some Nvidia performance enhancements (prefetch loader, fast collate), and refactor some of training and model fact/transforms 6 years ago			`return transforms.Compose(tfl)`


Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago			`def transforms_imagenet_eval(`
			`img_size=224,`
			`crop_pct=None,`
Add per model crop pct, interpolation defaults, tie it all together * create one resolve fn to pull together model defaults + cmd line args * update attribution comments in some models * test update train/validation/inference scripts 6 years ago			`interpolation='bilinear',`
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago			`use_prefetcher=False,`
			`mean=IMAGENET_DEFAULT_MEAN,`
			`std=IMAGENET_DEFAULT_STD):`
Add some Nvidia performance enhancements (prefetch loader, fast collate), and refactor some of training and model fact/transforms 6 years ago			`crop_pct = crop_pct or DEFAULT_CROP_PCT`
Add per model crop pct, interpolation defaults, tie it all together * create one resolve fn to pull together model defaults + cmd line args * update attribution comments in some models * test update train/validation/inference scripts 6 years ago
			`if isinstance(img_size, tuple):`
			`assert len(img_size) == 2`
Fix small bug in seresnet input size and eval transform handling of img size 6 years ago			`if img_size[-1] == img_size[-2]:`
Add per model crop pct, interpolation defaults, tie it all together * create one resolve fn to pull together model defaults + cmd line args * update attribution comments in some models * test update train/validation/inference scripts 6 years ago			`# fall-back to older behaviour so Resize scales to shortest edge if target is square`
			`scale_size = int(math.floor(img_size[0] / crop_pct))`
			`else:`
Fix small bug in seresnet input size and eval transform handling of img size 6 years ago			`scale_size = tuple([int(x / crop_pct) for x in img_size])`
Add per model crop pct, interpolation defaults, tie it all together * create one resolve fn to pull together model defaults + cmd line args * update attribution comments in some models * test update train/validation/inference scripts 6 years ago			`else:`
			`scale_size = int(math.floor(img_size / crop_pct))`
Add some Nvidia performance enhancements (prefetch loader, fast collate), and refactor some of training and model fact/transforms 6 years ago
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago			`tfl = [`
Add seresnet26_32x4d cfg and weights + interpolation str->PIL enum fn 6 years ago			`transforms.Resize(scale_size, _pil_interp(interpolation)),`
Add some Nvidia performance enhancements (prefetch loader, fast collate), and refactor some of training and model fact/transforms 6 years ago			`transforms.CenterCrop(img_size),`
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago			`]`
			`if use_prefetcher:`
			`# prefetcher and collate will handle tensor conversion and norm`
			`tfl += [ToNumpy()]`
			`else:`
			`tfl += [`
			`transforms.ToTensor(),`
			`transforms.Normalize(`
			`mean=torch.tensor(mean),`
			`std=torch.tensor(std))`
			`]`

			`return transforms.Compose(tfl)`