# pytorch-image-models/timm/data/dataset.py

""" Quick n Simple Image Folder, Tarfile based DataSet

Hacked together by / Copyright 2020 Ross Wightman
"""
import torch.utils.data as data
import os
import torch
import logging

from PIL import Image

from .parsers import ParserImageFolder, ParserImageTar, ParserImageClassInTar

# Module-level logger; ImageDataset.__getitem__ uses it to warn about samples it skips.
_logger = logging.getLogger(__name__)


# Consecutive sample-load failures at which ImageDataset.__getitem__ stops skipping to the
# next index and re-raises the last exception instead.
_ERROR_RETRY = 50


class ImageDataset(data.Dataset):
    """Dataset that reads (image, target) samples through a parser.

    A sample whose image fails to load is logged and skipped in favour of the next index
    (wrapping around at the end); once `_ERROR_RETRY` failures occur back to back the
    most recent exception is re-raised.
    """

    def __init__(
            self,
            root,
            parser=None,
            class_map='',
            load_bytes=False,
            transform=None,
    ):
        """
        Args:
            root: dataset location, only used to build a default parser when `parser` is None.
            parser: sample source indexed as `parser[i] -> (img, target)`; when None a tar parser
                is built if `root` is an existing file with a '.tar' extension, else a folder parser.
            class_map: forwarded to the default parser.
            load_bytes: if True, return `img.read()` instead of a decoded RGB PIL image.
            transform: optional callable applied to every successfully loaded image.
        """
        if parser is None:
            # an existing '.tar' file selects the tar parser, anything else the folder parser
            is_tar_file = os.path.isfile(root) and os.path.splitext(root)[1] == '.tar'
            parser_cls = ParserImageTar if is_tar_file else ParserImageFolder
            parser = parser_cls(root, class_map=class_map)
        self.parser = parser
        self.transform = transform
        self.load_bytes = load_bytes
        self._consecutive_errors = 0  # length of the current run of failed loads

    def __getitem__(self, index):
        """Return `(img, target)` for `index`, falling back to following samples on load errors."""
        img, target = self.parser[index]
        try:
            if self.load_bytes:
                img = img.read()
            else:
                img = Image.open(img).convert('RGB')
        except Exception as e:
            _logger.warning(f'Skipped sample (index {index}, file {self.parser.filename(index)}). {str(e)}')
            self._consecutive_errors += 1
            if self._consecutive_errors >= _ERROR_RETRY:
                # too many failures in a row, give up and surface the last error
                raise e
            next_index = (index + 1) % len(self.parser)
            return self.__getitem__(next_index)

        # a successful load ends the current run of failures
        self._consecutive_errors = 0
        if self.transform is not None:
            img = self.transform(img)
        if target is None:
            # samples without a label get a -1 placeholder target
            target = torch.tensor(-1, dtype=torch.long)
        return img, target

    def __len__(self):
        """Number of samples reported by the parser."""
        return len(self.parser)

    def filename(self, index, basename=False, absolute=False):
        """Delegate to the parser's `filename` for the sample at `index`."""
        return self.parser.filename(index, basename, absolute)

    def filenames(self, basename=False, absolute=False):
        """Delegate to the parser's `filenames`."""
        return self.parser.filenames(basename, absolute)


class AugMixDataset(torch.utils.data.Dataset):
    """Dataset wrapper to perform AugMix or other clean/augmentation mixes.

    Wraps a dataset that exposes a `transform` attribute. Each item is a tuple of
    `num_splits` views of the same base-transformed sample plus its target: the first
    view is only normalized (the 'clean' split), every further view is augmented and
    then normalized.
    """

    def __init__(self, dataset, num_splits=2):
        """
        Args:
            dataset: wrapped dataset; must have a `transform` attribute and return `(x, y)` when indexed.
                If its `transform` is not None at construction time it must be a list/tuple of
                3 transforms and is split up immediately (see `_set_transforms`).
            num_splits: number of views returned per sample, including the clean one.
        """
        self.augmentation = None
        self.normalize = None
        self.dataset = dataset
        if self.dataset.transform is not None:
            self._set_transforms(self.dataset.transform)
        self.num_splits = num_splits

    def _set_transforms(self, x):
        """Split `x` into (base, augmentation, normalize) transforms.

        The base transform is installed on the wrapped dataset; the other two are kept on
        this wrapper.

        Raises:
            AssertionError: if `x` is not a list/tuple of exactly 3 items.
        """
        # Explicit check instead of an `assert` statement: asserts are stripped under `python -O`,
        # which would let a malformed sequence partially overwrite the transforms before failing.
        # AssertionError is kept so callers see the same exception type as before.
        if not (isinstance(x, (list, tuple)) and len(x) == 3):
            raise AssertionError('Expecting a tuple/list of 3 transforms')
        self.dataset.transform = x[0]
        self.augmentation = x[1]
        self.normalize = x[2]

    @property
    def transform(self):
        """Base transform currently installed on the wrapped dataset."""
        return self.dataset.transform

    @transform.setter
    def transform(self, x):
        self._set_transforms(x)

    def _normalize(self, x):
        """Apply the normalize transform, or pass `x` through when none is set."""
        return x if self.normalize is None else self.normalize(x)

    def __getitem__(self, i):
        """Return `(views, y)` where `views` is a tuple of `num_splits` versions of sample `i`.

        Raises:
            TypeError: if augmented splits are requested (`num_splits > 1`) but no augmentation
                transform has been set.
        """
        x, y = self.dataset[i]  # all splits share the same dataset base transform
        x_list = [self._normalize(x)]  # first split only normalizes (this is the 'clean' split)
        num_augmented = self.num_splits - 1
        if num_augmented > 0:
            if self.augmentation is None:
                # Same exception type the bare `None(x)` call used to produce, but with a message
                # that says what is actually missing.
                raise TypeError(
                    'AugMixDataset has no augmentation transform; set `transform` to a tuple/list of '
                    '3 transforms (base, augmentation, normalize) before requesting augmented splits')
            # run the full augmentation on the remaining splits
            x_list.extend(self._normalize(self.augmentation(x)) for _ in range(num_augmented))
        return tuple(x_list), y

    def __len__(self):
        """Length of the wrapped dataset."""
        return len(self.dataset)
Fix some attributions, add copyrights to some file docstrings 4 years ago			`""" Quick n Simple Image Folder, Tarfile based DataSet`

			`Hacked together by / Copyright 2020 Ross Wightman`
			`"""`
Initial commit, puting some ol pieces together 6 years ago			`import torch.utils.data as data`
			`import os`
			`import torch`
More work on dataset / parser split and imagenet21k (tar) support 4 years ago			`import logging`
Initial commit, puting some ol pieces together 6 years ago
More work on dataset / parser split and imagenet21k (tar) support 4 years ago			`from PIL import Image`

			`from .parsers import ParserImageFolder, ParserImageTar, ParserImageClassInTar`

			`_logger = logging.getLogger(__name__)`


			`_ERROR_RETRY = 50`
Add support to Dataset for class id mapping file, clean up a bit of old logic. Add results file arg for validation and update script. 5 years ago

Initial commit for dataset / parser reorg to support additional datasets / types 4 years ago			`class ImageDataset(data.Dataset):`
Initial commit, puting some ol pieces together 6 years ago
			`def __init__(`
			`self,`
More work on dataset / parser split and imagenet21k (tar) support 4 years ago			`root,`
Initial commit for dataset / parser reorg to support additional datasets / types 4 years ago			`parser=None,`
			`class_map='',`
Ported Tensorflow pretrained EfficientNet weights and some model cleanup * B0-B3 weights ported from TF with close to paper accuracy * Renamed gen_mobilenet to gen_efficientnet since scaling params go well beyond 'mobile' specific * Add Tensorflow preprocessing option for closer images to source repo 6 years ago			`load_bytes=False,`
Add support to Dataset for class id mapping file, clean up a bit of old logic. Add results file arg for validation and update script. 5 years ago			`transform=None,`
Initial commit for dataset / parser reorg to support additional datasets / types 4 years ago			`):`
			`if parser is None:`
More work on dataset / parser split and imagenet21k (tar) support 4 years ago			`if os.path.isfile(root) and os.path.splitext(root)[1] == '.tar':`
			`parser = ParserImageTar(root, class_map=class_map)`
Initial commit for dataset / parser reorg to support additional datasets / types 4 years ago			`else:`
More work on dataset / parser split and imagenet21k (tar) support 4 years ago			`parser = ParserImageFolder(root, class_map=class_map)`
Initial commit for dataset / parser reorg to support additional datasets / types 4 years ago			`self.parser = parser`
Ported Tensorflow pretrained EfficientNet weights and some model cleanup * B0-B3 weights ported from TF with close to paper accuracy * Renamed gen_mobilenet to gen_efficientnet since scaling params go well beyond 'mobile' specific * Add Tensorflow preprocessing option for closer images to source repo 6 years ago			`self.load_bytes = load_bytes`
Initial commit, puting some ol pieces together 6 years ago			`self.transform = transform`
More work on dataset / parser split and imagenet21k (tar) support 4 years ago			`self._consecutive_errors = 0`
Initial commit, puting some ol pieces together 6 years ago
			`def __getitem__(self, index):`
Initial commit for dataset / parser reorg to support additional datasets / types 4 years ago			`img, target = self.parser[index]`
More work on dataset / parser split and imagenet21k (tar) support 4 years ago			`try:`
			`img = img.read() if self.load_bytes else Image.open(img).convert('RGB')`
			`except Exception as e:`
			`_logger.warning(f'Skipped sample (index {index}, file {self.parser.filename(index)}). {str(e)}')`
			`self._consecutive_errors += 1`
			`if self._consecutive_errors < _ERROR_RETRY:`
			`return self.__getitem__((index + 1) % len(self.parser))`
			`else:`
			`raise e`
			`self._consecutive_errors = 0`
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago			`if self.transform is not None:`
			`img = self.transform(img)`
Initial commit, puting some ol pieces together 6 years ago			`if target is None:`
Initial commit for dataset / parser reorg to support additional datasets / types 4 years ago			`target = torch.tensor(-1, dtype=torch.long)`
Initial commit, puting some ol pieces together 6 years ago			`return img, target`

			`def __len__(self):`
Initial commit for dataset / parser reorg to support additional datasets / types 4 years ago			`return len(self.parser)`
Initial commit, puting some ol pieces together 6 years ago
More models in sotabench, more control over sotabench run, dataset filename extraction consistency 4 years ago			`def filename(self, index, basename=False, absolute=False):`
Initial commit for dataset / parser reorg to support additional datasets / types 4 years ago			`return self.parser.filename(index, basename, absolute)`
More models in sotabench, more control over sotabench run, dataset filename extraction consistency 4 years ago
			`def filenames(self, basename=False, absolute=False):`
Initial commit for dataset / parser reorg to support additional datasets / types 4 years ago			`return self.parser.filenames(basename, absolute)`
More models in sotabench, more control over sotabench run, dataset filename extraction consistency 4 years ago
Working on an implementation of AugMix with JensenShannonDivergence loss that's compatible with my AutoAugment and RandAugment impl 5 years ago
			`class AugMixDataset(torch.utils.data.Dataset):`
			`"""Dataset wrapper to perform AugMix or other clean/augmentation mixes"""`

Add SplitBatchNorm. AugMix, Rand/AutoAugment, Split (Aux) BatchNorm, Jensen-Shannon Divergence, RandomErasing all working together 5 years ago			`def __init__(self, dataset, num_splits=2):`
Working on an implementation of AugMix with JensenShannonDivergence loss that's compatible with my AutoAugment and RandAugment impl 5 years ago			`self.augmentation = None`
			`self.normalize = None`
			`self.dataset = dataset`
			`if self.dataset.transform is not None:`
			`self._set_transforms(self.dataset.transform)`
Add SplitBatchNorm. AugMix, Rand/AutoAugment, Split (Aux) BatchNorm, Jensen-Shannon Divergence, RandomErasing all working together 5 years ago			`self.num_splits = num_splits`
Working on an implementation of AugMix with JensenShannonDivergence loss that's compatible with my AutoAugment and RandAugment impl 5 years ago
			`def _set_transforms(self, x):`
			`assert isinstance(x, (list, tuple)) and len(x) == 3, 'Expecting a tuple/list of 3 transforms'`
			`self.dataset.transform = x[0]`
			`self.augmentation = x[1]`
			`self.normalize = x[2]`

			`@property`
			`def transform(self):`
			`return self.dataset.transform`

			`@transform.setter`
			`def transform(self, x):`
			`self._set_transforms(x)`

			`def _normalize(self, x):`
			`return x if self.normalize is None else self.normalize(x)`

			`def __getitem__(self, i):`
Add SplitBatchNorm. AugMix, Rand/AutoAugment, Split (Aux) BatchNorm, Jensen-Shannon Divergence, RandomErasing all working together 5 years ago			`x, y = self.dataset[i] # all splits share the same dataset base transform`
			`x_list = [self._normalize(x)] # first split only normalizes (this is the 'clean' split)`
			`# run the full augmentation on the remaining splits`
			`for _ in range(self.num_splits - 1):`
Working on an implementation of AugMix with JensenShannonDivergence loss that's compatible with my AutoAugment and RandAugment impl 5 years ago			`x_list.append(self._normalize(self.augmentation(x)))`
			`return tuple(x_list), y`

			`def __len__(self):`
			`return len(self.dataset)`