pytorch-image-models/timm/data/loader.py

""" Loader Factory, Fast Collate, CUDA Prefetcher

Prefetcher and Fast Collate inspired by NVIDIA APEX example at
https://github.com/NVIDIA/apex/commit/d5e2bb4bdeedd27b1dfaf5bb2b24d6c000dee9be#diff-cf86c282ff7fba81fad27a559379d5bf

Hacked together by / Copyright 2020 Ross Wightman
"""

from typing import Tuple, Optional, Union, Callable

import torch.utils.data
import numpy as np

from timm.bits import DeviceEnv
from .collate import fast_collate
from .config import PreprocessCfg, AugCfg, MixupCfg
from .distributed_sampler import OrderedDistributedSampler
from .fetcher import Fetcher
from .mixup import FastCollateMixup
from .prefetcher_cuda import PrefetcherCuda


def _worker_init(worker_id):
    worker_info = torch.utils.data.get_worker_info()
    assert worker_info.id == worker_id
    np.random.seed(worker_info.seed % (2**32-1))


def create_loader_v2(
        dataset: torch.utils.data.Dataset,
        batch_size: int,
        is_training: bool = False,
        dev_env: Optional[DeviceEnv] = None,
        normalize=True,
        pp_cfg: PreprocessCfg = PreprocessCfg(),
        mix_cfg: MixupCfg = None,
        num_workers: int = 1,
        collate_fn: Optional[Callable] = None,
        pin_memory: bool = False,
        use_multi_epochs_loader: bool = False,
        persistent_workers: bool = True,
):
    """
    
    Args:
        dataset: 
        batch_size: 
        is_training: 
        dev_env: 
        normalize: 
        pp_cfg: 
        mix_cfg: 
        num_workers: 
        collate_fn: 
        pin_memory: 
        use_multi_epochs_loader: 
        persistent_workers: 

    Returns:

    """
    if dev_env is None:
        dev_env = DeviceEnv.instance()

    sampler = None
    if dev_env.distributed and not isinstance(dataset, torch.utils.data.IterableDataset):
        if is_training:
            sampler = torch.utils.data.distributed.DistributedSampler(
                dataset, num_replicas=dev_env.world_size, rank=dev_env.global_rank)
        else:
            # This will add extra duplicate entries to result in equal num
            # of samples per-process, will slightly alter validation results
            sampler = OrderedDistributedSampler(
                dataset, num_replicas=dev_env.world_size, rank=dev_env.global_rank)

    if collate_fn is None:
        if mix_cfg is not None and mix_cfg.prob > 0:
            collate_fn = FastCollateMixup(
                mixup_alpha=mix_cfg.mixup_alpha,
                cutmix_alpha=mix_cfg.cutmix_alpha,
                cutmix_minmax=mix_cfg.cutmix_minmax,
                prob=mix_cfg.prob,
                switch_prob=mix_cfg.switch_prob,
                mode=mix_cfg.mode,
                correct_lam=mix_cfg.correct_lam,
                label_smoothing=mix_cfg.label_smoothing,
                num_classes=mix_cfg.num_classes,
            )
        else:
            collate_fn = fast_collate

    loader_class = torch.utils.data.DataLoader
    if use_multi_epochs_loader:
        loader_class = MultiEpochsDataLoader

    loader_args = dict(
        batch_size=batch_size,
        shuffle=not isinstance(dataset, torch.utils.data.IterableDataset) and sampler is None and is_training,
        num_workers=num_workers,
        sampler=sampler,
        collate_fn=collate_fn,
        pin_memory=pin_memory,
        drop_last=is_training,
        worker_init_fn=_worker_init,
        persistent_workers=persistent_workers)
    try:
        loader = loader_class(dataset, **loader_args)
    except TypeError as e:
        loader_args.pop('persistent_workers')  # only in Pytorch 1.7+
        loader = loader_class(dataset, **loader_args)

    fetcher_kwargs = dict(
        normalize=normalize,
        mean=pp_cfg.mean,
        std=pp_cfg.std,
    )
    if normalize and is_training and pp_cfg.aug is not None:
        fetcher_kwargs.update(dict(
            re_prob=pp_cfg.aug.re_prob,
            re_mode=pp_cfg.aug.re_mode,
            re_count=pp_cfg.aug.re_count,
            num_aug_splits=pp_cfg.aug.num_aug_splits,
        ))

    if dev_env.type_cuda:
        loader = PrefetcherCuda(loader, **fetcher_kwargs)
    else:
        loader = Fetcher(loader, device=dev_env.device, **fetcher_kwargs)

    return loader


class MultiEpochsDataLoader(torch.utils.data.DataLoader):

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._DataLoader__initialized = False
        self.batch_sampler = _RepeatSampler(self.batch_sampler)
        self._DataLoader__initialized = True
        self.iterator = super().__iter__()

    def __len__(self):
        return len(self.batch_sampler.sampler)

    def __iter__(self):
        for i in range(len(self)):
            yield next(self.iterator)


class _RepeatSampler(object):
    """ Sampler that repeats forever.

    Args:
        sampler (Sampler)
    """

    def __init__(self, sampler):
        self.sampler = sampler

    def __iter__(self):
        while True:
            yield from iter(self.sampler)
Fix some attributions, add copyrights to some file docstrings 4 years ago			`""" Loader Factory, Fast Collate, CUDA Prefetcher`

			`Prefetcher and Fast Collate inspired by NVIDIA APEX example at`
			`https://github.com/NVIDIA/apex/commit/d5e2bb4bdeedd27b1dfaf5bb2b24d6c000dee9be#diff-cf86c282ff7fba81fad27a559379d5bf`

			`Hacked together by / Copyright 2020 Ross Wightman`
			`"""`

Transforms, augmentation work for bits, add RandomErasing support for XLA (pushing into transforms), revamp of transform/preproc config, etc ongoing... 3 years ago			`from typing import Tuple, Optional, Union, Callable`

Loader tweaks 6 years ago			`import torch.utils.data`
Working on an implementation of AugMix with JensenShannonDivergence loss that's compatible with my AutoAugment and RandAugment impl 5 years ago			`import numpy as np`

Add proper TrainState checkpoint save/load. Some reorg/refactoring and other cleanup. More to go... 4 years ago			`from timm.bits import DeviceEnv`
First timm.bits commit, add initial abstractions, WIP updates to train, val... some of it working 4 years ago			`from .collate import fast_collate`
Transforms, augmentation work for bits, add RandomErasing support for XLA (pushing into transforms), revamp of transform/preproc config, etc ongoing... 3 years ago			`from .config import PreprocessCfg, AugCfg, MixupCfg`
Big re-org, working towards making pip/module as 'timm' 5 years ago			`from .distributed_sampler import OrderedDistributedSampler`
Transforms, augmentation work for bits, add RandomErasing support for XLA (pushing into transforms), revamp of transform/preproc config, etc ongoing... 3 years ago			`from .fetcher import Fetcher`
			`from .mixup import FastCollateMixup`
			`from .prefetcher_cuda import PrefetcherCuda`
Mixup and prefetcher improvements * Do mixup in custom collate fn if prefetcher enabled, reduces performance impact * Move mixup code to own file * Add arg to disable prefetcher * Fix no cuda transfer when prefetcher off * Random erasing when prefetcher off wasn't changed to match new args, fixed * Default random erasing to off (prob = 0.) for train 6 years ago
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago
Add worker_init_fn to loader for numpy seed per worker 3 years ago			`def _worker_init(worker_id):`
			`worker_info = torch.utils.data.get_worker_info()`
			`assert worker_info.id == worker_id`
			`np.random.seed(worker_info.seed % (2**32-1))`


Transforms, augmentation work for bits, add RandomErasing support for XLA (pushing into transforms), revamp of transform/preproc config, etc ongoing... 3 years ago			`def create_loader_v2(`
			`dataset: torch.utils.data.Dataset,`
			`batch_size: int,`
			`is_training: bool = False,`
			`dev_env: Optional[DeviceEnv] = None,`
			`normalize=True,`
			`pp_cfg: PreprocessCfg = PreprocessCfg(),`
			`mix_cfg: MixupCfg = None,`
			`num_workers: int = 1,`
			`collate_fn: Optional[Callable] = None,`
			`pin_memory: bool = False,`
			`use_multi_epochs_loader: bool = False,`
			`persistent_workers: bool = True,`
Add initial sotabench attempt. Split create_transform out of create_loader. Update requirements.txt 5 years ago			`):`
Transforms, augmentation work for bits, add RandomErasing support for XLA (pushing into transforms), revamp of transform/preproc config, etc ongoing... 3 years ago			`"""`

			`Args:`
			`dataset:`
			`batch_size:`
			`is_training:`
			`dev_env:`
			`normalize:`
			`pp_cfg:`
			`mix_cfg:`
			`num_workers:`
			`collate_fn:`
			`pin_memory:`
			`use_multi_epochs_loader:`
			`persistent_workers:`

			`Returns:`
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago
Transforms, augmentation work for bits, add RandomErasing support for XLA (pushing into transforms), revamp of transform/preproc config, etc ongoing... 3 years ago			`"""`
First timm.bits commit, add initial abstractions, WIP updates to train, val... some of it working 4 years ago			`if dev_env is None:`
Add proper TrainState checkpoint save/load. Some reorg/refactoring and other cleanup. More to go... 4 years ago			`dev_env = DeviceEnv.instance()`
First timm.bits commit, add initial abstractions, WIP updates to train, val... some of it working 4 years ago
Distributed (multi-process) train, multi-gpu single process train, and NVIDIA AMP support 6 years ago			`sampler = None`
Major timm.bits update. Updater and DeviceEnv now dataclasses, after_step closure used, metrics base impl w/ distributed reduce, many tweaks/fixes. 4 years ago			`if dev_env.distributed and not isinstance(dataset, torch.utils.data.IterableDataset):`
Add distributed sampler that maintains order of original dataset (for validation) 6 years ago			`if is_training:`
Fix some bugs with XLA support, logger, add hacky xla dist launch script since torch.dist.launch doesn't work 4 years ago			`sampler = torch.utils.data.distributed.DistributedSampler(`
			`dataset, num_replicas=dev_env.world_size, rank=dev_env.global_rank)`
Add distributed sampler that maintains order of original dataset (for validation) 6 years ago			`else:`
			`# This will add extra duplicate entries to result in equal num`
			`# of samples per-process, will slightly alter validation results`
Transforms, augmentation work for bits, add RandomErasing support for XLA (pushing into transforms), revamp of transform/preproc config, etc ongoing... 3 years ago			`sampler = OrderedDistributedSampler(`
			`dataset, num_replicas=dev_env.world_size, rank=dev_env.global_rank)`
Distributed (multi-process) train, multi-gpu single process train, and NVIDIA AMP support 6 years ago
Mixup and prefetcher improvements * Do mixup in custom collate fn if prefetcher enabled, reduces performance impact * Move mixup code to own file * Add arg to disable prefetcher * Fix no cuda transfer when prefetcher off * Random erasing when prefetcher off wasn't changed to match new args, fixed * Default random erasing to off (prob = 0.) for train 6 years ago			`if collate_fn is None:`
Transforms, augmentation work for bits, add RandomErasing support for XLA (pushing into transforms), revamp of transform/preproc config, etc ongoing... 3 years ago			`if mix_cfg is not None and mix_cfg.prob > 0:`
			`collate_fn = FastCollateMixup(`
			`mixup_alpha=mix_cfg.mixup_alpha,`
			`cutmix_alpha=mix_cfg.cutmix_alpha,`
			`cutmix_minmax=mix_cfg.cutmix_minmax,`
			`prob=mix_cfg.prob,`
			`switch_prob=mix_cfg.switch_prob,`
			`mode=mix_cfg.mode,`
			`correct_lam=mix_cfg.correct_lam,`
			`label_smoothing=mix_cfg.label_smoothing,`
			`num_classes=mix_cfg.num_classes,`
			`)`
			`else:`
			`collate_fn = fast_collate`
Mixup and prefetcher improvements * Do mixup in custom collate fn if prefetcher enabled, reduces performance impact * Move mixup code to own file * Add arg to disable prefetcher * Fix no cuda transfer when prefetcher off * Random erasing when prefetcher off wasn't changed to match new args, fixed * Default random erasing to off (prob = 0.) for train 6 years ago
added MultiEpochsDataLoader 5 years ago			`loader_class = torch.utils.data.DataLoader`
			`if use_multi_epochs_loader:`
			`loader_class = MultiEpochsDataLoader`

More dataset work including factories and a tensorflow datasets (TFDS) wrapper * Add parser/dataset factory methods for more flexible dataset & parser creation * Add dataset parser that wraps TFDS image classification datasets * Tweak num_classes handling bug for 21k models * Add initial deit models so they can be benchmarked in next csv results runs 4 years ago			`loader_args = dict(`
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago			`batch_size=batch_size,`
More dataset work including factories and a tensorflow datasets (TFDS) wrapper * Add parser/dataset factory methods for more flexible dataset & parser creation * Add dataset parser that wraps TFDS image classification datasets * Tweak num_classes handling bug for 21k models * Add initial deit models so they can be benchmarked in next csv results runs 4 years ago			`shuffle=not isinstance(dataset, torch.utils.data.IterableDataset) and sampler is None and is_training,`
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago			`num_workers=num_workers,`
Distributed (multi-process) train, multi-gpu single process train, and NVIDIA AMP support 6 years ago			`sampler=sampler,`
Mixup and prefetcher improvements * Do mixup in custom collate fn if prefetcher enabled, reduces performance impact * Move mixup code to own file * Add arg to disable prefetcher * Fix no cuda transfer when prefetcher off * Random erasing when prefetcher off wasn't changed to match new args, fixed * Default random erasing to off (prob = 0.) for train 6 years ago			`collate_fn=collate_fn,`
Add --pin-mem arg to enable dataloader pin_memory (showing more benefit in some scenarios now), also add --torchscript arg to validate.py for testing models with jit.script 5 years ago			`pin_memory=pin_memory,`
Loader tweaks 6 years ago			`drop_last=is_training,`
Add worker_init_fn to loader for numpy seed per worker 3 years ago			`worker_init_fn=_worker_init,`
More dataset work including factories and a tensorflow datasets (TFDS) wrapper * Add parser/dataset factory methods for more flexible dataset & parser creation * Add dataset parser that wraps TFDS image classification datasets * Tweak num_classes handling bug for 21k models * Add initial deit models so they can be benchmarked in next csv results runs 4 years ago			`persistent_workers=persistent_workers)`
			`try:`
			`loader = loader_class(dataset, **loader_args)`
			`except TypeError as e:`
			`loader_args.pop('persistent_workers') # only in Pytorch 1.7+`
			`loader = loader_class(dataset, **loader_args)`
First timm.bits commit, add initial abstractions, WIP updates to train, val... some of it working 4 years ago
			`fetcher_kwargs = dict(`
Transforms, augmentation work for bits, add RandomErasing support for XLA (pushing into transforms), revamp of transform/preproc config, etc ongoing... 3 years ago			`normalize=normalize,`
			`mean=pp_cfg.mean,`
			`std=pp_cfg.std,`
First timm.bits commit, add initial abstractions, WIP updates to train, val... some of it working 4 years ago			`)`
Transforms, augmentation work for bits, add RandomErasing support for XLA (pushing into transforms), revamp of transform/preproc config, etc ongoing... 3 years ago			`if normalize and is_training and pp_cfg.aug is not None:`
			`fetcher_kwargs.update(dict(`
			`re_prob=pp_cfg.aug.re_prob,`
			`re_mode=pp_cfg.aug.re_mode,`
			`re_count=pp_cfg.aug.re_count,`
			`num_aug_splits=pp_cfg.aug.num_aug_splits,`
			`))`

Major timm.bits update. Updater and DeviceEnv now dataclasses, after_step closure used, metrics base impl w/ distributed reduce, many tweaks/fixes. 4 years ago			`if dev_env.type_cuda:`
First timm.bits commit, add initial abstractions, WIP updates to train, val... some of it working 4 years ago			`loader = PrefetcherCuda(loader, **fetcher_kwargs)`
			`else:`
			`loader = Fetcher(loader, device=dev_env.device, **fetcher_kwargs)`
Improve creation of data pipeline with prefetch enabled vs disabled, fixup inception_res_v2 and dpn models 6 years ago
			`return loader`
added MultiEpochsDataLoader 5 years ago

			`class MultiEpochsDataLoader(torch.utils.data.DataLoader):`

			`def __init__(self, args, *kwargs):`
			`super().__init__(args, *kwargs)`
			`self._DataLoader__initialized = False`
			`self.batch_sampler = _RepeatSampler(self.batch_sampler)`
			`self._DataLoader__initialized = True`
			`self.iterator = super().__iter__()`

			`def __len__(self):`
			`return len(self.batch_sampler.sampler)`

			`def __iter__(self):`
			`for i in range(len(self)):`
			`yield next(self.iterator)`


			`class _RepeatSampler(object):`
			`""" Sampler that repeats forever.`

			`Args:`
			`sampler (Sampler)`
			`"""`

			`def __init__(self, sampler):`
			`self.sampler = sampler`

			`def __iter__(self):`
			`while True:`
			`yield from iter(self.sampler)`