From 6c17d57a2c4c94fb0a1b6a6f66a64bf4e0264400 Mon Sep 17 00:00:00 2001
From: Ross Wightman
Date: Mon, 27 Jul 2020 13:44:56 -0700
Subject: [PATCH] Fix some attributions, add copyrights to some file docstrings

---
 avg_checkpoints.py | 2 +-
 clean_checkpoint.py | 2 +-
 inference.py | 2 +-
 timm/data/auto_augment.py | 2 +-
 timm/data/dataset.py | 4 ++++
 timm/data/loader.py | 8 ++++++++
 timm/data/mixup.py | 5 +++++
 timm/data/random_erasing.py | 7 +++++++
 timm/data/real_labels.py | 6 ++++++
 timm/data/tf_preprocessing.py | 9 +++++++++
 timm/data/transforms_factory.py | 2 ++
 timm/loss/jsd.py | 2 +-
 timm/models/dpn.py | 5 +++--
 timm/models/efficientnet.py | 2 +-
 timm/models/efficientnet_blocks.py | 5 +++++
 timm/models/efficientnet_builder.py | 8 ++++++++
 timm/models/features.py | 5 ++++-
 timm/models/gluon_xception.py | 2 +-
 timm/models/inception_v3.py | 5 +++++
 timm/models/layers/activations.py | 2 +-
 timm/models/layers/activations_jit.py | 2 +-
 timm/models/layers/activations_me.py | 2 +-
 timm/models/layers/adaptive_avgmax_pool.py | 2 +-
 timm/models/layers/cbam.py | 2 +-
 timm/models/layers/cond_conv2d.py | 2 +-
 timm/models/layers/conv2d_same.py | 2 +-
 timm/models/layers/conv_bn_act.py | 2 +-
 timm/models/layers/create_act.py | 3 +++
 timm/models/layers/create_attn.py | 2 +-
 timm/models/layers/create_conv2d.py | 2 +-
 timm/models/layers/create_norm_act.py | 8 ++++++++
 timm/models/layers/drop.py | 2 +-
 timm/models/layers/evo_norm.py | 2 +-
 timm/models/layers/helpers.py | 2 +-
 timm/models/layers/median_pool.py | 13 +++++++------
 timm/models/layers/mixed_conv2d.py | 2 +-
 timm/models/layers/padding.py | 2 +-
 timm/models/layers/pool2d_same.py | 2 +-
 timm/models/layers/selective_kernel.py | 2 +-
 timm/models/layers/separable_conv.py | 7 +++++++
 timm/models/layers/split_batchnorm.py | 2 +-
 timm/models/layers/test_time_pool.py | 2 +-
 timm/models/mobilenetv3.py | 2 +-
 timm/models/registry.py | 4 ++++
 timm/models/regnet.py | 1 +
 timm/models/resnet.py | 1 +
 timm/models/rexnet.py | 1 +
 timm/models/sknet.py | 2 +-
 timm/models/vovnet.py | 2 +-
 timm/models/xception_aligned.py | 2 +-
 timm/optim/lookahead.py | 2 ++
 timm/optim/optim_factory.py | 3 +++
 timm/optim/rmsprop_tf.py | 16 +++++++++++++++-
 timm/scheduler/cosine_lr.py | 6 ++++++
 timm/scheduler/plateau_lr.py | 6 ++++++
 timm/scheduler/scheduler_factory.py | 3 +++
 timm/scheduler/step_lr.py | 6 ++++++
 timm/scheduler/tanh_lr.py | 6 ++++++
 timm/utils.py | 5 +++++
 train.py | 2 +-
 validate.py | 2 +-
 61 files changed, 182 insertions(+), 42 deletions(-)

diff --git a/avg_checkpoints.py b/avg_checkpoints.py
index 99b0ab2f..feeac8af 100755
--- a/avg_checkpoints.py
+++ b/avg_checkpoints.py
@@ -9,7 +9,7 @@ For any hope of decent results, the checkpoints should be from the same or child
 EMA (exponential moving average) of the model weights or performing SWA (stochastic
 weight averaging), but post-training.
 
-Hacked together by Ross Wightman (https://github.com/rwightman)
+Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
 """
 import torch
 import argparse
diff --git a/clean_checkpoint.py b/clean_checkpoint.py
index bc86f2ac..af67f3b9 100755
--- a/clean_checkpoint.py
+++ b/clean_checkpoint.py
@@ -5,7 +5,7 @@ Takes training checkpoints with GPU tensors, optimizer state, extra dict keys, e
 and outputs a CPU tensor checkpoint with only the `state_dict` along with SHA256
 calculation for model zoo compatibility.
 
-Hacked together by Ross Wightman (https://github.com/rwightman)
+Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
 """
 import torch
 import argparse
diff --git a/inference.py b/inference.py
index 3ee994a6..3d89c71f 100755
--- a/inference.py
+++ b/inference.py
@@ -3,7 +3,7 @@ An example inference script that outputs top-k class ids for images in a folder
 into a csv.
 
-Hacked together by Ross Wightman (https://github.com/rwightman)
+Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
 """
 import os
 import time
diff --git a/timm/data/auto_augment.py b/timm/data/auto_augment.py
index e355eef5..cbf5464d 100644
--- a/timm/data/auto_augment.py
+++ b/timm/data/auto_augment.py
@@ -15,7 +15,7 @@ Papers:
     RandAugment: Practical automated data augmentation... - https://arxiv.org/abs/1909.13719
     AugMix: A Simple Data Processing Method to Improve Robustness and Uncertainty - https://arxiv.org/abs/1912.02781
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import random
 import math
diff --git a/timm/data/dataset.py b/timm/data/dataset.py
index 2ce79e7e..5c11078d 100644
--- a/timm/data/dataset.py
+++ b/timm/data/dataset.py
@@ -1,3 +1,7 @@
+""" Quick n Simple Image Folder, Tarfile based DataSet
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
diff --git a/timm/data/loader.py b/timm/data/loader.py
index 4dc0d697..8a5f38af 100644
--- a/timm/data/loader.py
+++ b/timm/data/loader.py
@@ -1,3 +1,11 @@
+""" Loader Factory, Fast Collate, CUDA Prefetcher
+
+Prefetcher and Fast Collate inspired by NVIDIA APEX example at
+https://github.com/NVIDIA/apex/commit/d5e2bb4bdeedd27b1dfaf5bb2b24d6c000dee9be#diff-cf86c282ff7fba81fad27a559379d5bf
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
+
 import torch.utils.data
 import numpy as np
diff --git a/timm/data/mixup.py b/timm/data/mixup.py
index 4678472d..4fc3d97c 100644
--- a/timm/data/mixup.py
+++ b/timm/data/mixup.py
@@ -1,3 +1,8 @@
+""" Mixup
+Paper: `mixup: Beyond Empirical Risk Minimization` - https://arxiv.org/abs/1710.09412
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 import numpy as np
 import torch
diff --git a/timm/data/random_erasing.py b/timm/data/random_erasing.py
index 589b2f0b..78967d10 100644
--- a/timm/data/random_erasing.py
+++ b/timm/data/random_erasing.py
@@ -1,3 +1,10 @@
+""" Random Erasing (Cutout)
+
+Originally inspired by impl at https://github.com/zhunzhong07/Random-Erasing, Apache 2.0
+Copyright Zhun Zhong & Liang Zheng
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 import random
 import math
 import torch
diff --git a/timm/data/real_labels.py b/timm/data/real_labels.py
index be82e5e0..a90be0c3 100644
--- a/timm/data/real_labels.py
+++ b/timm/data/real_labels.py
@@ -1,3 +1,9 @@
+""" Real labels evaluator for ImageNet
+Paper: `Are we done with ImageNet?` - https://arxiv.org/abs/2006.07159
+Based on Numpy example at https://github.com/google-research/reassessed-imagenet
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 import os
 import json
 import numpy as np
diff --git a/timm/data/tf_preprocessing.py b/timm/data/tf_preprocessing.py
index 61dc78e3..899cf364 100644
--- a/timm/data/tf_preprocessing.py
+++ b/timm/data/tf_preprocessing.py
@@ -1,3 +1,12 @@
+""" Tensorflow Preprocessing Adapter
+
+Allows use of Tensorflow preprocessing pipeline in PyTorch Transform
+
+Copyright of original Tensorflow code below.
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
+
 # Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
diff --git a/timm/data/transforms_factory.py b/timm/data/transforms_factory.py
index 14aff454..fd987c85 100644
--- a/timm/data/transforms_factory.py
+++ b/timm/data/transforms_factory.py
@@ -1,5 +1,7 @@
 """ Transforms Factory
 Factory methods for building image transforms for use with TIMM (PyTorch Image Models)
+
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import math
diff --git a/timm/loss/jsd.py b/timm/loss/jsd.py
index 0f8eb696..dd64e156 100644
--- a/timm/loss/jsd.py
+++ b/timm/loss/jsd.py
@@ -12,7 +12,7 @@ class JsdCrossEntropy(nn.Module):
     From paper: 'AugMix: A Simple Data Processing Method to Improve Robustness and
     Uncertainty - https://arxiv.org/abs/1912.02781
 
-    Hacked together by Ross Wightman
+    Hacked together by / Copyright 2020 Ross Wightman
     """
     def __init__(self, num_splits=3, alpha=12, smoothing=0.1):
         super().__init__()
diff --git a/timm/models/dpn.py b/timm/models/dpn.py
index 149ffad4..a0a77ab5 100644
--- a/timm/models/dpn.py
+++ b/timm/models/dpn.py
@@ -2,8 +2,9 @@
 Based on original MXNet implementation https://github.com/cypw/DPNs with
 many ideas from another PyTorch implementation https://github.com/oyam/pytorch-DPNs.
 
-This implementation is compatible with the pretrained weights
-from cypw's MXNet implementation.
+This implementation is compatible with the pretrained weights from cypw's MXNet implementation.
+
+Hacked together by / Copyright 2020 Ross Wightman
 """
 from __future__ import absolute_import
 from __future__ import division
diff --git a/timm/models/efficientnet.py b/timm/models/efficientnet.py
index 08d1df7e..0907b29b 100644
--- a/timm/models/efficientnet.py
+++ b/timm/models/efficientnet.py
@@ -22,7 +22,7 @@ An implementation of EfficienNet that covers variety of related models with effi
 * And likely more...
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import torch
 import torch.nn as nn
diff --git a/timm/models/efficientnet_blocks.py b/timm/models/efficientnet_blocks.py
index 98758abf..d5fdce79 100644
--- a/timm/models/efficientnet_blocks.py
+++ b/timm/models/efficientnet_blocks.py
@@ -1,3 +1,8 @@
+""" EfficientNet, MobileNetV3, etc Blocks
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
+
 import torch
 import torch.nn as nn
 from torch.nn import functional as F
diff --git a/timm/models/efficientnet_builder.py b/timm/models/efficientnet_builder.py
index 9e5f3b94..e7e47ce8 100644
--- a/timm/models/efficientnet_builder.py
+++ b/timm/models/efficientnet_builder.py
@@ -1,3 +1,11 @@
+""" EfficientNet, MobileNetV3, etc Builder
+
+Assembles EfficientNet and related network feature blocks from string definitions.
+Handles stride, dilation calculations, and selects feature extraction points.
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
+
 import logging
 import math
 import re
diff --git a/timm/models/features.py b/timm/models/features.py
index 7329851c..b1d6890f 100644
--- a/timm/models/features.py
+++ b/timm/models/features.py
@@ -3,7 +3,10 @@ A collection of classes, functions, modules to help extract features from models
 and provide a common interface for describing them.
 
-Hacked together by Ross Wightman
+The return_layers, module re-writing idea inspired by torchvision IntermediateLayerGetter
+https://github.com/pytorch/vision/blob/d88d8961ae51507d0cb680329d985b1488b1b76b/torchvision/models/_utils.py
+
+Hacked together by / Copyright 2020 Ross Wightman
 """
 from collections import OrderedDict, defaultdict
 from copy import deepcopy
diff --git a/timm/models/gluon_xception.py b/timm/models/gluon_xception.py
index aaf5fc1f..8e7eb99f 100644
--- a/timm/models/gluon_xception.py
+++ b/timm/models/gluon_xception.py
@@ -4,7 +4,7 @@ This is a port of the Gluon Xception code and weights, itself ported from a PyTo
 Gluon model: (https://gluon-cv.mxnet.io/_modules/gluoncv/model_zoo/xception.html)
 Original PyTorch DeepLab impl: https://github.com/jfzhang95/pytorch-deeplab-xception
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 from collections import OrderedDict
diff --git a/timm/models/inception_v3.py b/timm/models/inception_v3.py
index aa16cf06..fd7852bd 100644
--- a/timm/models/inception_v3.py
+++ b/timm/models/inception_v3.py
@@ -1,3 +1,8 @@
+""" Inception-V3
+
+Originally from torchvision Inception3 model
+Licensed BSD 3-Clause https://github.com/pytorch/vision/blob/master/LICENSE
+"""
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
diff --git a/timm/models/layers/activations.py b/timm/models/layers/activations.py
index 71904935..edb2074f 100644
--- a/timm/models/layers/activations.py
+++ b/timm/models/layers/activations.py
@@ -3,7 +3,7 @@ A collection of activations fn and modules with a common interface so that they
 can easily be swapped. All have an `inplace` arg even if not used.
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 
 import torch
diff --git a/timm/models/layers/activations_jit.py b/timm/models/layers/activations_jit.py
index dd3277fa..b4a51653 100644
--- a/timm/models/layers/activations_jit.py
+++ b/timm/models/layers/activations_jit.py
@@ -7,7 +7,7 @@ All jit scripted activations are lacking in-place variations on purpose, scripte
 currently work across in-place op boundaries, thus performance is equal to or less
 than the non-scripted versions if they contain in-place ops.
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 
 import torch
diff --git a/timm/models/layers/activations_me.py b/timm/models/layers/activations_me.py
index b81f7165..0441f7c4 100644
--- a/timm/models/layers/activations_me.py
+++ b/timm/models/layers/activations_me.py
@@ -6,7 +6,7 @@ easily be swapped. All have an `inplace` arg even if not used.
 These activations are not compatible with jit scripting or ONNX export of the model, please use either
 the JIT or basic versions of the activations.
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 
 import torch
diff --git a/timm/models/layers/adaptive_avgmax_pool.py b/timm/models/layers/adaptive_avgmax_pool.py
index 5838981c..c3d823e1 100644
--- a/timm/models/layers/adaptive_avgmax_pool.py
+++ b/timm/models/layers/adaptive_avgmax_pool.py
@@ -7,7 +7,7 @@ Adaptive pooling with the ability to select the type of pooling from:
 Both a functional and a nn.Module version of the pooling is provided.
 
-Author: Ross Wightman (rwightman)
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import torch
 import torch.nn as nn
diff --git a/timm/models/layers/cbam.py b/timm/models/layers/cbam.py
index 81c0b6b3..600d51fa 100644
--- a/timm/models/layers/cbam.py
+++ b/timm/models/layers/cbam.py
@@ -5,7 +5,7 @@ Experimental impl of CBAM: Convolutional Block Attention Module: https://arxiv.o
 WARNING: Results with these attention layers have been mixed. They can significantly reduce
 performance on some tasks, especially fine-grained it seems. I may end up removing this impl.
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import torch
diff --git a/timm/models/layers/cond_conv2d.py b/timm/models/layers/cond_conv2d.py
index df98f71a..175292b7 100644
--- a/timm/models/layers/cond_conv2d.py
+++ b/timm/models/layers/cond_conv2d.py
@@ -3,7 +3,7 @@ Paper: CondConv: Conditionally Parameterized Convolutions for Efficient Inference
 (https://arxiv.org/abs/1904.04971)
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 
 import math
diff --git a/timm/models/layers/conv2d_same.py b/timm/models/layers/conv2d_same.py
index 06f08b4e..75f0f98d 100644
--- a/timm/models/layers/conv2d_same.py
+++ b/timm/models/layers/conv2d_same.py
@@ -1,6 +1,6 @@
 """ Conv2d w/ Same Padding
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import torch
 import torch.nn as nn
diff --git a/timm/models/layers/conv_bn_act.py b/timm/models/layers/conv_bn_act.py
index ea9d0128..90735357 100644
--- a/timm/models/layers/conv_bn_act.py
+++ b/timm/models/layers/conv_bn_act.py
@@ -1,6 +1,6 @@
 """ Conv2d + BN + Act
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 from torch import nn as nn
diff --git a/timm/models/layers/create_act.py b/timm/models/layers/create_act.py
index 6404d62f..a0eec2ad 100644
--- a/timm/models/layers/create_act.py
+++ b/timm/models/layers/create_act.py
@@ -1,3 +1,6 @@
+""" Activation Factory
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 from .activations import *
 from .activations_jit import *
 from .activations_me import *
diff --git a/timm/models/layers/create_attn.py b/timm/models/layers/create_attn.py
index 24eccaa0..59ecd858 100644
--- a/timm/models/layers/create_attn.py
+++ b/timm/models/layers/create_attn.py
@@ -1,6 +1,6 @@
 """ Select AttentionFactory Method
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import torch
 from .se import SEModule, EffectiveSEModule
diff --git a/timm/models/layers/create_conv2d.py b/timm/models/layers/create_conv2d.py
index 34fbd44f..0134b05c 100644
--- a/timm/models/layers/create_conv2d.py
+++ b/timm/models/layers/create_conv2d.py
@@ -1,6 +1,6 @@
 """ Create Conv2d Factory Method
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 
 from .mixed_conv2d import MixedConv2d
diff --git a/timm/models/layers/create_norm_act.py b/timm/models/layers/create_norm_act.py
index 8fd500a3..9e7e529e 100644
--- a/timm/models/layers/create_norm_act.py
+++ b/timm/models/layers/create_norm_act.py
@@ -1,3 +1,11 @@
+""" NormAct (Normalization + Activation Layer) Factory
+
+Create norm + act combo modules that attempt to be backwards compatible with separate norm + act
+instances in models. Where these are used it will be possible to swap separate BN + act layers with
+combined modules like IABN or EvoNorms.
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 import types
 import functools
diff --git a/timm/models/layers/drop.py b/timm/models/layers/drop.py
index 06f89838..625f1e70 100644
--- a/timm/models/layers/drop.py
+++ b/timm/models/layers/drop.py
@@ -12,7 +12,7 @@ DropBlock impl inspired by two Tensorflow impl that I liked:
 - https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_model.py#L74
 - https://github.com/clovaai/assembled-cnn/blob/master/nets/blocks.py
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import torch
 import torch.nn as nn
diff --git a/timm/models/layers/evo_norm.py b/timm/models/layers/evo_norm.py
index c7c00b80..2ff692db 100644
--- a/timm/models/layers/evo_norm.py
+++ b/timm/models/layers/evo_norm.py
@@ -6,7 +6,7 @@ in terms of memory usage and throughput (roughly 5x mem, 1/2 - 1/3x speed).
 
 Still very much a WIP, fiddling with buffer usage, in-place/jit optimizations, and layouts.
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 
 import torch
diff --git a/timm/models/layers/helpers.py b/timm/models/layers/helpers.py
index d86f7bec..d9aec8af 100644
--- a/timm/models/layers/helpers.py
+++ b/timm/models/layers/helpers.py
@@ -1,6 +1,6 @@
 """ Layer/Module Helpers
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 from itertools import repeat
 from torch._six import container_abcs
diff --git a/timm/models/layers/median_pool.py b/timm/models/layers/median_pool.py
index a902fa27..f900229f 100644
--- a/timm/models/layers/median_pool.py
+++ b/timm/models/layers/median_pool.py
@@ -1,8 +1,9 @@
-import math
-import torch
+""" Median Pool
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 import torch.nn as nn
 import torch.nn.functional as F
-from torch.nn.modules.utils import _pair, _quadruple
+from .helpers import tup_pair, tup_quadruple
 
 
 class MedianPool2d(nn.Module):
@@ -16,9 +17,9 @@ class MedianPool2d(nn.Module):
     """
     def __init__(self, kernel_size=3, stride=1, padding=0, same=False):
         super(MedianPool2d, self).__init__()
-        self.k = _pair(kernel_size)
-        self.stride = _pair(stride)
-        self.padding = _quadruple(padding)  # convert to l, r, t, b
+        self.k = tup_pair(kernel_size)
+        self.stride = tup_pair(stride)
+        self.padding = tup_quadruple(padding)  # convert to l, r, t, b
         self.same = same
 
     def _padding(self, x):
diff --git a/timm/models/layers/mixed_conv2d.py b/timm/models/layers/mixed_conv2d.py
index 1da469b3..53d650cd 100644
--- a/timm/models/layers/mixed_conv2d.py
+++ b/timm/models/layers/mixed_conv2d.py
@@ -2,7 +2,7 @@
 Paper: MixConv: Mixed Depthwise Convolutional Kernels (https://arxiv.org/abs/1907.09595)
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 
 import torch
diff --git a/timm/models/layers/padding.py b/timm/models/layers/padding.py
index 0fca7cc6..34afc37c 100644
--- a/timm/models/layers/padding.py
+++ b/timm/models/layers/padding.py
@@ -1,6 +1,6 @@
 """ Padding Helpers
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import math
 from typing import List, Tuple
diff --git a/timm/models/layers/pool2d_same.py b/timm/models/layers/pool2d_same.py
index 9d1b1cb4..2e61b426 100644
--- a/timm/models/layers/pool2d_same.py
+++ b/timm/models/layers/pool2d_same.py
@@ -1,6 +1,6 @@
 """ AvgPool2d w/ Same Padding
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import torch
 import torch.nn as nn
diff --git a/timm/models/layers/selective_kernel.py b/timm/models/layers/selective_kernel.py
index e7535f71..2efaa487 100644
--- a/timm/models/layers/selective_kernel.py
+++ b/timm/models/layers/selective_kernel.py
@@ -2,7 +2,7 @@
 Paper: Selective Kernel Networks (https://arxiv.org/abs/1903.06586)
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import torch
 from torch import nn as nn
diff --git a/timm/models/layers/separable_conv.py b/timm/models/layers/separable_conv.py
index a422e87e..e949ea43 100644
--- a/timm/models/layers/separable_conv.py
+++ b/timm/models/layers/separable_conv.py
@@ -1,3 +1,10 @@
+""" Depthwise Separable Conv Modules
+
+Basic DWS convs. Other variations of DWS exist with batch norm or activations between the
+DW and PW convs such as the Depthwise modules in MobileNetV2 / EfficientNet and Xception.
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 from torch import nn as nn
 
 from .create_conv2d import create_conv2d
diff --git a/timm/models/layers/split_batchnorm.py b/timm/models/layers/split_batchnorm.py
index ad01cfeb..830781b3 100644
--- a/timm/models/layers/split_batchnorm.py
+++ b/timm/models/layers/split_batchnorm.py
@@ -9,7 +9,7 @@ This allows easily removing the auxiliary BN layers after training to efficientl
 achieve the 'Auxiliary BatchNorm' as described in the AdvProp Paper, section 4.2,
 'Disentangled Learning via An Auxiliary BN'
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import torch
 import torch.nn as nn
diff --git a/timm/models/layers/test_time_pool.py b/timm/models/layers/test_time_pool.py
index b2f3d2c3..27c1099d 100644
--- a/timm/models/layers/test_time_pool.py
+++ b/timm/models/layers/test_time_pool.py
@@ -1,6 +1,6 @@
 """ Test Time Pooling (Average-Max Pool)
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 
 import logging
diff --git a/timm/models/mobilenetv3.py b/timm/models/mobilenetv3.py
index 9e98394c..258046bd 100644
--- a/timm/models/mobilenetv3.py
+++ b/timm/models/mobilenetv3.py
@@ -5,7 +5,7 @@ A PyTorch impl of MobileNet-V3, compatible with TF weights from official impl.
 
 Paper: Searching for MobileNetV3 - https://arxiv.org/abs/1905.02244
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import torch
 import torch.nn as nn
diff --git a/timm/models/registry.py b/timm/models/registry.py
index 2b8a3717..3317eece 100644
--- a/timm/models/registry.py
+++ b/timm/models/registry.py
@@ -1,3 +1,7 @@
+""" Model Registry
+Hacked together by / Copyright 2020 Ross Wightman
+"""
+
 import sys
 import re
 import fnmatch
diff --git a/timm/models/regnet.py b/timm/models/regnet.py
index c0926554..a93ab8a8 100644
--- a/timm/models/regnet.py
+++ b/timm/models/regnet.py
@@ -11,6 +11,7 @@ Weights from original impl have been modified
 * removed training specific dict entries from checkpoints and keep model state_dict only
 * remap names to match the ones here
 
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import numpy as np
 import torch.nn as nn
diff --git a/timm/models/resnet.py b/timm/models/resnet.py
index 4fbc9564..dd9ca93d 100644
--- a/timm/models/resnet.py
+++ b/timm/models/resnet.py
@@ -4,6 +4,7 @@ This started as a copy of https://github.com/pytorch/vision 'resnet.py' (BSD-3-C
 additional dropout and dynamic global avg/max pool.
 
 ResNeXt, SE-ResNeXt, SENet, and MXNet Gluon stem/downsample variants, tiered stems added by Ross Wightman
+Copyright 2020 Ross Wightman
 """
 import math
 import copy
diff --git a/timm/models/rexnet.py b/timm/models/rexnet.py
index 8997d40c..87c594a0 100644
--- a/timm/models/rexnet.py
+++ b/timm/models/rexnet.py
@@ -7,6 +7,7 @@ Adapted from original impl at https://github.com/clovaai/rexnet
 Copyright (c) 2020-present NAVER Corp. MIT license
 
 Changes for timm, feature extraction, and rounded channel variant hacked together by Ross Wightman
+Copyright 2020 Ross Wightman
 """
 import torch.nn as nn
diff --git a/timm/models/sknet.py b/timm/models/sknet.py
index fa00eb5f..ddd22a35 100644
--- a/timm/models/sknet.py
+++ b/timm/models/sknet.py
@@ -6,7 +6,7 @@ This was inspired by reading 'Compounding the Performance Improvements...' (http
 and a streamlined impl at https://github.com/clovaai/assembled-cnn but I ended up building something closer
 to the original paper with some modifications of my own to better balance param count vs accuracy.
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import math
diff --git a/timm/models/vovnet.py b/timm/models/vovnet.py
index 87e68537..c1183b37 100644
--- a/timm/models/vovnet.py
+++ b/timm/models/vovnet.py
@@ -8,7 +8,7 @@ Looked at https://github.com/youngwanLEE/vovnet-detectron2 &
 https://github.com/stigma0617/VoVNet.pytorch/blob/master/models_vovnet/vovnet.py
 for some reference, rewrote most of the code.
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 from typing import List
diff --git a/timm/models/xception_aligned.py b/timm/models/xception_aligned.py
index b6bd8944..973a72da 100644
--- a/timm/models/xception_aligned.py
+++ b/timm/models/xception_aligned.py
@@ -3,7 +3,7 @@ This is a correct, from scratch impl of Aligned Xception (Deeplab) models compatible
 with TF weights at https://github.com/tensorflow/models/blob/master/research/deeplab/g3doc/model_zoo.md
 
-Hacked together by Ross Wightman
+Hacked together by / Copyright 2020 Ross Wightman
 """
 from collections import OrderedDict
diff --git a/timm/optim/lookahead.py b/timm/optim/lookahead.py
index 7a58e0a6..6b5b7f38 100644
--- a/timm/optim/lookahead.py
+++ b/timm/optim/lookahead.py
@@ -1,6 +1,8 @@
 """ Lookahead Optimizer Wrapper.
 Implementation modified from: https://github.com/alphadl/lookahead.pytorch
 Paper: `Lookahead Optimizer: k steps forward, 1 step back` - https://arxiv.org/abs/1907.08610
+
+Hacked together by / Copyright 2020 Ross Wightman
 """
 import torch
 from torch.optim.optimizer import Optimizer
diff --git a/timm/optim/optim_factory.py b/timm/optim/optim_factory.py
index d97887d5..397f136b 100644
--- a/timm/optim/optim_factory.py
+++ b/timm/optim/optim_factory.py
@@ -1,3 +1,6 @@
+""" Optimizer Factory w/ Custom Weight Decay
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 import torch
 from torch import optim as optim
 from timm.optim import Nadam, RMSpropTF, AdamW, RAdam, NovoGrad, NvNovoGrad, Lookahead
diff --git a/timm/optim/rmsprop_tf.py b/timm/optim/rmsprop_tf.py
index 0f44b944..5115555c 100644
--- a/timm/optim/rmsprop_tf.py
+++ b/timm/optim/rmsprop_tf.py
@@ -1,3 +1,12 @@
+""" RMSProp modified to behave like Tensorflow impl
+
+Originally cut & paste from PyTorch RMSProp
+https://github.com/pytorch/pytorch/blob/063946d2b3f3f1e953a2a3b54e0b34f1393de295/torch/optim/rmsprop.py
+Licensed under BSD 3-Clause (ish), https://github.com/pytorch/pytorch/blob/master/LICENSE
+
+Modifications Copyright 2020 Ross Wightman
+"""
+
 import torch
 from torch.optim import Optimizer
 
@@ -6,7 +15,12 @@ class RMSpropTF(Optimizer):
     """Implements RMSprop algorithm (TensorFlow style epsilon)
 
     NOTE: This is a direct cut-and-paste of PyTorch RMSprop with eps applied before sqrt
-    to closer match Tensorflow for matching hyper-params.
+    and a few other modifications to closer match Tensorflow for matching hyper-params.
+
+    Noteworthy changes include:
+    1. Epsilon applied inside square-root
+    2. square_avg initialized to ones
+    3. LR scaling of update accumulated in momentum buffer
 
     Proposed by G. Hinton in his
     `course `_.
diff --git a/timm/scheduler/cosine_lr.py b/timm/scheduler/cosine_lr.py
index 189cc3c5..15da5757 100644
--- a/timm/scheduler/cosine_lr.py
+++ b/timm/scheduler/cosine_lr.py
@@ -1,3 +1,9 @@
+""" Cosine Scheduler
+
+Cosine LR schedule with warmup, cycle/restarts, noise.
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 import logging
 import math
 import numpy as np
diff --git a/timm/scheduler/plateau_lr.py b/timm/scheduler/plateau_lr.py
index 955178ad..4f2cacb6 100644
--- a/timm/scheduler/plateau_lr.py
+++ b/timm/scheduler/plateau_lr.py
@@ -1,3 +1,9 @@
+""" Plateau Scheduler
+
+Adapts PyTorch plateau scheduler and allows application of noise, warmup.
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 import torch
 from .scheduler import Scheduler
diff --git a/timm/scheduler/scheduler_factory.py b/timm/scheduler/scheduler_factory.py
index ea332b19..9f7748f4 100644
--- a/timm/scheduler/scheduler_factory.py
+++ b/timm/scheduler/scheduler_factory.py
@@ -1,3 +1,6 @@
+""" Scheduler Factory
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 from .cosine_lr import CosineLRScheduler
 from .tanh_lr import TanhLRScheduler
 from .step_lr import StepLRScheduler
diff --git a/timm/scheduler/step_lr.py b/timm/scheduler/step_lr.py
index b3c75d96..f797e1a8 100644
--- a/timm/scheduler/step_lr.py
+++ b/timm/scheduler/step_lr.py
@@ -1,3 +1,9 @@
+""" Step Scheduler
+
+Basic step LR schedule with warmup, noise.
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 import math
 import torch
diff --git a/timm/scheduler/tanh_lr.py b/timm/scheduler/tanh_lr.py
index e406d13a..e32cb644 100644
--- a/timm/scheduler/tanh_lr.py
+++ b/timm/scheduler/tanh_lr.py
@@ -1,3 +1,9 @@
+""" TanH Scheduler
+
+TanH schedule with warmup, cycle/restarts, noise.
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
 import logging
 import math
 import numpy as np
diff --git a/timm/utils.py b/timm/utils.py
index 2cae024d..7afdca05 100644
--- a/timm/utils.py
+++ b/timm/utils.py
@@ -1,3 +1,8 @@
+""" Common training and validation utilities
+
+Hacked together by / Copyright 2020 Ross Wightman
+"""
+
 from copy import deepcopy
 
 import torch
diff --git a/train.py b/train.py
index 7f8d4a26..126bd5da 100755
--- a/train.py
+++ b/train.py
@@ -12,7 +12,7 @@ This script was started from an early version of the PyTorch ImageNet example
 NVIDIA CUDA specific speedups adopted from NVIDIA Apex examples
 (https://github.com/NVIDIA/apex/tree/master/examples/imagenet)
 
-Hacked together by Ross Wightman (https://github.com/rwightman)
+Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
 """
 import argparse
 import time
diff --git a/validate.py b/validate.py
index 6f5f76d1..8d893dba 100755
--- a/validate.py
+++ b/validate.py
@@ -5,7 +5,7 @@ This is intended to be a lean and easily modifiable ImageNet validation script f
 models or training checkpoints against ImageNet or similarly organized image datasets.
 It prioritizes canonical PyTorch, standard Python style, and good performance. Repurpose as you see fit.
 
-Hacked together by Ross Wightman (https://github.com/rwightman)
+Hacked together by / Copyright 2020 Ross Wightman (https://github.com/rwightman)
 """
 import argparse
 import os
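
--

Two of the hunks above touch more than attribution text, so brief illustrative sketches follow. First, the rmsprop_tf.py docstring now enumerates three behavioral differences from torch.optim.RMSprop. Below is a minimal sketch of what those three changes amount to, assuming plain tensors, the momentum path, and a hypothetical helper name rmsprop_tf_step; the real implementation is the RMSpropTF class in timm/optim/rmsprop_tf.py, not this function.

    import torch

    def rmsprop_tf_step(param, grad, square_avg, mom_buf,
                        lr=0.01, alpha=0.9, eps=1e-10, momentum=0.9):
        # Change 2: square_avg is expected to start at ones (torch.optim.RMSprop
        # starts it at zeros), so early updates are not over-scaled.
        square_avg.mul_(alpha).addcmul_(grad, grad, value=1 - alpha)
        # Change 1: eps is added inside the square root, matching Tensorflow.
        avg = (square_avg + eps).sqrt_()
        # Change 3: lr is folded into the momentum buffer rather than applied
        # when the buffer is subtracted from the parameter.
        mom_buf.mul_(momentum).addcdiv_(grad, avg, value=lr)
        param.sub_(mom_buf)

    p, g = torch.zeros(4), torch.randn(4)
    sq, buf = torch.ones_like(p), torch.zeros_like(p)  # ones init per change 2
    rmsprop_tf_step(p, g, sq, buf)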
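Second, the median_pool.py hunk swaps torch's private _pair/_quadruple helpers for timm's own tup_pair/tup_quadruple: kernel_size and stride expand to 2-tuples, while padding expands to a 4-tuple because F.pad takes a (left, right, top, bottom) spec for 4D inputs. A standalone sketch of the unfold-then-median computation MedianPool2d performs, assuming 'same'-style padding:

    import torch
    import torch.nn.functional as F

    x = torch.randn(1, 1, 5, 5)
    k, s = (3, 3), (1, 1)   # what tup_pair(3) / tup_pair(1) expand to
    pad = (1, 1, 1, 1)      # what tup_quadruple(1) expands to: l, r, t, b
    xp = F.pad(x, pad, mode='reflect')
    # Unfold H and W into overlapping k[0] x k[1] patches, then take the
    # median over each patch.
    patches = xp.unfold(2, k[0], s[0]).unfold(3, k[1], s[1])
    y = patches.contiguous().view(*patches.shape[:4], -1).median(dim=-1)[0]
    assert y.shape == x.shape  # 'same' padding preserves H x W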