|
|
|
""" Model creation / weight loading / state_dict helpers
|
|
|
|
|
|
|
|
Hacked together by / Copyright 2020 Ross Wightman
|
|
|
|
"""
|
|
|
|
import collections.abc
|
|
|
|
import logging
|
|
|
|
import math
|
|
|
|
import os
|
|
|
|
import re
|
|
|
|
from collections import OrderedDict, defaultdict
|
|
|
|
from copy import deepcopy
|
|
|
|
from itertools import chain
|
|
|
|
from typing import Any, Callable, Optional, Tuple, Dict, Union
|
|
|
|
|
|
|
|
import torch
|
|
|
|
import torch.nn as nn
|
|
|
|
from torch.hub import load_state_dict_from_url
|
|
|
|
from torch.utils.checkpoint import checkpoint
|
|
|
|
|
|
|
|
from .features import FeatureListNet, FeatureDictNet, FeatureHookNet
|
|
|
|
from .fx_features import FeatureGraphNet
|
|
|
|
from .hub import has_hf_hub, download_cached_file, load_state_dict_from_hf
|
|
|
|
from .layers import Conv2dSame, Linear, BatchNormAct2d
|
|
|
|
from .registry import get_pretrained_cfg
|
|
|
|
|
|
|
|
|
|
|
|
_logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level switches for showing download progress and verifying file hashes of pretrained checkpoints.
# Both default to off; toggle them with set_pretrained_download_progress() / set_pretrained_check_hash().
|
|
|
|
_DOWNLOAD_PROGRESS = False
|
|
|
|
_CHECK_HASH = False
|
|
|
|
|
|
|
|
|
|
|
|
def clean_state_dict(state_dict):
    """Return a new OrderedDict with a leading ``module.`` removed from every key.

    The ``module.`` prefix is left behind by parallel-training wrappers; stripping it
    lets the weights be loaded into an unwrapped model. Only one leading ``module.``
    is removed per key, values are passed through untouched and the input mapping is
    not modified.
    """
    prefix = 'module.'
    return OrderedDict(
        (key[len(prefix):] if key.startswith(prefix) else key, value)
        for key, value in state_dict.items()
    )
|
|
|
|
|
|
|
|
|
|
|
|
def load_state_dict(checkpoint_path, use_ema=True):
    """Read a checkpoint file and return its model state dict with ``module.`` prefixes removed.

    When the loaded checkpoint is a dict, the state dict is taken from the first of
    these entries that applies: 'state_dict_ema' then 'model_ema' (both only when
    ``use_ema`` is True and the entry is not None), then 'state_dict', then 'model'.
    If none applies, or the checkpoint is not a dict, the loaded object itself is
    treated as the state dict.

    Args:
        checkpoint_path: Path of the checkpoint file to read.
        use_ema: Prefer EMA weights when the checkpoint contains them.

    Returns:
        The selected state dict after passing through ``clean_state_dict``.

    Raises:
        FileNotFoundError: If ``checkpoint_path`` is empty or not an existing file.
    """
    if not checkpoint_path or not os.path.isfile(checkpoint_path):
        message = "No checkpoint found at '{}'".format(checkpoint_path)
        _logger.error(message)
        # carry the path in the exception too, so callers that do not see the log can tell what was missing
        raise FileNotFoundError(message)

    # NOTE(review): depending on the torch version / `weights_only` default, torch.load may unpickle
    # arbitrary objects -- only point this at checkpoint files from a trusted source.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    state_dict_key = ''
    if isinstance(checkpoint, dict):
        if use_ema and checkpoint.get('state_dict_ema', None) is not None:
            state_dict_key = 'state_dict_ema'
        elif use_ema and checkpoint.get('model_ema', None) is not None:
            state_dict_key = 'model_ema'
        elif 'state_dict' in checkpoint:
            state_dict_key = 'state_dict'
        elif 'model' in checkpoint:
            state_dict_key = 'model'
    state_dict = clean_state_dict(checkpoint[state_dict_key] if state_dict_key else checkpoint)
    _logger.info("Loaded {} from checkpoint '{}'".format(state_dict_key, checkpoint_path))
    return state_dict
|
|
|
|
|
|
|
|
|
|
|
|
def load_checkpoint(model, checkpoint_path, use_ema=True, strict=True):
    """Load the weights stored at ``checkpoint_path`` into ``model``.

    Paths ending in ``.npz`` / ``.npy`` (case-insensitive) are handed to the model's own
    ``load_pretrained`` method and ``None`` is returned. Any other path is read with
    ``load_state_dict`` and applied through ``model.load_state_dict``.

    Args:
        model: Model to receive the weights.
        checkpoint_path: Path of the checkpoint file.
        use_ema: Forwarded to ``load_state_dict`` (prefer EMA weights if present).
        strict: Forwarded to ``model.load_state_dict``.

    Returns:
        Whatever ``model.load_state_dict`` returns, or None for numpy checkpoints.

    Raises:
        NotImplementedError: For a numpy checkpoint when the model has no ``load_pretrained``.
    """
    extension = os.path.splitext(checkpoint_path)[-1].lower()
    if extension in ('.npz', '.npy'):
        # numpy checkpoint, only loadable via a model specific load_pretrained fn
        if not hasattr(model, 'load_pretrained'):
            raise NotImplementedError('Model cannot load numpy checkpoint')
        model.load_pretrained(checkpoint_path)
        return
    weights = load_state_dict(checkpoint_path, use_ema)
    return model.load_state_dict(weights, strict=strict)
|
|
|
|
|
|
|
|
|
|
|
|
def resume_checkpoint(model, checkpoint_path, optimizer=None, loss_scaler=None, log_info=True):
    """Restore model (and optionally optimizer / loss-scaler) state from a training checkpoint.

    A checkpoint that is a dict containing 'state_dict' is treated as a full training
    checkpoint: the model weights are loaded (after ``clean_state_dict``), then the
    optimizer state from 'optimizer' and the loss scaler state from
    ``loss_scaler.state_dict_key`` when the corresponding object was passed and the key
    exists. Anything else is loaded directly into the model as a bare state dict.

    Args:
        model: Model whose ``load_state_dict`` receives the weights.
        checkpoint_path: Path of the checkpoint file.
        optimizer: Optional optimizer to restore via ``load_state_dict``.
        loss_scaler: Optional object exposing ``state_dict_key`` and ``load_state_dict``.
        log_info: Emit info-level log messages while restoring.

    Returns:
        The epoch to resume from: ``checkpoint['epoch']``, plus one when the checkpoint
        'version' is greater than 1; None when the checkpoint stores no epoch.

    Raises:
        FileNotFoundError: If ``checkpoint_path`` is not an existing file.
    """
    if not os.path.isfile(checkpoint_path):
        message = "No checkpoint found at '{}'".format(checkpoint_path)
        _logger.error(message)
        # include the path in the exception so it is not only visible in the log
        raise FileNotFoundError(message)

    resume_epoch = None
    # NOTE(review): depending on the torch version / `weights_only` default, torch.load may unpickle
    # arbitrary objects -- only resume from checkpoint files you trust.
    checkpoint = torch.load(checkpoint_path, map_location='cpu')
    if isinstance(checkpoint, dict) and 'state_dict' in checkpoint:
        if log_info:
            _logger.info('Restoring model state from checkpoint...')
        state_dict = clean_state_dict(checkpoint['state_dict'])
        model.load_state_dict(state_dict)

        if optimizer is not None and 'optimizer' in checkpoint:
            if log_info:
                _logger.info('Restoring optimizer state from checkpoint...')
            optimizer.load_state_dict(checkpoint['optimizer'])

        if loss_scaler is not None and loss_scaler.state_dict_key in checkpoint:
            if log_info:
                _logger.info('Restoring AMP loss scaler state from checkpoint...')
            loss_scaler.load_state_dict(checkpoint[loss_scaler.state_dict_key])

        if 'epoch' in checkpoint:
            resume_epoch = checkpoint['epoch']
            if 'version' in checkpoint and checkpoint['version'] > 1:
                resume_epoch += 1  # start at the next epoch, old checkpoints incremented before save

        if log_info:
            # 'epoch' is optional (see above); .get() keeps this log line from raising KeyError without it
            _logger.info("Loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint.get('epoch')))
    else:
        model.load_state_dict(checkpoint)
        if log_info:
            _logger.info("Loaded checkpoint '{}'".format(checkpoint_path))
    return resume_epoch
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_pretrained_source(pretrained_cfg):
|
|
|
|
cfg_source = pretrained_cfg.get('source', '')
|
|
|
|
pretrained_url = pretrained_cfg.get('url', None)
|
|
|
|
pretrained_file = pretrained_cfg.get('file', None)
|
|
|
|
hf_hub_id = pretrained_cfg.get('hf_hub_id', None)
|
|
|
|
# resolve where to load pretrained weights from
|
|
|
|
load_from = ''
|
|
|
|
pretrained_loc = ''
|
|
|
|
if cfg_source == 'hf-hub' and has_hf_hub(necessary=True):
|
|
|
|
# hf-hub specified as source via model identifier
|
|
|
|
load_from = 'hf-hub'
|
|
|
|
assert hf_hub_id
|
|
|
|
pretrained_loc = hf_hub_id
|
|
|
|
else:
|
|
|
|
# default source == timm or unspecified
|
|
|
|
if pretrained_file:
|
|
|
|
load_from = 'file'
|
|
|
|
pretrained_loc = pretrained_file
|
|
|
|
elif pretrained_url:
|
|
|
|
load_from = 'url'
|
|
|
|
pretrained_loc = pretrained_url
|
|
|
|
elif hf_hub_id and has_hf_hub(necessary=False):
|
|
|
|
# hf-hub available as alternate weight source in default_cfg
|
|
|
|
load_from = 'hf-hub'
|
|
|
|
pretrained_loc = hf_hub_id
|
|
|
|
return load_from, pretrained_loc
|
|
|
|
|
|
|
|
|
|
|
|
def set_pretrained_download_progress(enable=True):
    """Globally switch the download progress display for pretrained weights on or off.

    Stores ``enable`` in the module-level ``_DOWNLOAD_PROGRESS`` flag, which the
    pretrained-weight loaders in this module pass on as ``progress`` when downloading.
    """
    global _DOWNLOAD_PROGRESS
    _DOWNLOAD_PROGRESS = enable
|
|
|
|
|
|
|
|
|
|
|
|
def set_pretrained_check_hash(enable=True):
    """Globally switch hash checking of downloaded pretrained weights on or off.

    Stores ``enable`` in the module-level ``_CHECK_HASH`` flag, which the
    pretrained-weight loaders in this module pass on as ``check_hash`` when downloading.
    """
    global _CHECK_HASH
    _CHECK_HASH = enable
|
|
|
|
|
|
|
|
|
|
|
|
def load_custom_pretrained(
        model: nn.Module,
        pretrained_cfg: Optional[Dict] = None,
        load_fn: Optional[Callable] = None,
):
    r"""Load pretrained weights stored in a custom (read: non .pth) file format.

    The weight location is resolved from the pretrained cfg. A url source is first
    fetched with ``download_cached_file`` and the resulting local path is used; a file
    source is used as-is. The location is then handed to ``load_fn(model, location)``
    if given, otherwise to ``model.load_pretrained(location)`` if the model has such a
    method. If neither loader exists, or no source can be resolved, a warning is logged
    and the model is left untouched.

    NOTE: a Hugging Face hub source only triggers a "not supported" warning; the hub id
    is still passed on to the loader afterwards.

    Args:
        model: The instantiated model to load weights into
        pretrained_cfg (dict): Pretrained cfg; falls back to ``model.pretrained_cfg``, then to ``{}``
        load_fn: An external stand alone fn that loads weights into provided model, otherwise a fn named
            'load_pretrained' on the model will be called if it exists
    """
    pretrained_cfg = pretrained_cfg or getattr(model, 'pretrained_cfg', None) or {}
    load_from, pretrained_loc = _resolve_pretrained_source(pretrained_cfg)
    if not load_from:
        _logger.warning("No pretrained weights exist for this model. Using random initialization.")
        return

    if load_from == 'url':
        pretrained_loc = download_cached_file(pretrained_loc, check_hash=_CHECK_HASH, progress=_DOWNLOAD_PROGRESS)
    elif load_from == 'hf-hub':  # FIXME
        _logger.warning("Hugging Face hub not currently supported for custom load pretrained models.")

    if load_fn is None and not hasattr(model, 'load_pretrained'):
        _logger.warning("Valid function to load pretrained weights is not available, using random initialization.")
        return

    if load_fn is not None:
        # an explicitly passed loader takes priority over the model's own method
        load_fn(model, pretrained_loc)
    else:
        model.load_pretrained(pretrained_loc)
|
|
|
|
|
|
|
|
|
|
|
|
def adapt_input_conv(in_chans, conv_weight):
    """Adapt a 4-d conv weight to a different number of input channels.

    Dim 1 of ``conv_weight`` is treated as the input-channel dim:

    * ``in_chans == 1``: the input channels are summed; when there are more than 3 of
      them (space2depth stems) they are summed in groups of 3 instead.
    * ``in_chans == 3``: values are returned unchanged.
    * otherwise: a 3-channel weight is tiled along dim 1, cut to ``in_chans`` channels
      and scaled by ``3 / in_chans``.

    The arithmetic is done in float32 and the result is cast back to the input dtype.

    Raises:
        NotImplementedError: If ``in_chans`` is neither 1 nor 3 and the weight does not
            have exactly 3 input channels.
    """
    orig_dtype = conv_weight.dtype
    weight = conv_weight.float()  # Some weights are in torch.half, ensure it's float for sum on CPU
    num_out, num_in, kernel_h, kernel_w = weight.shape

    if in_chans == 1:
        if num_in > 3:
            assert weight.shape[1] % 3 == 0
            # For models with space2depth stems
            weight = weight.reshape(num_out, num_in // 3, 3, kernel_h, kernel_w).sum(dim=2, keepdim=False)
        else:
            weight = weight.sum(dim=1, keepdim=True)
    elif in_chans != 3:
        if num_in != 3:
            raise NotImplementedError('Weight format not supported by conversion.')
        # NOTE this strategy should be better than random init, but there could be other combinations of
        # the original RGB input layer weights that'd work better for specific cases.
        num_tiles = int(math.ceil(in_chans / 3))
        weight = weight.repeat(1, num_tiles, 1, 1)[:, :in_chans, :, :]
        weight *= (3 / float(in_chans))

    return weight.to(orig_dtype)
|
|
|
|
|
|
|
|
|
|
|
|
def load_pretrained(
        model: nn.Module,
        pretrained_cfg: Optional[Dict] = None,
        num_classes: int = 1000,
        in_chans: int = 3,
        filter_fn: Optional[Callable] = None,
        strict: bool = True,
):
    """ Load pretrained checkpoint

    Resolves the weight source from the pretrained cfg (local file, url or Hugging Face hub),
    fetches the state dict, optionally filters it, adapts the first conv(s) to `in_chans`,
    drops or trims the classifier weights when the class count / label offset requires it, and
    finally loads the result into the model. Logs a warning and returns without touching the
    model when no source can be resolved.

    Args:
        model (nn.Module) : PyTorch model module
        pretrained_cfg (Optional[Dict]): configuration for pretrained weights / target dataset,
            falls back to `model.pretrained_cfg`
        num_classes (int): num_classes for model
        in_chans (int): in_chans for model
        filter_fn (Optional[Callable]): state_dict filter fn for load (takes state_dict, model as args)
        strict (bool): strict load of checkpoint, forced off when weights had to be dropped

    """
    pretrained_cfg = pretrained_cfg or getattr(model, 'pretrained_cfg', None) or {}
    load_from, pretrained_loc = _resolve_pretrained_source(pretrained_cfg)
    if load_from not in ('file', 'url', 'hf-hub'):
        _logger.warning("No pretrained weights exist or were found for this model. Using random initialization.")
        return

    if load_from == 'file':
        _logger.info(f'Loading pretrained weights from file ({pretrained_loc})')
        state_dict = load_state_dict(pretrained_loc)
    elif load_from == 'url':
        _logger.info(f'Loading pretrained weights from url ({pretrained_loc})')
        state_dict = load_state_dict_from_url(
            pretrained_loc, map_location='cpu', progress=_DOWNLOAD_PROGRESS, check_hash=_CHECK_HASH)
    else:
        _logger.info(f'Loading pretrained weights from Hugging Face hub ({pretrained_loc})')
        state_dict = load_state_dict_from_hf(pretrained_loc)

    if filter_fn is not None:
        # for backwards compat with filter fn that take one arg, try one first, then two
        try:
            state_dict = filter_fn(state_dict)
        except TypeError:
            state_dict = filter_fn(state_dict, model)

    first_convs = pretrained_cfg.get('first_conv', None)
    if first_convs is not None and in_chans != 3:
        conv_names = (first_convs,) if isinstance(first_convs, str) else first_convs
        for conv_name in conv_names:
            weight_key = conv_name + '.weight'
            try:
                state_dict[weight_key] = adapt_input_conv(in_chans, state_dict[weight_key])
            except NotImplementedError:
                # no conversion possible: drop the weight so the layer keeps its own initialization
                del state_dict[weight_key]
                strict = False
                _logger.warning(
                    f'Unable to convert pretrained {conv_name} weights, using random init for this layer.')
            else:
                _logger.info(
                    f'Converted input conv {conv_name} pretrained weights from 3 to {in_chans} channel(s)')

    classifiers = pretrained_cfg.get('classifier', None)
    label_offset = pretrained_cfg.get('label_offset', 0)
    if classifiers is not None:
        classifier_names = (classifiers,) if isinstance(classifiers, str) else classifiers
        if num_classes != pretrained_cfg['num_classes']:
            # completely discard fully connected if model num_classes doesn't match pretrained weights
            for classifier_name in classifier_names:
                for suffix in ('.weight', '.bias'):
                    state_dict.pop(classifier_name + suffix, None)
            strict = False
        elif label_offset > 0:
            # special case for pretrained weights with an extra background class in pretrained weights
            for classifier_name in classifier_names:
                for suffix in ('.weight', '.bias'):
                    param_key = classifier_name + suffix
                    state_dict[param_key] = state_dict[param_key][label_offset:]

    model.load_state_dict(state_dict, strict=strict)
|
|
|
|
|
|
|
|
|
|
|
|
def extract_layer(model, layer):
    """Look up a sub-module of ``model`` by its dotted name.

    If ``model`` has a ``module`` attribute and the name does not start with 'module',
    the lookup starts at ``model.module``; a leading 'module' component is dropped when
    the model has no such attribute. All-digit components are used as integer indices,
    all others as attribute names. The walk stops at the first component that cannot be
    resolved and returns the module reached so far.
    """
    parts = layer.split('.')
    has_wrapper = hasattr(model, 'module')
    current = model
    if has_wrapper and parts[0] != 'module':
        current = model.module
    if not has_wrapper and parts[0] == 'module':
        parts = parts[1:]

    for part in parts:
        if not hasattr(current, part):
            # unresolvable component: give back the deepest module found
            break
        current = current[int(part)] if part.isdigit() else getattr(current, part)
    return current
|
|
|
|
|
|
|
|
|
|
|
|
def set_layer(model, layer, val):
    """Replace the sub-module of ``model`` addressed by the dotted name ``layer`` with ``val``.

    If ``model`` has a ``module`` attribute and the name does not start with 'module',
    the walk starts at ``model.module``. All-digit components are used as integer
    indices, all others as attribute names. The final assignment is a ``setattr`` on
    the parent of the last resolvable component.
    """
    parts = layer.split('.')
    root = model
    if hasattr(model, 'module') and parts[0] != 'module':
        root = model.module

    # First pass: count how many name components resolve, to find the index of the last one.
    num_resolved = 0
    probe = root
    for part in parts:
        if hasattr(probe, part):
            probe = probe[int(part)] if part.isdigit() else getattr(probe, part)
            num_resolved += 1
    last_index = num_resolved - 1

    # Second pass: walk to the parent of that component and swap the attribute there.
    parent = root
    for part in parts[:last_index]:
        parent = parent[int(part)] if part.isdigit() else getattr(parent, part)
    setattr(parent, parts[last_index], val)
|
|
|
|
|
|
|
|
|
|
|
|
def adapt_model_from_string(parent_module, model_string):
    """Build a copy of ``parent_module`` whose layers are re-created with the shapes given in ``model_string``.

    ``model_string`` is a '***'-separated list of ``name:shape`` entries. The first and
    last character of each shape field are dropped (presumably enclosing brackets) and
    the rest is parsed as comma-separated ints; entries with an empty shape are skipped.

    Conv2d / Conv2dSame, BatchNormAct2d, BatchNorm2d and Linear layers of a deep copy
    of the parent are replaced by freshly constructed layers sized from the parsed
    '<name>.weight' shapes. No weights are copied into the new layers here. Both the
    returned module and ``parent_module`` are put into eval mode.
    """
    shapes = {}
    for entry in model_string.split('***'):
        fields = entry.split(':')
        name = fields[0]
        dims = fields[1][1:-1].split(',')
        if dims[0] != '':
            shapes[name] = [int(dim) for dim in dims]

    new_module = deepcopy(parent_module)
    for name, _ in parent_module.named_modules():
        old_module = extract_layer(parent_module, name)
        if isinstance(old_module, (nn.Conv2d, Conv2dSame)):
            conv_cls = Conv2dSame if isinstance(old_module, Conv2dSame) else nn.Conv2d
            weight_shape = shapes[name + '.weight']
            in_channels = weight_shape[1]
            out_channels = weight_shape[0]
            groups = 1
            if old_module.groups > 1:
                # any grouped conv is rebuilt with groups == in_channels == out_channels
                in_channels = out_channels
                groups = in_channels
            replacement = conv_cls(
                in_channels=in_channels, out_channels=out_channels, kernel_size=old_module.kernel_size,
                bias=old_module.bias is not None, padding=old_module.padding, dilation=old_module.dilation,
                groups=groups, stride=old_module.stride)
            set_layer(new_module, name, replacement)
        elif isinstance(old_module, BatchNormAct2d):
            replacement = BatchNormAct2d(
                shapes[name + '.weight'][0], eps=old_module.eps, momentum=old_module.momentum,
                affine=old_module.affine, track_running_stats=True)
            # carry over the drop / act sub-modules of the original layer
            replacement.drop = old_module.drop
            replacement.act = old_module.act
            set_layer(new_module, name, replacement)
        elif isinstance(old_module, nn.BatchNorm2d):
            replacement = nn.BatchNorm2d(
                num_features=shapes[name + '.weight'][0], eps=old_module.eps, momentum=old_module.momentum,
                affine=old_module.affine, track_running_stats=True)
            set_layer(new_module, name, replacement)
        elif isinstance(old_module, nn.Linear):
            # FIXME extra checks to ensure this is actually the FC classifier layer and not a diff Linear layer?
            num_features = shapes[name + '.weight'][1]
            replacement = Linear(
                in_features=num_features, out_features=old_module.out_features, bias=old_module.bias is not None)
            set_layer(new_module, name, replacement)
            if hasattr(new_module, 'num_features'):
                new_module.num_features = num_features

    new_module.eval()
    parent_module.eval()

    return new_module
|
|
|
|
|
|
|
|
|
|
|
|
def adapt_model_from_file(parent_module, model_variant):
    """Adapt ``parent_module`` using the shape spec stored in ``pruned/<model_variant>.txt``.

    The file is looked up in the 'pruned' directory next to this source file; its
    stripped contents are passed to ``adapt_model_from_string``.
    """
    pruned_dir = os.path.join(os.path.dirname(__file__), 'pruned')
    spec_path = os.path.join(pruned_dir, model_variant + '.txt')
    with open(spec_path, 'r') as spec_file:
        model_string = spec_file.read().strip()
    return adapt_model_from_string(parent_module, model_string)
|
|
|
|
|
|
|
|
|
|
|
|
def pretrained_cfg_for_features(pretrained_cfg):
    """Return a deep copy of ``pretrained_cfg`` without the classifier-specific fields.

    The fields 'num_classes', 'crop_pct', 'classifier' and 'global_pool' have little
    relevance for a feature backbone and are removed from the copy; the input cfg is
    left untouched.
    """
    features_cfg = deepcopy(pretrained_cfg)
    # add default final pool size?
    for field in ('num_classes', 'crop_pct', 'classifier', 'global_pool'):
        if field in features_cfg:
            del features_cfg[field]
    return features_cfg
|
|
|
|
|
|
|
|
|
|
|
|
def set_default_kwargs(kwargs, names, pretrained_cfg):
    """Fill ``kwargs`` (in place) with defaults from ``pretrained_cfg`` for the given ``names``.

    Values already present in ``kwargs`` are never overwritten. 'img_size' and
    'in_chans' are both derived from the cfg's 3-element 'input_size' entry (its last
    two elements and its first element respectively); every other name is copied
    verbatim when the cfg holds a non-None value for it.
    """
    for name in names:
        if name in ('img_size', 'in_chans'):
            # for legacy reasons, model __init__ args use img_size + in_chans as separate args while
            # pretrained_cfg has one input_size=(C, H, W) entry
            input_size = pretrained_cfg.get('input_size', None)
            if input_size is None:
                continue
            assert len(input_size) == 3
            derived = input_size[-2:] if name == 'img_size' else input_size[0]
            kwargs.setdefault(name, derived)
        elif pretrained_cfg.get(name, None) is not None:
            kwargs.setdefault(name, pretrained_cfg[name])
|
|
|
|
|
|
|
|
|
|
|
|
def filter_kwargs(kwargs, names):
    """Remove every key listed in ``names`` from ``kwargs`` (in place).

    Keys that are not present are ignored; nothing happens when either argument is
    empty or None.
    """
    if kwargs and names:
        for name in names:
            kwargs.pop(name, None)
|
|
|
|
|
|
|
|
|
|
|
|
def update_pretrained_cfg_and_kwargs(pretrained_cfg, kwargs, kwargs_filter):
    """ Update the model kwargs from the pretrained cfg before passing them to the model

    Args:
        pretrained_cfg: input pretrained cfg (only read by this function)
        kwargs: keyword args passed to model build fn (updated in-place)
        kwargs_filter: keyword arg keys that must be removed before model __init__
    """
    # if fixed_input_size exists and is True, model takes an img_size arg that fixes its input size
    has_fixed_input = pretrained_cfg.get('fixed_input_size', False)
    extra_names = ('img_size',) if has_fixed_input else ()

    # Set model __init__ args that can be determined by default_cfg (if not already passed as kwargs)
    default_kwarg_names = ('num_classes', 'global_pool', 'in_chans') + extra_names
    set_default_kwargs(kwargs, names=default_kwarg_names, pretrained_cfg=pretrained_cfg)

    # Filter keyword args for task specific model variants (some 'features only' models, etc.)
    filter_kwargs(kwargs, names=kwargs_filter)
|
|
|
|
|
|
|
|
|
|
|
|
def resolve_pretrained_cfg(variant: str, pretrained_cfg=None):
    """Resolve the pretrained cfg to use for the model ``variant``.

    Priority order:
      1. a non-empty dict passed as ``pretrained_cfg`` (a deep copy is returned)
      2. the cfg returned by ``get_pretrained_cfg(variant)``
      3. a generic default cfg, after logging a warning
    """
    if pretrained_cfg and isinstance(pretrained_cfg, dict):
        # highest priority, pretrained_cfg available and passed as arg
        return deepcopy(pretrained_cfg)

    # fallback to looking up pretrained cfg in model registry by variant identifier
    registry_cfg = get_pretrained_cfg(variant)
    if registry_cfg:
        return registry_cfg

    _logger.warning(
        f"No pretrained configuration specified for {variant} model. Using a default."
        f" Please add a config to the model pretrained_cfg registry or pass explicitly.")
    return {
        'url': '',
        'num_classes': 1000,
        'input_size': (3, 224, 224),
        'pool_size': None,
        'crop_pct': .9,
        'interpolation': 'bicubic',
        'first_conv': '',
        'classifier': '',
    }
|
|
|
|
|
|
|
|
|
|
|
|
def build_model_with_cfg(
        model_cls: Callable,
        variant: str,
        pretrained: bool,
        pretrained_cfg: Optional[Dict] = None,
        model_cfg: Optional[Any] = None,
        feature_cfg: Optional[Dict] = None,
        pretrained_strict: bool = True,
        pretrained_filter_fn: Optional[Callable] = None,
        pretrained_custom_load: bool = False,
        kwargs_filter: Optional[Tuple[str]] = None,
        **kwargs):
    """ Build model with specified default_cfg and optional model_cfg

    This helper fn aids in the construction of a model including:
      * handling default_cfg and associated pretrained weight loading
      * passing through optional model_cfg for models with config based arch spec
      * features_only model adaptation
      * pruning config / model adaptation

    The kwargs 'pruned', 'features_only' and (with features_only) 'out_indices' are consumed
    here and not passed on to the model constructor.

    Args:
        model_cls (nn.Module): model class
        variant (str): model variant name
        pretrained (bool): load pretrained weights
        pretrained_cfg (dict): model's pretrained weight/task config
        model_cfg (Optional[Dict]): model's architecture config
        feature_cfg (Optional[Dict]): feature extraction adapter config (not modified by this fn)
        pretrained_strict (bool): load pretrained weights strictly
        pretrained_filter_fn (Optional[Callable]): filter callable for pretrained weights
        pretrained_custom_load (bool): use custom load fn, to load numpy or other non PyTorch weights
        kwargs_filter (Optional[Tuple]): kwargs to filter before passing to model
        **kwargs: model args passed through to model __init__

    Returns:
        The constructed model, wrapped in a feature extraction module when `features_only` was set.

    Raises:
        AssertionError: if feature_cfg['feature_cls'] is a string that names no known feature class.
    """
    pruned = kwargs.pop('pruned', False)
    features = False
    # Work on a shallow copy: the cfg is mutated below (setdefault / pop) and the caller's dict,
    # which may be shared between calls, must not lose or gain entries as a side effect.
    feature_cfg = dict(feature_cfg) if feature_cfg else {}

    # resolve and update model pretrained config and model kwargs
    pretrained_cfg = resolve_pretrained_cfg(variant, pretrained_cfg=pretrained_cfg)
    update_pretrained_cfg_and_kwargs(pretrained_cfg, kwargs, kwargs_filter)
    pretrained_cfg.setdefault('architecture', variant)

    # Setup for feature extraction wrapper done at end of this fn
    if kwargs.pop('features_only', False):
        features = True
        feature_cfg.setdefault('out_indices', (0, 1, 2, 3, 4))
        if 'out_indices' in kwargs:
            feature_cfg['out_indices'] = kwargs.pop('out_indices')

    # Build the model
    model = model_cls(**kwargs) if model_cfg is None else model_cls(cfg=model_cfg, **kwargs)
    model.pretrained_cfg = pretrained_cfg
    model.default_cfg = model.pretrained_cfg  # alias for backwards compat

    if pruned:
        model = adapt_model_from_file(model, variant)

    # For classification models, check class attr, then kwargs, then default to 1k, otherwise 0 for feats
    num_classes_pretrained = 0 if features else getattr(model, 'num_classes', kwargs.get('num_classes', 1000))
    if pretrained:
        if pretrained_custom_load:
            # FIXME improve custom load trigger
            load_custom_pretrained(model, pretrained_cfg=pretrained_cfg)
        else:
            load_pretrained(
                model,
                pretrained_cfg=pretrained_cfg,
                num_classes=num_classes_pretrained,
                in_chans=kwargs.get('in_chans', 3),
                filter_fn=pretrained_filter_fn,
                strict=pretrained_strict)

    # Wrap the model in a feature extraction module if enabled
    if features:
        feature_cls = FeatureListNet
        if 'feature_cls' in feature_cfg:
            feature_cls = feature_cfg.pop('feature_cls')
            if isinstance(feature_cls, str):
                feature_cls = feature_cls.lower()
                if 'hook' in feature_cls:
                    feature_cls = FeatureHookNet
                elif feature_cls == 'fx':
                    feature_cls = FeatureGraphNet
                else:
                    # raised explicitly (same exception type as before) so the check survives `python -O`
                    raise AssertionError(f'Unknown feature class {feature_cls}')
        model = feature_cls(model, **feature_cfg)
        model.pretrained_cfg = pretrained_cfg_for_features(pretrained_cfg)  # add back default_cfg
        model.default_cfg = model.pretrained_cfg  # alias for backwards compat

    return model
|
|
|
|
|
|
|
|
|
|
|
|
def model_parameters(model, exclude_head=False):
    """Return the model's parameters, optionally without the last two.

    With ``exclude_head=False`` the result of ``model.parameters()`` is returned
    unchanged; otherwise a list of all parameters except the final two is returned.
    """
    if not exclude_head:
        return model.parameters()
    # FIXME this a bit of a quick and dirty hack to skip classifier head params based on ordering
    return list(model.parameters())[:-2]
|
|
|
|
|
|
|
|
|
|
|
|
def named_apply(fn: Callable, module: nn.Module, name='', depth_first=True, include_root=False) -> nn.Module:
    """Recursively call ``fn(module=..., name=...)`` on the children of ``module``.

    Child names are dotted paths prefixed with ``name``. With ``depth_first`` a module
    is visited after its children, otherwise before them. The module passed in is only
    visited itself when ``include_root`` is True; all descendants are always visited.

    Returns:
        The ``module`` argument.
    """
    visit_here = include_root
    if visit_here and not depth_first:
        fn(module=module, name=name)
    for child_name, child_module in module.named_children():
        full_name = name + '.' + child_name if name else child_name
        named_apply(fn=fn, module=child_module, name=full_name, depth_first=depth_first, include_root=True)
    if visit_here and depth_first:
        fn(module=module, name=name)
    return module
|
|
|
|
|
|
|
|
|
|
|
|
def named_modules(module: nn.Module, name='', depth_first=True, include_root=False):
    """Yield ``(name, module)`` pairs for the descendants of ``module``.

    Names are dotted paths prefixed with ``name``. With ``depth_first`` a module is
    yielded after its children, otherwise before them. The module passed in is only
    yielded itself when ``include_root`` is True.
    """
    emit_root = include_root
    if emit_root and not depth_first:
        yield name, module
    for child_name, child_module in module.named_children():
        full_name = name + '.' + child_name if name else child_name
        yield from named_modules(
            module=child_module, name=full_name, depth_first=depth_first, include_root=True)
    if emit_root and depth_first:
        yield name, module
|
|
|
|
|
|
|
|
|
|
|
|
def named_modules_with_params(module: nn.Module, name='', depth_first=True, include_root=False):
    """Yield ``(name, module)`` pairs for descendants that directly own parameters.

    Same traversal as a plain named-module walk (dotted names prefixed with ``name``,
    children before the parent when ``depth_first``), but a module is only yielded when
    its own ``_parameters`` dict is non-empty. The module passed in is only considered
    when ``include_root`` is True.
    """
    if module._parameters and not depth_first and include_root:
        yield name, module
    for child_name, child_module in module.named_children():
        full_name = name + '.' + child_name if name else child_name
        yield from named_modules_with_params(
            module=child_module, name=full_name, depth_first=depth_first, include_root=True)
    if module._parameters and depth_first and include_root:
        yield name, module
|
|
|
|
|
|
|
|
|
|
|
|
# Suffix for a (regex, suffix) match spec: a grouping key ending in this value is merged into the
# previous layer group instead of starting a new one.
MATCH_PREV_GROUP = (99999,)


def group_with_matcher(
        named_objects,
        group_matcher: Union[Dict, Callable],
        output_values: bool = False,
        reverse: bool = False
):
    """Partition named objects into consecutively numbered groups.

    Args:
        named_objects: Iterable of ``(name, object)`` pairs.
        group_matcher: Either a callable mapping a name to an ordinal (or an iterable of
            ordinals), or a dict whose values are regex strings or lists of
            ``(regex, suffix)`` pairs. For dict matchers the sort key of a name is the
            position of the first matching dict entry, followed by the regex's captured
            groups and the suffix (all as floats); names matching nothing sort last.
        output_values: Collect the objects instead of their names.
        reverse: Return a ``name -> group id`` dict instead (names only).

    Returns:
        A defaultdict mapping group id (0, 1, ...) to the list of names / objects in
        that group, or the reverse mapping when ``reverse`` is True.
    """
    if isinstance(group_matcher, dict):
        # dictionary matcher contains raw-string regex expr that must be compiled; every spec
        # becomes a 3-tuple (compiled re, prefix, suffix)
        compiled = []
        for ordinal, spec in enumerate(group_matcher.values()):
            if spec is None:
                continue
            if isinstance(spec, (tuple, list)):
                # multi-entry match specifications require each sub-spec to be a 2-tuple (re, suffix)
                compiled.extend((re.compile(sub[0]), (ordinal,), sub[1]) for sub in spec)
            else:
                compiled.append((re.compile(spec), (ordinal,), None))
        group_matcher = compiled

    def _get_grouping(name):
        if not isinstance(group_matcher, (list, tuple)):
            key = group_matcher(name)
            return tuple(key) if isinstance(key, collections.abc.Iterable) else (key,)
        for pattern, prefix, suffix in group_matcher:
            found = pattern.match(name)
            if found:
                # drop empty / None parts, then flatten into a tuple of floats for numeric sort
                parts = [part for part in (prefix, found.groups(), suffix) if part]
                return tuple(float(elem) for elem in chain.from_iterable(parts))
        return (float('inf'),)  # un-matched layers (neck, head) mapped to largest ordinal

    # map layers into groups via ordinals (ints or tuples of ints) from matcher
    grouping = defaultdict(list)
    for obj_name, obj in named_objects:
        grouping[_get_grouping(obj_name)].append(obj if output_values else obj_name)

    # remap to integers
    layer_id_to_param = defaultdict(list)
    lid = -1
    for key in sorted(k for k in grouping if k is not None):
        if lid < 0 or key[-1] != MATCH_PREV_GROUP[0]:
            lid += 1
        layer_id_to_param[lid].extend(grouping[key])

    if not reverse:
        return layer_id_to_param

    assert not output_values, "reverse mapping only sensible for name output"
    # output reverse mapping
    return {member: group_id for group_id, members in layer_id_to_param.items() for member in members}
|
|
|
|
|
|
|
|
|
|
|
|
def group_parameters(
        module: nn.Module,
        group_matcher,
        output_values=False,
        reverse=False,
):
    """Group the named parameters of ``module`` with ``group_with_matcher``.

    ``group_matcher``, ``output_values`` and ``reverse`` are forwarded unchanged.
    """
    named_params = module.named_parameters()
    return group_with_matcher(named_params, group_matcher, output_values=output_values, reverse=reverse)
|
|
|
|
|
|
|
|
|
|
|
|
def group_modules(
        module: nn.Module,
        group_matcher,
        output_values=False,
        reverse=False,
):
    """Group the parameter-owning sub-modules of ``module`` with ``group_with_matcher``.

    The candidates come from ``named_modules_with_params(module)``; ``group_matcher``,
    ``output_values`` and ``reverse`` are forwarded unchanged.
    """
    candidates = named_modules_with_params(module)
    return group_with_matcher(candidates, group_matcher, output_values=output_values, reverse=reverse)
|
|
|
|
|
|
|
|
|
|
|
|
def checkpoint_seq(
        functions,
        x,
        every=1,
        flatten=False,
        skip_last=False,
        preserve_rng_state=True
):
    r"""Run a sequence of functions on ``x`` with activation checkpointing.

    The sequence is cut into consecutive segments of ``every`` functions and each
    segment is executed through :func:`torch.utils.checkpoint.checkpoint`, so only the
    segment inputs are kept for the backward pass and the segment is re-run when
    gradients are needed.

    See :func:`~torch.utils.checkpoint.checkpoint` on how checkpointing works and for
    its restrictions (e.g. regarding :func:`torch.autograd.grad` and inputs that need
    ``requires_grad=True``).

    Args:
        functions: A :class:`torch.nn.Sequential` or the list of modules or functions to run sequentially.
        x: A Tensor that is input to :attr:`functions`
        every: checkpoint every-n functions (default: 1)
        flatten (bool): flatten nn.Sequential of nn.Sequentials
        skip_last (bool): run the last function in the sequence without checkpointing if True
        preserve_rng_state (bool, optional, default=True): passed through to
            :func:`~torch.utils.checkpoint.checkpoint` for every segment.

    Returns:
        Output of running :attr:`functions` sequentially on :attr:`x`

    Example:
        >>> model = nn.Sequential(...)
        >>> input_var = checkpoint_seq(model, input_var, every=2)
    """
    def make_segment(first, last, fns):
        # callable that applies fns[first..last] (both inclusive) in order
        def forward(_x):
            for fn in fns[first:last + 1]:
                _x = fn(_x)
            return _x
        return forward

    if isinstance(functions, torch.nn.Sequential):
        functions = functions.children()
    if flatten:
        functions = chain.from_iterable(functions)
    if not isinstance(functions, (tuple, list)):
        functions = tuple(functions)

    total = len(functions)
    num_checkpointed = total - 1 if skip_last else total
    end = -1
    for start in range(0, num_checkpointed, every):
        end = min(start + every, num_checkpointed) - 1
        x = checkpoint(make_segment(start, end, functions), x, preserve_rng_state=preserve_rng_state)
    if not skip_last:
        return x
    # whatever was left out above (the last function) runs without checkpointing
    return make_segment(end + 1, total - 1, functions)(x)
|
|
|
|
|
|
|
|
|
|
|
|
def flatten_modules(named_modules, depth=1, prefix='', module_types='sequential'):
    """Yield ``(name, module)`` pairs with container modules expanded up to ``depth`` levels.

    Args:
        named_modules: Iterable of ``(name, module)`` pairs.
        depth: How many levels of containers to expand; 0 yields the input pairs as-is.
        prefix: Name prefix. A str prefix gives dotted string names, a tuple prefix
            gives tuple names.
        module_types: 'container' expands nn.Sequential, nn.ModuleList and
            nn.ModuleDict, any other string expands nn.Sequential only; a tuple of
            types is used as given.

    NOTE(review): when a container is expanded, its children are prefixed with the
    container's own name only -- the ``prefix`` in effect at that level is not carried
    into the recursion.
    """
    prefix_is_tuple = isinstance(prefix, tuple)
    if isinstance(module_types, str):
        module_types = (
            (nn.Sequential, nn.ModuleList, nn.ModuleDict) if module_types == 'container' else (nn.Sequential,)
        )

    for name, module in named_modules:
        if depth and isinstance(module, module_types):
            child_prefix = (name,) if prefix_is_tuple else name
            yield from flatten_modules(
                module.named_children(),
                depth - 1,
                prefix=child_prefix,
                module_types=module_types,
            )
            continue

        if prefix_is_tuple:
            yield prefix + (name,), module
        elif prefix:
            yield '.'.join([prefix, name]), module
        else:
            yield name, module
|