From 1a8f5900abebccf435aed4ed5c37e980618986e9 Mon Sep 17 00:00:00 2001 From: Ross Wightman Date: Thu, 9 Apr 2020 01:41:54 -0700 Subject: [PATCH] Update EfficientNet feature extraction for EfficientDet. Add needed MaxPoolSame as well. --- timm/models/efficientnet.py | 44 ++++++++++++----- timm/models/efficientnet_blocks.py | 64 ++++++++++++------------ timm/models/efficientnet_builder.py | 24 ++++----- timm/models/layers/__init__.py | 3 +- timm/models/layers/avg_pool2d_same.py | 31 ------------ timm/models/layers/cond_conv2d.py | 3 +- timm/models/layers/conv2d_same.py | 29 ++--------- timm/models/layers/padding.py | 29 +++++++++-- timm/models/layers/pool2d_same.py | 71 +++++++++++++++++++++++++++ 9 files changed, 182 insertions(+), 116 deletions(-) delete mode 100644 timm/models/layers/avg_pool2d_same.py create mode 100644 timm/models/layers/pool2d_same.py diff --git a/timm/models/efficientnet.py b/timm/models/efficientnet.py index 48678e67..4ef152d5 100644 --- a/timm/models/efficientnet.py +++ b/timm/models/efficientnet.py @@ -326,7 +326,6 @@ class EfficientNet(nn.Module): # Stem if not fix_stem: stem_size = round_channels(stem_size, channel_multiplier, channel_divisor, channel_min) - print(stem_size) self.conv_stem = create_conv2d(self._in_chs, stem_size, 3, stride=2, padding=pad_type) self.bn1 = norm_layer(stem_size, **norm_kwargs) self.act1 = act_layer(inplace=True) @@ -393,7 +392,7 @@ class EfficientNetFeatures(nn.Module): and object detection models. """ - def __init__(self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='pre_pwl', + def __init__(self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='bottleneck', in_chans=3, stem_size=32, channel_multiplier=1.0, channel_divisor=8, channel_min=None, output_stride=32, pad_type='', fix_stem=False, act_layer=nn.ReLU, drop_rate=0., drop_path_rate=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None): @@ -404,6 +403,7 @@ class EfficientNetFeatures(nn.Module): num_stages = max(out_indices) + 1 self.out_indices = out_indices + self.feature_location = feature_location self.drop_rate = drop_rate self._in_chs = in_chans @@ -420,18 +420,23 @@ class EfficientNetFeatures(nn.Module): channel_multiplier, channel_divisor, channel_min, output_stride, pad_type, act_layer, se_kwargs, norm_layer, norm_kwargs, drop_path_rate, feature_location=feature_location, verbose=_DEBUG) self.blocks = nn.Sequential(*builder(self._in_chs, block_args)) - self.feature_info = builder.features # builder provides info about feature channels for each block + self._feature_info = builder.features # builder provides info about feature channels for each block + self._stage_to_feature_idx = { + v['stage_idx']: fi for fi, v in self._feature_info.items() if fi in self.out_indices} self._in_chs = builder.in_chs efficientnet_init_weights(self) if _DEBUG: - for k, v in self.feature_info.items(): + for k, v in self._feature_info.items(): print('Feature idx: {}: Name: {}, Channels: {}'.format(k, v['name'], v['num_chs'])) # Register feature extraction hooks with FeatureHooks helper - hook_type = 'forward_pre' if feature_location == 'pre_pwl' else 'forward' - hooks = [dict(name=self.feature_info[idx]['name'], type=hook_type) for idx in out_indices] - self.feature_hooks = FeatureHooks(hooks, self.named_modules()) + self.feature_hooks = None + if feature_location != 'bottleneck': + hooks = [dict( + name=self._feature_info[idx]['module'], + type=self._feature_info[idx]['hook_type']) for idx in out_indices] + self.feature_hooks = FeatureHooks(hooks, 
self.named_modules()) def feature_channels(self, idx=None): """ Feature Channel Shortcut @@ -439,15 +444,32 @@ class EfficientNetFeatures(nn.Module): return feature channel count for that feature block index (independent of out_indices setting). """ if isinstance(idx, int): - return self.feature_info[idx]['num_chs'] - return [self.feature_info[i]['num_chs'] for i in self.out_indices] + return self._feature_info[idx]['num_chs'] + return [self._feature_info[i]['num_chs'] for i in self.out_indices] + + def feature_info(self, idx=None): + """ Feature Channel Shortcut + Returns feature channel count for each output index if idx == None. If idx is an integer, will + return feature channel count for that feature block index (independent of out_indices setting). + """ + if isinstance(idx, int): + return self._feature_info[idx] + return [self._feature_info[i] for i in self.out_indices] def forward(self, x): x = self.conv_stem(x) x = self.bn1(x) x = self.act1(x) - self.blocks(x) - return self.feature_hooks.get_output(x.device) + if self.feature_hooks is None: + features = [] + for i, b in enumerate(self.blocks): + x = b(x) + if i in self._stage_to_feature_idx: + features.append(x) + return features + else: + self.blocks(x) + return self.feature_hooks.get_output(x.device) def _create_model(model_kwargs, default_cfg, pretrained=False): diff --git a/timm/models/efficientnet_blocks.py b/timm/models/efficientnet_blocks.py index b5de664d..cc4cdef1 100644 --- a/timm/models/efficientnet_blocks.py +++ b/timm/models/efficientnet_blocks.py @@ -120,11 +120,13 @@ class ConvBnAct(nn.Module): self.bn1 = norm_layer(out_chs, **norm_kwargs) self.act1 = act_layer(inplace=True) - def feature_module(self, location): - return 'act1' - - def feature_channels(self, location): - return self.conv.out_channels + def feature_info(self, location): + if location == 'expansion' or location == 'depthwise': + # no expansion or depthwise this block, use act after conv + info = dict(module='act1', hook_type='forward', num_chs=self.conv.out_channels) + else: # location == 'bottleneck' + info = dict(module='', hook_type='', num_chs=self.conv.out_channels) + return info def forward(self, x): x = self.conv(x) @@ -165,12 +167,15 @@ class DepthwiseSeparableConv(nn.Module): self.bn2 = norm_layer(out_chs, **norm_kwargs) self.act2 = act_layer(inplace=True) if self.has_pw_act else nn.Identity() - def feature_module(self, location): - # no expansion in this block, pre pw only feature extraction point - return 'conv_pw' - - def feature_channels(self, location): - return self.conv_pw.in_channels + def feature_info(self, location): + if location == 'expansion': + # no expansion in this block, use depthwise, before SE + info = dict(module='act1', hook_type='forward', num_chs=self.conv_pw.in_channels) + elif location == 'depthwise': # after SE + info = dict(module='conv_pw', hook_type='forward_pre', num_chs=self.conv_pw.in_channels) + else: # location == 'bottleneck' + info = dict(module='', hook_type='', num_chs=self.conv_pw.out_channels) + return info def forward(self, x): residual = x @@ -232,16 +237,14 @@ class InvertedResidual(nn.Module): self.conv_pwl = create_conv2d(mid_chs, out_chs, pw_kernel_size, padding=pad_type, **conv_kwargs) self.bn3 = norm_layer(out_chs, **norm_kwargs) - def feature_module(self, location): - if location == 'post_exp': - return 'act1' - return 'conv_pwl' - - def feature_channels(self, location): - if location == 'post_exp': - return self.conv_pw.out_channels - # location == 'pre_pw' - return self.conv_pwl.in_channels + 
def feature_info(self, location): + if location == 'expansion': + info = dict(module='act1', hook_type='forward', num_chs=self.conv_pw.in_channels) + elif location == 'depthwise': # after SE + info = dict(module='conv_pwl', hook_type='forward_pre', num_chs=self.conv_pwl.in_channels) + else: # location == 'bottleneck' + info = dict(module='', hook_type='', num_chs=self.conv_pwl.out_channels) + return info def forward(self, x): residual = x @@ -359,16 +362,15 @@ class EdgeResidual(nn.Module): mid_chs, out_chs, pw_kernel_size, stride=stride, dilation=dilation, padding=pad_type) self.bn2 = norm_layer(out_chs, **norm_kwargs) - def feature_module(self, location): - if location == 'post_exp': - return 'act1' - return 'conv_pwl' - - def feature_channels(self, location): - if location == 'post_exp': - return self.conv_exp.out_channels - # location == 'pre_pw' - return self.conv_pwl.in_channels + def feature_info(self, location): + if location == 'expansion': + info = dict(module='act1', hook_type='forward', num_chs=self.conv_exp.out_channels) + elif location == 'depthwise': + # there is no depthwise, take after SE, before PWL + info = dict(module='conv_pwl', hook_type='forward_pre', num_chs=self.conv_pwl.in_channels) + else: # location == 'bottleneck' + info = dict(module='', hook_type='', num_chs=self.conv_pwl.out_channels) + return info def forward(self, x): residual = x diff --git a/timm/models/efficientnet_builder.py b/timm/models/efficientnet_builder.py index 3876a2d1..842098cf 100644 --- a/timm/models/efficientnet_builder.py +++ b/timm/models/efficientnet_builder.py @@ -218,7 +218,7 @@ class EfficientNetBuilder: self.norm_kwargs = norm_kwargs self.drop_path_rate = drop_path_rate self.feature_location = feature_location - assert feature_location in ('pre_pwl', 'post_exp', '') + assert feature_location in ('bottleneck', 'depthwise', 'expansion', '') self.verbose = verbose # state updated during build, consumed by model @@ -313,20 +313,21 @@ class EfficientNetBuilder: block_args['stride'] = 1 do_extract = False - if self.feature_location == 'pre_pwl': + if self.feature_location == 'bottleneck' or self.feature_location == 'depthwise': if last_block: next_stage_idx = stage_idx + 1 if next_stage_idx >= len(model_block_args): do_extract = True else: do_extract = model_block_args[next_stage_idx][0]['stride'] > 1 - elif self.feature_location == 'post_exp': - if block_args['stride'] > 1 or (last_stack and last_block) : + elif self.feature_location == 'expansion': + if block_args['stride'] > 1 or (last_stack and last_block): do_extract = True if do_extract: extract_features = self.feature_location next_dilation = current_dilation + next_output_stride = current_stride if block_args['stride'] > 1: next_output_stride = current_stride * block_args['stride'] if next_output_stride > self.output_stride: @@ -347,14 +348,13 @@ class EfficientNetBuilder: # stash feature module name and channel info for model feature extraction if extract_features: - feature_module = block.feature_module(extract_features) - if feature_module: - feature_module = 'blocks.{}.{}.'.format(stage_idx, block_idx) + feature_module - feature_channels = block.feature_channels(extract_features) - self.features[feature_idx] = dict( - name=feature_module, - num_chs=feature_channels - ) + feature_info = block.feature_info(extract_features) + if feature_info['module']: + feature_info['module'] = 'blocks.{}.{}.'.format(stage_idx, block_idx) + feature_info['module'] + feature_info['stage_idx'] = stage_idx + feature_info['block_idx'] = block_idx + 
feature_info['reduction'] = current_stride + self.features[feature_idx] = feature_info feature_idx += 1 total_block_idx += 1 # incr global block idx (across all stacks) diff --git a/timm/models/layers/__init__.py b/timm/models/layers/__init__.py index f012c3cf..3dec7498 100644 --- a/timm/models/layers/__init__.py +++ b/timm/models/layers/__init__.py @@ -1,9 +1,10 @@ from .padding import get_padding -from .avg_pool2d_same import AvgPool2dSame +from .pool2d_same import AvgPool2dSame from .conv2d_same import Conv2dSame from .conv_bn_act import ConvBnAct from .mixed_conv2d import MixedConv2d from .cond_conv2d import CondConv2d, get_condconv_initializer +from .pool2d_same import create_pool2d from .create_conv2d import create_conv2d from .create_attn import create_attn from .selective_kernel import SelectiveKernelConv diff --git a/timm/models/layers/avg_pool2d_same.py b/timm/models/layers/avg_pool2d_same.py deleted file mode 100644 index 33656e79..00000000 --- a/timm/models/layers/avg_pool2d_same.py +++ /dev/null @@ -1,31 +0,0 @@ -""" AvgPool2d w/ Same Padding - -Hacked together by Ross Wightman -""" -import torch -import torch.nn as nn -import torch.nn.functional as F -from typing import List -import math - -from .helpers import tup_pair -from .padding import pad_same - - -def avg_pool2d_same(x, kernel_size: List[int], stride: List[int], padding: List[int] = (0, 0), - ceil_mode: bool = False, count_include_pad: bool = True): - x = pad_same(x, kernel_size, stride) - return F.avg_pool2d(x, kernel_size, stride, (0, 0), ceil_mode, count_include_pad) - - -class AvgPool2dSame(nn.AvgPool2d): - """ Tensorflow like 'SAME' wrapper for 2D average pooling - """ - def __init__(self, kernel_size: int, stride=None, padding=0, ceil_mode=False, count_include_pad=True): - kernel_size = tup_pair(kernel_size) - stride = tup_pair(stride) - super(AvgPool2dSame, self).__init__(kernel_size, stride, (0, 0), ceil_mode, count_include_pad) - - def forward(self, x): - return avg_pool2d_same( - x, self.kernel_size, self.stride, self.padding, self.ceil_mode, self.count_include_pad) diff --git a/timm/models/layers/cond_conv2d.py b/timm/models/layers/cond_conv2d.py index 7b038ee7..0241b501 100644 --- a/timm/models/layers/cond_conv2d.py +++ b/timm/models/layers/cond_conv2d.py @@ -14,7 +14,8 @@ from torch import nn as nn from torch.nn import functional as F from .helpers import tup_pair -from .conv2d_same import get_padding_value, conv2d_same +from .conv2d_same import conv2d_same +from timm.models.layers.padding import get_padding_value def get_condconv_initializer(initializer, num_experts, expert_shape): diff --git a/timm/models/layers/conv2d_same.py b/timm/models/layers/conv2d_same.py index 0e29ae8c..863d1783 100644 --- a/timm/models/layers/conv2d_same.py +++ b/timm/models/layers/conv2d_same.py @@ -5,10 +5,10 @@ Hacked together by Ross Wightman import torch import torch.nn as nn import torch.nn.functional as F -from typing import Union, List, Tuple, Optional, Callable -import math +from typing import Tuple, Optional -from .padding import get_padding, pad_same, is_static_pad +from timm.models.layers.padding import get_padding_value +from .padding import pad_same def conv2d_same( @@ -31,29 +31,6 @@ class Conv2dSame(nn.Conv2d): return conv2d_same(x, self.weight, self.bias, self.stride, self.padding, self.dilation, self.groups) -def get_padding_value(padding, kernel_size, **kwargs) -> Tuple[Tuple, bool]: - dynamic = False - if isinstance(padding, str): - # for any string padding, the padding will be calculated for you, one of 
three ways - padding = padding.lower() - if padding == 'same': - # TF compatible 'SAME' padding, has a performance and GPU memory allocation impact - if is_static_pad(kernel_size, **kwargs): - # static case, no extra overhead - padding = get_padding(kernel_size, **kwargs) - else: - # dynamic 'SAME' padding, has runtime/GPU memory overhead - padding = 0 - dynamic = True - elif padding == 'valid': - # 'VALID' padding, same as padding=0 - padding = 0 - else: - # Default to PyTorch style 'same'-ish symmetric padding - padding = get_padding(kernel_size, **kwargs) - return padding, dynamic - - def create_conv2d_pad(in_chs, out_chs, kernel_size, **kwargs): padding = kwargs.pop('padding', '') kwargs.setdefault('bias', False) diff --git a/timm/models/layers/padding.py b/timm/models/layers/padding.py index b3653866..0fca7cc6 100644 --- a/timm/models/layers/padding.py +++ b/timm/models/layers/padding.py @@ -3,7 +3,7 @@ Hacked together by Ross Wightman """ import math -from typing import List +from typing import List, Tuple import torch.nn.functional as F @@ -25,9 +25,32 @@ def is_static_pad(kernel_size: int, stride: int = 1, dilation: int = 1, **_): # Dynamically pad input x with 'SAME' padding for conv with specified args -def pad_same(x, k: List[int], s: List[int], d: List[int] = (1, 1)): +def pad_same(x, k: List[int], s: List[int], d: List[int] = (1, 1), value: float = 0): ih, iw = x.size()[-2:] pad_h, pad_w = get_same_padding(ih, k[0], s[0], d[0]), get_same_padding(iw, k[1], s[1], d[1]) if pad_h > 0 or pad_w > 0: - x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2]) + x = F.pad(x, [pad_w // 2, pad_w - pad_w // 2, pad_h // 2, pad_h - pad_h // 2], value=value) return x + + +def get_padding_value(padding, kernel_size, **kwargs) -> Tuple[Tuple, bool]: + dynamic = False + if isinstance(padding, str): + # for any string padding, the padding will be calculated for you, one of three ways + padding = padding.lower() + if padding == 'same': + # TF compatible 'SAME' padding, has a performance and GPU memory allocation impact + if is_static_pad(kernel_size, **kwargs): + # static case, no extra overhead + padding = get_padding(kernel_size, **kwargs) + else: + # dynamic 'SAME' padding, has runtime/GPU memory overhead + padding = 0 + dynamic = True + elif padding == 'valid': + # 'VALID' padding, same as padding=0 + padding = 0 + else: + # Default to PyTorch style 'same'-ish symmetric padding + padding = get_padding(kernel_size, **kwargs) + return padding, dynamic diff --git a/timm/models/layers/pool2d_same.py b/timm/models/layers/pool2d_same.py new file mode 100644 index 00000000..40f6dacc --- /dev/null +++ b/timm/models/layers/pool2d_same.py @@ -0,0 +1,71 @@ +""" AvgPool2d w/ Same Padding + +Hacked together by Ross Wightman +""" +import torch +import torch.nn as nn +import torch.nn.functional as F +from typing import Union, List, Tuple, Optional +import math + +from .helpers import tup_pair +from .padding import pad_same, get_padding_value + + +def avg_pool2d_same(x, kernel_size: List[int], stride: List[int], padding: List[int] = (0, 0), + ceil_mode: bool = False, count_include_pad: bool = True): + # FIXME how to deal with count_include_pad vs not for external padding? 
+    x = pad_same(x, kernel_size, stride)
+    return F.avg_pool2d(x, kernel_size, stride, (0, 0), ceil_mode, count_include_pad)
+
+
+class AvgPool2dSame(nn.AvgPool2d):
+    """ Tensorflow like 'SAME' wrapper for 2D average pooling
+    """
+    def __init__(self, kernel_size: int, stride=None, padding=0, ceil_mode=False, count_include_pad=True):
+        kernel_size = tup_pair(kernel_size)
+        stride = tup_pair(stride)
+        super(AvgPool2dSame, self).__init__(kernel_size, stride, (0, 0), ceil_mode, count_include_pad)
+
+    def forward(self, x):
+        return avg_pool2d_same(
+            x, self.kernel_size, self.stride, self.padding, self.ceil_mode, self.count_include_pad)
+
+
+def max_pool2d_same(
+        x, kernel_size: List[int], stride: List[int], padding: List[int] = (0, 0),
+        dilation: List[int] = (1, 1), ceil_mode: bool = False):
+    x = pad_same(x, kernel_size, stride, value=-float('inf'))
+    return F.max_pool2d(x, kernel_size, stride, (0, 0), dilation, ceil_mode)
+
+
+class MaxPool2dSame(nn.MaxPool2d):
+    """ Tensorflow like 'SAME' wrapper for 2D max pooling
+    """
+    def __init__(self, kernel_size: int, stride=None, padding=0, dilation=1, ceil_mode=False):
+        kernel_size = tup_pair(kernel_size)
+        stride = tup_pair(stride)
+        super(MaxPool2dSame, self).__init__(kernel_size, stride, (0, 0), dilation, False, ceil_mode)
+
+    def forward(self, x):
+        return max_pool2d_same(x, self.kernel_size, self.stride, self.padding, self.dilation, self.ceil_mode)
+
+
+def create_pool2d(pool_type, kernel_size, stride=None, **kwargs):
+    stride = stride or kernel_size
+    padding = kwargs.pop('padding', '')
+    padding, is_dynamic = get_padding_value(padding, kernel_size, stride=stride, **kwargs)
+    if is_dynamic:
+        if pool_type == 'avg':
+            return AvgPool2dSame(kernel_size, stride=stride, **kwargs)
+        elif pool_type == 'max':
+            return MaxPool2dSame(kernel_size, stride=stride, **kwargs)
+        else:
+            assert False, f'Unsupported pool type {pool_type}'
+    else:
+        if pool_type == 'avg':
+            return nn.AvgPool2d(kernel_size, stride=stride, padding=padding, **kwargs)
+        elif pool_type == 'max':
+            return nn.MaxPool2d(kernel_size, stride=stride, padding=padding, **kwargs)
+        else:
+            assert False, f'Unsupported pool type {pool_type}'
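
A quick sanity-check sketch for the new pooling factory: it builds both a statically padded nn.MaxPool2d and a dynamically padded MaxPool2dSame via create_pool2d and compares output sizes. This is a minimal sketch assuming the patch above is applied on top of timm; the tensor size and the arithmetic in the comments are illustrative only.

    # Minimal sketch, assuming this patch is applied to timm.
    import torch
    from timm.models.layers import create_pool2d

    x = torch.randn(1, 32, 17, 17)  # odd spatial size so the 'SAME' padding math is visible

    # Empty padding string -> symmetric static padding, resolved to a plain nn.MaxPool2d.
    pool_static = create_pool2d('max', kernel_size=3, stride=2, padding='')
    # 'same' with stride > 1 cannot be expressed as static padding (see is_static_pad),
    # so this falls back to MaxPool2dSame, which pads at runtime with -inf before F.max_pool2d.
    pool_same = create_pool2d('max', kernel_size=3, stride=2, padding='same')

    print(pool_static(x).shape)  # floor((17 + 2*1 - 3) / 2) + 1 = 9
    print(pool_same(x).shape)    # TF 'SAME': ceil(17 / 2) = 9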
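And a sketch of how a detection neck (EfficientDet-style) might consume the reworked EfficientNetFeatures metadata. Only feature_channels(), feature_info() and the list-returning 'bottleneck' forward come from the diff above; the create_model keyword plumbing (features_only, out_indices, feature_location) and the B0 channel counts in the comments are assumptions of this sketch.

    # Sketch only: the backbone construction kwargs are assumed to be forwarded by timm.create_model.
    import torch
    import torch.nn as nn
    import timm

    backbone = timm.create_model(
        'tf_efficientnet_b0', features_only=True, out_indices=(2, 3, 4),
        feature_location='bottleneck')  # 'bottleneck' is the new default location

    chs = backbone.feature_channels()   # per-index 'num_chs', e.g. [40, 112, 320] for B0
    infos = backbone.feature_info()     # full dicts: num_chs, reduction, stage_idx, ...

    # Size lateral 1x1 convs from the metadata instead of probing with a dummy forward pass.
    laterals = nn.ModuleList([nn.Conv2d(c, 64, kernel_size=1) for c in chs])

    feats = backbone(torch.randn(1, 3, 224, 224))  # 'bottleneck' mode returns the feature list directly
    for info, lat, f in zip(infos, laterals, feats):
        print(info['reduction'], lat(f).shape)     # e.g. reductions 8/16/32 with 64-ch outputs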