Unify drop connect vs drop path under 'drop path' name, switch all EfficientNet/MobilenetV3 refs to 'drop_path'. Update factory to handle new drop args.

pull/88/head
Ross Wightman 5 years ago
parent f1d5f8a6c4
commit 43225d110c

@ -253,7 +253,7 @@ class EfficientNet(nn.Module):
def __init__(self, block_args, num_classes=1000, num_features=1280, in_chans=3, stem_size=32, def __init__(self, block_args, num_classes=1000, num_features=1280, in_chans=3, stem_size=32,
channel_multiplier=1.0, channel_divisor=8, channel_min=None, channel_multiplier=1.0, channel_divisor=8, channel_min=None,
output_stride=32, pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_connect_rate=0., output_stride=32, pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_path_rate=0.,
se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, global_pool='avg'): se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, global_pool='avg'):
super(EfficientNet, self).__init__() super(EfficientNet, self).__init__()
norm_kwargs = norm_kwargs or {} norm_kwargs = norm_kwargs or {}
@ -273,7 +273,7 @@ class EfficientNet(nn.Module):
# Middle stages (IR/ER/DS Blocks) # Middle stages (IR/ER/DS Blocks)
builder = EfficientNetBuilder( builder = EfficientNetBuilder(
channel_multiplier, channel_divisor, channel_min, output_stride, pad_type, act_layer, se_kwargs, channel_multiplier, channel_divisor, channel_min, output_stride, pad_type, act_layer, se_kwargs,
norm_layer, norm_kwargs, drop_connect_rate, verbose=_DEBUG) norm_layer, norm_kwargs, drop_path_rate, verbose=_DEBUG)
self.blocks = nn.Sequential(*builder(self._in_chs, block_args)) self.blocks = nn.Sequential(*builder(self._in_chs, block_args))
self.feature_info = builder.features self.feature_info = builder.features
self._in_chs = builder.in_chs self._in_chs = builder.in_chs
@ -333,7 +333,7 @@ class EfficientNetFeatures(nn.Module):
def __init__(self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='pre_pwl', def __init__(self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='pre_pwl',
in_chans=3, stem_size=32, channel_multiplier=1.0, channel_divisor=8, channel_min=None, in_chans=3, stem_size=32, channel_multiplier=1.0, channel_divisor=8, channel_min=None,
output_stride=32, pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_connect_rate=0., output_stride=32, pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_path_rate=0.,
se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None): se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None):
super(EfficientNetFeatures, self).__init__() super(EfficientNetFeatures, self).__init__()
norm_kwargs = norm_kwargs or {} norm_kwargs = norm_kwargs or {}
@ -355,7 +355,7 @@ class EfficientNetFeatures(nn.Module):
# Middle stages (IR/ER/DS Blocks) # Middle stages (IR/ER/DS Blocks)
builder = EfficientNetBuilder( builder = EfficientNetBuilder(
channel_multiplier, channel_divisor, channel_min, output_stride, pad_type, act_layer, se_kwargs, channel_multiplier, channel_divisor, channel_min, output_stride, pad_type, act_layer, se_kwargs,
norm_layer, norm_kwargs, drop_connect_rate, feature_location=feature_location, verbose=_DEBUG) norm_layer, norm_kwargs, drop_path_rate, feature_location=feature_location, verbose=_DEBUG)
self.blocks = nn.Sequential(*builder(self._in_chs, block_args)) self.blocks = nn.Sequential(*builder(self._in_chs, block_args))
self.feature_info = builder.features # builder provides info about feature channels for each block self.feature_info = builder.features # builder provides info about feature channels for each block
self._in_chs = builder.in_chs self._in_chs = builder.in_chs
@ -875,7 +875,7 @@ def spnasnet_100(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_b0(pretrained=False, **kwargs): def efficientnet_b0(pretrained=False, **kwargs):
""" EfficientNet-B0 """ """ EfficientNet-B0 """
# NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
model = _gen_efficientnet( model = _gen_efficientnet(
'efficientnet_b0', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs) 'efficientnet_b0', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
return model return model
@ -884,7 +884,7 @@ def efficientnet_b0(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_b1(pretrained=False, **kwargs): def efficientnet_b1(pretrained=False, **kwargs):
""" EfficientNet-B1 """ """ EfficientNet-B1 """
# NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
model = _gen_efficientnet( model = _gen_efficientnet(
'efficientnet_b1', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs) 'efficientnet_b1', channel_multiplier=1.0, depth_multiplier=1.1, pretrained=pretrained, **kwargs)
return model return model
@ -893,7 +893,7 @@ def efficientnet_b1(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_b2(pretrained=False, **kwargs): def efficientnet_b2(pretrained=False, **kwargs):
""" EfficientNet-B2 """ """ EfficientNet-B2 """
# NOTE for train, drop_rate should be 0.3, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.3, drop_path_rate should be 0.2
model = _gen_efficientnet( model = _gen_efficientnet(
'efficientnet_b2', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs) 'efficientnet_b2', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs)
return model return model
@ -902,7 +902,7 @@ def efficientnet_b2(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_b2a(pretrained=False, **kwargs): def efficientnet_b2a(pretrained=False, **kwargs):
""" EfficientNet-B2 @ 288x288 w/ 1.0 test crop""" """ EfficientNet-B2 @ 288x288 w/ 1.0 test crop"""
# NOTE for train, drop_rate should be 0.3, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.3, drop_path_rate should be 0.2
model = _gen_efficientnet( model = _gen_efficientnet(
'efficientnet_b2a', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs) 'efficientnet_b2a', channel_multiplier=1.1, depth_multiplier=1.2, pretrained=pretrained, **kwargs)
return model return model
@ -911,7 +911,7 @@ def efficientnet_b2a(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_b3(pretrained=False, **kwargs): def efficientnet_b3(pretrained=False, **kwargs):
""" EfficientNet-B3 """ """ EfficientNet-B3 """
# NOTE for train, drop_rate should be 0.3, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.3, drop_path_rate should be 0.2
model = _gen_efficientnet( model = _gen_efficientnet(
'efficientnet_b3', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs) 'efficientnet_b3', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs)
return model return model
@ -920,7 +920,7 @@ def efficientnet_b3(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_b3a(pretrained=False, **kwargs): def efficientnet_b3a(pretrained=False, **kwargs):
""" EfficientNet-B3 @ 320x320 w/ 1.0 test crop-pct """ """ EfficientNet-B3 @ 320x320 w/ 1.0 test crop-pct """
# NOTE for train, drop_rate should be 0.3, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.3, drop_path_rate should be 0.2
model = _gen_efficientnet( model = _gen_efficientnet(
'efficientnet_b3a', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs) 'efficientnet_b3a', channel_multiplier=1.2, depth_multiplier=1.4, pretrained=pretrained, **kwargs)
return model return model
@ -929,7 +929,7 @@ def efficientnet_b3a(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_b4(pretrained=False, **kwargs): def efficientnet_b4(pretrained=False, **kwargs):
""" EfficientNet-B4 """ """ EfficientNet-B4 """
# NOTE for train, drop_rate should be 0.4, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.4, drop_path_rate should be 0.2
model = _gen_efficientnet( model = _gen_efficientnet(
'efficientnet_b4', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs) 'efficientnet_b4', channel_multiplier=1.4, depth_multiplier=1.8, pretrained=pretrained, **kwargs)
return model return model
@ -938,7 +938,7 @@ def efficientnet_b4(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_b5(pretrained=False, **kwargs): def efficientnet_b5(pretrained=False, **kwargs):
""" EfficientNet-B5 """ """ EfficientNet-B5 """
# NOTE for train, drop_rate should be 0.4, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.4, drop_path_rate should be 0.2
model = _gen_efficientnet( model = _gen_efficientnet(
'efficientnet_b5', channel_multiplier=1.6, depth_multiplier=2.2, pretrained=pretrained, **kwargs) 'efficientnet_b5', channel_multiplier=1.6, depth_multiplier=2.2, pretrained=pretrained, **kwargs)
return model return model
@ -947,7 +947,7 @@ def efficientnet_b5(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_b6(pretrained=False, **kwargs): def efficientnet_b6(pretrained=False, **kwargs):
""" EfficientNet-B6 """ """ EfficientNet-B6 """
# NOTE for train, drop_rate should be 0.5, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.5, drop_path_rate should be 0.2
model = _gen_efficientnet( model = _gen_efficientnet(
'efficientnet_b6', channel_multiplier=1.8, depth_multiplier=2.6, pretrained=pretrained, **kwargs) 'efficientnet_b6', channel_multiplier=1.8, depth_multiplier=2.6, pretrained=pretrained, **kwargs)
return model return model
@ -956,7 +956,7 @@ def efficientnet_b6(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_b7(pretrained=False, **kwargs): def efficientnet_b7(pretrained=False, **kwargs):
""" EfficientNet-B7 """ """ EfficientNet-B7 """
# NOTE for train, drop_rate should be 0.5, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.5, drop_path_rate should be 0.2
model = _gen_efficientnet( model = _gen_efficientnet(
'efficientnet_b7', channel_multiplier=2.0, depth_multiplier=3.1, pretrained=pretrained, **kwargs) 'efficientnet_b7', channel_multiplier=2.0, depth_multiplier=3.1, pretrained=pretrained, **kwargs)
return model return model
@ -965,7 +965,7 @@ def efficientnet_b7(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_b8(pretrained=False, **kwargs): def efficientnet_b8(pretrained=False, **kwargs):
""" EfficientNet-B8 """ """ EfficientNet-B8 """
# NOTE for train, drop_rate should be 0.5, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.5, drop_path_rate should be 0.2
model = _gen_efficientnet( model = _gen_efficientnet(
'efficientnet_b8', channel_multiplier=2.2, depth_multiplier=3.6, pretrained=pretrained, **kwargs) 'efficientnet_b8', channel_multiplier=2.2, depth_multiplier=3.6, pretrained=pretrained, **kwargs)
return model return model
@ -974,7 +974,7 @@ def efficientnet_b8(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_l2(pretrained=False, **kwargs): def efficientnet_l2(pretrained=False, **kwargs):
""" EfficientNet-L2.""" """ EfficientNet-L2."""
# NOTE for train, drop_rate should be 0.5, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.5, drop_path_rate should be 0.2
model = _gen_efficientnet( model = _gen_efficientnet(
'efficientnet_l2', channel_multiplier=4.3, depth_multiplier=5.3, pretrained=pretrained, **kwargs) 'efficientnet_l2', channel_multiplier=4.3, depth_multiplier=5.3, pretrained=pretrained, **kwargs)
return model return model
@ -1007,7 +1007,7 @@ def efficientnet_el(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_cc_b0_4e(pretrained=False, **kwargs): def efficientnet_cc_b0_4e(pretrained=False, **kwargs):
""" EfficientNet-CondConv-B0 w/ 8 Experts """ """ EfficientNet-CondConv-B0 w/ 8 Experts """
# NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
model = _gen_efficientnet_condconv( model = _gen_efficientnet_condconv(
'efficientnet_cc_b0_4e', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs) 'efficientnet_cc_b0_4e', channel_multiplier=1.0, depth_multiplier=1.0, pretrained=pretrained, **kwargs)
return model return model
@ -1016,7 +1016,7 @@ def efficientnet_cc_b0_4e(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_cc_b0_8e(pretrained=False, **kwargs): def efficientnet_cc_b0_8e(pretrained=False, **kwargs):
""" EfficientNet-CondConv-B0 w/ 8 Experts """ """ EfficientNet-CondConv-B0 w/ 8 Experts """
# NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
model = _gen_efficientnet_condconv( model = _gen_efficientnet_condconv(
'efficientnet_cc_b0_8e', channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=2, 'efficientnet_cc_b0_8e', channel_multiplier=1.0, depth_multiplier=1.0, experts_multiplier=2,
pretrained=pretrained, **kwargs) pretrained=pretrained, **kwargs)
@ -1025,7 +1025,7 @@ def efficientnet_cc_b0_8e(pretrained=False, **kwargs):
@register_model @register_model
def efficientnet_cc_b1_8e(pretrained=False, **kwargs): def efficientnet_cc_b1_8e(pretrained=False, **kwargs):
""" EfficientNet-CondConv-B1 w/ 8 Experts """ """ EfficientNet-CondConv-B1 w/ 8 Experts """
# NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
model = _gen_efficientnet_condconv( model = _gen_efficientnet_condconv(
'efficientnet_cc_b1_8e', channel_multiplier=1.0, depth_multiplier=1.1, experts_multiplier=2, 'efficientnet_cc_b1_8e', channel_multiplier=1.0, depth_multiplier=1.1, experts_multiplier=2,
pretrained=pretrained, **kwargs) pretrained=pretrained, **kwargs)
@ -1355,7 +1355,7 @@ def tf_efficientnet_el(pretrained=False, **kwargs):
@register_model @register_model
def tf_efficientnet_cc_b0_4e(pretrained=False, **kwargs): def tf_efficientnet_cc_b0_4e(pretrained=False, **kwargs):
""" EfficientNet-CondConv-B0 w/ 4 Experts. Tensorflow compatible variant """ """ EfficientNet-CondConv-B0 w/ 4 Experts. Tensorflow compatible variant """
# NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
kwargs['bn_eps'] = BN_EPS_TF_DEFAULT kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
kwargs['pad_type'] = 'same' kwargs['pad_type'] = 'same'
model = _gen_efficientnet_condconv( model = _gen_efficientnet_condconv(
@ -1366,7 +1366,7 @@ def tf_efficientnet_cc_b0_4e(pretrained=False, **kwargs):
@register_model @register_model
def tf_efficientnet_cc_b0_8e(pretrained=False, **kwargs): def tf_efficientnet_cc_b0_8e(pretrained=False, **kwargs):
""" EfficientNet-CondConv-B0 w/ 8 Experts. Tensorflow compatible variant """ """ EfficientNet-CondConv-B0 w/ 8 Experts. Tensorflow compatible variant """
# NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
kwargs['bn_eps'] = BN_EPS_TF_DEFAULT kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
kwargs['pad_type'] = 'same' kwargs['pad_type'] = 'same'
model = _gen_efficientnet_condconv( model = _gen_efficientnet_condconv(
@ -1377,7 +1377,7 @@ def tf_efficientnet_cc_b0_8e(pretrained=False, **kwargs):
@register_model @register_model
def tf_efficientnet_cc_b1_8e(pretrained=False, **kwargs): def tf_efficientnet_cc_b1_8e(pretrained=False, **kwargs):
""" EfficientNet-CondConv-B1 w/ 8 Experts. Tensorflow compatible variant """ """ EfficientNet-CondConv-B1 w/ 8 Experts. Tensorflow compatible variant """
# NOTE for train, drop_rate should be 0.2, drop_connect_rate should be 0.2 # NOTE for train, drop_rate should be 0.2, drop_path_rate should be 0.2
kwargs['bn_eps'] = BN_EPS_TF_DEFAULT kwargs['bn_eps'] = BN_EPS_TF_DEFAULT
kwargs['pad_type'] = 'same' kwargs['pad_type'] = 'same'
model = _gen_efficientnet_condconv( model = _gen_efficientnet_condconv(

@ -2,7 +2,7 @@ import torch
import torch.nn as nn import torch.nn as nn
from torch.nn import functional as F from torch.nn import functional as F
from .layers.activations import sigmoid from .layers.activations import sigmoid
from .layers import create_conv2d from .layers import create_conv2d, drop_path
# Defaults used for Google/Tensorflow training of mobile networks /w RMSprop as per # Defaults used for Google/Tensorflow training of mobile networks /w RMSprop as per
@ -69,19 +69,6 @@ def round_channels(channels, multiplier=1.0, divisor=8, channel_min=None):
return make_divisible(channels, divisor, channel_min) return make_divisible(channels, divisor, channel_min)
def drop_connect(inputs, training: bool = False, drop_connect_rate: float = 0.):
"""Apply drop connect."""
if not training:
return inputs
keep_prob = 1 - drop_connect_rate
random_tensor = keep_prob + torch.rand(
(inputs.size()[0], 1, 1, 1), dtype=inputs.dtype, device=inputs.device)
random_tensor.floor_() # binarize
output = inputs.div(keep_prob) * random_tensor
return output
class ChannelShuffle(nn.Module): class ChannelShuffle(nn.Module):
# FIXME haven't used yet # FIXME haven't used yet
def __init__(self, groups): def __init__(self, groups):
@ -154,13 +141,13 @@ class DepthwiseSeparableConv(nn.Module):
def __init__(self, in_chs, out_chs, dw_kernel_size=3, def __init__(self, in_chs, out_chs, dw_kernel_size=3,
stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False, stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False,
pw_kernel_size=1, pw_act=False, se_ratio=0., se_kwargs=None, pw_kernel_size=1, pw_act=False, se_ratio=0., se_kwargs=None,
norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_connect_rate=0.): norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_path_rate=0.):
super(DepthwiseSeparableConv, self).__init__() super(DepthwiseSeparableConv, self).__init__()
norm_kwargs = norm_kwargs or {} norm_kwargs = norm_kwargs or {}
has_se = se_ratio is not None and se_ratio > 0. has_se = se_ratio is not None and se_ratio > 0.
self.has_residual = (stride == 1 and in_chs == out_chs) and not noskip self.has_residual = (stride == 1 and in_chs == out_chs) and not noskip
self.has_pw_act = pw_act # activation after point-wise conv self.has_pw_act = pw_act # activation after point-wise conv
self.drop_connect_rate = drop_connect_rate self.drop_path_rate = drop_path_rate
self.conv_dw = create_conv2d( self.conv_dw = create_conv2d(
in_chs, in_chs, dw_kernel_size, stride=stride, dilation=dilation, padding=pad_type, depthwise=True) in_chs, in_chs, dw_kernel_size, stride=stride, dilation=dilation, padding=pad_type, depthwise=True)
@ -200,8 +187,8 @@ class DepthwiseSeparableConv(nn.Module):
x = self.act2(x) x = self.act2(x)
if self.has_residual: if self.has_residual:
if self.drop_connect_rate > 0.: if self.drop_path_rate > 0.:
x = drop_connect(x, self.training, self.drop_connect_rate) x = drop_path(x, self.drop_path_rate, self.training)
x += residual x += residual
return x return x
@ -213,14 +200,14 @@ class InvertedResidual(nn.Module):
stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False, stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False,
exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1, exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1,
se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
conv_kwargs=None, drop_connect_rate=0.): conv_kwargs=None, drop_path_rate=0.):
super(InvertedResidual, self).__init__() super(InvertedResidual, self).__init__()
norm_kwargs = norm_kwargs or {} norm_kwargs = norm_kwargs or {}
conv_kwargs = conv_kwargs or {} conv_kwargs = conv_kwargs or {}
mid_chs = make_divisible(in_chs * exp_ratio) mid_chs = make_divisible(in_chs * exp_ratio)
has_se = se_ratio is not None and se_ratio > 0. has_se = se_ratio is not None and se_ratio > 0.
self.has_residual = (in_chs == out_chs and stride == 1) and not noskip self.has_residual = (in_chs == out_chs and stride == 1) and not noskip
self.drop_connect_rate = drop_connect_rate self.drop_path_rate = drop_path_rate
# Point-wise expansion # Point-wise expansion
self.conv_pw = create_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type, **conv_kwargs) self.conv_pw = create_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type, **conv_kwargs)
@ -278,8 +265,8 @@ class InvertedResidual(nn.Module):
x = self.bn3(x) x = self.bn3(x)
if self.has_residual: if self.has_residual:
if self.drop_connect_rate > 0.: if self.drop_path_rate > 0.:
x = drop_connect(x, self.training, self.drop_connect_rate) x = drop_path(x, self.drop_path_rate, self.training)
x += residual x += residual
return x return x
@ -292,7 +279,7 @@ class CondConvResidual(InvertedResidual):
stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False, stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False,
exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1, exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1,
se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
num_experts=0, drop_connect_rate=0.): num_experts=0, drop_path_rate=0.):
self.num_experts = num_experts self.num_experts = num_experts
conv_kwargs = dict(num_experts=self.num_experts) conv_kwargs = dict(num_experts=self.num_experts)
@ -302,7 +289,7 @@ class CondConvResidual(InvertedResidual):
act_layer=act_layer, noskip=noskip, exp_ratio=exp_ratio, exp_kernel_size=exp_kernel_size, act_layer=act_layer, noskip=noskip, exp_ratio=exp_ratio, exp_kernel_size=exp_kernel_size,
pw_kernel_size=pw_kernel_size, se_ratio=se_ratio, se_kwargs=se_kwargs, pw_kernel_size=pw_kernel_size, se_ratio=se_ratio, se_kwargs=se_kwargs,
norm_layer=norm_layer, norm_kwargs=norm_kwargs, conv_kwargs=conv_kwargs, norm_layer=norm_layer, norm_kwargs=norm_kwargs, conv_kwargs=conv_kwargs,
drop_connect_rate=drop_connect_rate) drop_path_rate=drop_path_rate)
self.routing_fn = nn.Linear(in_chs, self.num_experts) self.routing_fn = nn.Linear(in_chs, self.num_experts)
@ -332,8 +319,8 @@ class CondConvResidual(InvertedResidual):
x = self.bn3(x) x = self.bn3(x)
if self.has_residual: if self.has_residual:
if self.drop_connect_rate > 0.: if self.drop_path_rate > 0.:
x = drop_connect(x, self.training, self.drop_connect_rate) x = drop_path(x, self.drop_path_rate, self.training)
x += residual x += residual
return x return x
@ -344,7 +331,7 @@ class EdgeResidual(nn.Module):
def __init__(self, in_chs, out_chs, exp_kernel_size=3, exp_ratio=1.0, fake_in_chs=0, def __init__(self, in_chs, out_chs, exp_kernel_size=3, exp_ratio=1.0, fake_in_chs=0,
stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False, pw_kernel_size=1, stride=1, dilation=1, pad_type='', act_layer=nn.ReLU, noskip=False, pw_kernel_size=1,
se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, se_ratio=0., se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None,
drop_connect_rate=0.): drop_path_rate=0.):
super(EdgeResidual, self).__init__() super(EdgeResidual, self).__init__()
norm_kwargs = norm_kwargs or {} norm_kwargs = norm_kwargs or {}
if fake_in_chs > 0: if fake_in_chs > 0:
@ -353,7 +340,7 @@ class EdgeResidual(nn.Module):
mid_chs = make_divisible(in_chs * exp_ratio) mid_chs = make_divisible(in_chs * exp_ratio)
has_se = se_ratio is not None and se_ratio > 0. has_se = se_ratio is not None and se_ratio > 0.
self.has_residual = (in_chs == out_chs and stride == 1) and not noskip self.has_residual = (in_chs == out_chs and stride == 1) and not noskip
self.drop_connect_rate = drop_connect_rate self.drop_path_rate = drop_path_rate
# Expansion convolution # Expansion convolution
self.conv_exp = create_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type) self.conv_exp = create_conv2d(in_chs, mid_chs, exp_kernel_size, padding=pad_type)
@ -400,8 +387,8 @@ class EdgeResidual(nn.Module):
x = self.bn2(x) x = self.bn2(x)
if self.has_residual: if self.has_residual:
if self.drop_connect_rate > 0.: if self.drop_path_rate > 0.:
x = drop_connect(x, self.training, self.drop_connect_rate) x = drop_path(x, self.drop_path_rate, self.training)
x += residual x += residual
return x return x

@ -202,7 +202,7 @@ class EfficientNetBuilder:
""" """
def __init__(self, channel_multiplier=1.0, channel_divisor=8, channel_min=None, def __init__(self, channel_multiplier=1.0, channel_divisor=8, channel_min=None,
output_stride=32, pad_type='', act_layer=None, se_kwargs=None, output_stride=32, pad_type='', act_layer=None, se_kwargs=None,
norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_connect_rate=0., feature_location='', norm_layer=nn.BatchNorm2d, norm_kwargs=None, drop_path_rate=0., feature_location='',
verbose=False): verbose=False):
self.channel_multiplier = channel_multiplier self.channel_multiplier = channel_multiplier
self.channel_divisor = channel_divisor self.channel_divisor = channel_divisor
@ -213,7 +213,7 @@ class EfficientNetBuilder:
self.se_kwargs = se_kwargs self.se_kwargs = se_kwargs
self.norm_layer = norm_layer self.norm_layer = norm_layer
self.norm_kwargs = norm_kwargs self.norm_kwargs = norm_kwargs
self.drop_connect_rate = drop_connect_rate self.drop_path_rate = drop_path_rate
self.feature_location = feature_location self.feature_location = feature_location
assert feature_location in ('pre_pwl', 'post_exp', '') assert feature_location in ('pre_pwl', 'post_exp', '')
self.verbose = verbose self.verbose = verbose
@ -226,7 +226,7 @@ class EfficientNetBuilder:
return round_channels(chs, self.channel_multiplier, self.channel_divisor, self.channel_min) return round_channels(chs, self.channel_multiplier, self.channel_divisor, self.channel_min)
def _make_block(self, ba, block_idx, block_count): def _make_block(self, ba, block_idx, block_count):
drop_connect_rate = self.drop_connect_rate * block_idx / block_count drop_path_rate = self.drop_path_rate * block_idx / block_count
bt = ba.pop('block_type') bt = ba.pop('block_type')
ba['in_chs'] = self.in_chs ba['in_chs'] = self.in_chs
ba['out_chs'] = self._round_channels(ba['out_chs']) ba['out_chs'] = self._round_channels(ba['out_chs'])
@ -240,7 +240,7 @@ class EfficientNetBuilder:
ba['act_layer'] = ba['act_layer'] if ba['act_layer'] is not None else self.act_layer ba['act_layer'] = ba['act_layer'] if ba['act_layer'] is not None else self.act_layer
assert ba['act_layer'] is not None assert ba['act_layer'] is not None
if bt == 'ir': if bt == 'ir':
ba['drop_connect_rate'] = drop_connect_rate ba['drop_path_rate'] = drop_path_rate
ba['se_kwargs'] = self.se_kwargs ba['se_kwargs'] = self.se_kwargs
if self.verbose: if self.verbose:
logging.info(' InvertedResidual {}, Args: {}'.format(block_idx, str(ba))) logging.info(' InvertedResidual {}, Args: {}'.format(block_idx, str(ba)))
@ -249,13 +249,13 @@ class EfficientNetBuilder:
else: else:
block = InvertedResidual(**ba) block = InvertedResidual(**ba)
elif bt == 'ds' or bt == 'dsa': elif bt == 'ds' or bt == 'dsa':
ba['drop_connect_rate'] = drop_connect_rate ba['drop_path_rate'] = drop_path_rate
ba['se_kwargs'] = self.se_kwargs ba['se_kwargs'] = self.se_kwargs
if self.verbose: if self.verbose:
logging.info(' DepthwiseSeparable {}, Args: {}'.format(block_idx, str(ba))) logging.info(' DepthwiseSeparable {}, Args: {}'.format(block_idx, str(ba)))
block = DepthwiseSeparableConv(**ba) block = DepthwiseSeparableConv(**ba)
elif bt == 'er': elif bt == 'er':
ba['drop_connect_rate'] = drop_connect_rate ba['drop_path_rate'] = drop_path_rate
ba['se_kwargs'] = self.se_kwargs ba['se_kwargs'] = self.se_kwargs
if self.verbose: if self.verbose:
logging.info(' EdgeResidual {}, Args: {}'.format(block_idx, str(ba))) logging.info(' EdgeResidual {}, Args: {}'.format(block_idx, str(ba)))

@ -31,7 +31,21 @@ def create_model(
kwargs.pop('bn_tf', None) kwargs.pop('bn_tf', None)
kwargs.pop('bn_momentum', None) kwargs.pop('bn_momentum', None)
kwargs.pop('bn_eps', None) kwargs.pop('bn_eps', None)
kwargs.pop('drop_connect_rate', None)
# Parameters that aren't supported by all models should default to None in command line args,
# remove them if they are present and not set so that non-supporting models don't break.
if kwargs.get('drop_block_rate', None) is None:
kwargs.pop('drop_block_rate', None)
# handle backwards compat with drop_connect -> drop_path change
drop_connect_rate = kwargs.pop('drop_connect_rate', None)
if drop_connect_rate is not None and kwargs.get('drop_path_rate', None) is None:
print("WARNING: 'drop_connect' as an argument is deprecated, please use 'drop_path'."
" Setting drop_path to %f." % drop_connect_rate)
kwargs['drop_path_rate'] = drop_connect_rate
if kwargs.get('drop_path_rate', None) is None:
kwargs.pop('drop_path_rate', None)
if is_model(model_name): if is_model(model_name):
create_fn = model_entrypoint(model_name) create_fn = model_entrypoint(model_name)

@ -71,7 +71,7 @@ class MobileNetV3(nn.Module):
""" """
def __init__(self, block_args, num_classes=1000, in_chans=3, stem_size=16, num_features=1280, head_bias=True, def __init__(self, block_args, num_classes=1000, in_chans=3, stem_size=16, num_features=1280, head_bias=True,
channel_multiplier=1.0, pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_connect_rate=0., channel_multiplier=1.0, pad_type='', act_layer=nn.ReLU, drop_rate=0., drop_path_rate=0.,
se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, global_pool='avg'): se_kwargs=None, norm_layer=nn.BatchNorm2d, norm_kwargs=None, global_pool='avg'):
super(MobileNetV3, self).__init__() super(MobileNetV3, self).__init__()
@ -90,7 +90,7 @@ class MobileNetV3(nn.Module):
# Middle stages (IR/ER/DS Blocks) # Middle stages (IR/ER/DS Blocks)
builder = EfficientNetBuilder( builder = EfficientNetBuilder(
channel_multiplier, 8, None, 32, pad_type, act_layer, se_kwargs, channel_multiplier, 8, None, 32, pad_type, act_layer, se_kwargs,
norm_layer, norm_kwargs, drop_connect_rate, verbose=_DEBUG) norm_layer, norm_kwargs, drop_path_rate, verbose=_DEBUG)
self.blocks = nn.Sequential(*builder(self._in_chs, block_args)) self.blocks = nn.Sequential(*builder(self._in_chs, block_args))
self.feature_info = builder.features self.feature_info = builder.features
self._in_chs = builder.in_chs self._in_chs = builder.in_chs
@ -148,7 +148,7 @@ class MobileNetV3Features(nn.Module):
def __init__(self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='pre_pwl', def __init__(self, block_args, out_indices=(0, 1, 2, 3, 4), feature_location='pre_pwl',
in_chans=3, stem_size=16, channel_multiplier=1.0, output_stride=32, pad_type='', in_chans=3, stem_size=16, channel_multiplier=1.0, output_stride=32, pad_type='',
act_layer=nn.ReLU, drop_rate=0., drop_connect_rate=0., se_kwargs=None, act_layer=nn.ReLU, drop_rate=0., drop_path_rate=0., se_kwargs=None,
norm_layer=nn.BatchNorm2d, norm_kwargs=None): norm_layer=nn.BatchNorm2d, norm_kwargs=None):
super(MobileNetV3Features, self).__init__() super(MobileNetV3Features, self).__init__()
norm_kwargs = norm_kwargs or {} norm_kwargs = norm_kwargs or {}
@ -170,7 +170,7 @@ class MobileNetV3Features(nn.Module):
# Middle stages (IR/ER/DS Blocks) # Middle stages (IR/ER/DS Blocks)
builder = EfficientNetBuilder( builder = EfficientNetBuilder(
channel_multiplier, 8, None, output_stride, pad_type, act_layer, se_kwargs, channel_multiplier, 8, None, output_stride, pad_type, act_layer, se_kwargs,
norm_layer, norm_kwargs, drop_connect_rate, feature_location=feature_location, verbose=_DEBUG) norm_layer, norm_kwargs, drop_path_rate, feature_location=feature_location, verbose=_DEBUG)
self.blocks = nn.Sequential(*builder(self._in_chs, block_args)) self.blocks = nn.Sequential(*builder(self._in_chs, block_args))
self.feature_info = builder.features # builder provides info about feature channels for each block self.feature_info = builder.features # builder provides info about feature channels for each block
self._in_chs = builder.in_chs self._in_chs = builder.in_chs

@ -81,10 +81,14 @@ parser.add_argument('-b', '--batch-size', type=int, default=32, metavar='N',
help='input batch size for training (default: 32)') help='input batch size for training (default: 32)')
parser.add_argument('-vb', '--validation-batch-size-multiplier', type=int, default=1, metavar='N', parser.add_argument('-vb', '--validation-batch-size-multiplier', type=int, default=1, metavar='N',
help='ratio of validation batch size to training batch size (default: 1)') help='ratio of validation batch size to training batch size (default: 1)')
parser.add_argument('--drop', type=float, default=0.0, metavar='DROP', parser.add_argument('--drop', type=float, default=0.0, metavar='PCT',
help='Dropout rate (default: 0.)') help='Dropout rate (default: 0.)')
parser.add_argument('--drop-connect', type=float, default=0.0, metavar='DROP', parser.add_argument('--drop-connect', type=float, default=None, metavar='PCT',
help='Drop connect rate (default: 0.)') help='Drop connect rate, DEPRECATED, use drop-path (default: None)')
parser.add_argument('--drop-path', type=float, default=None, metavar='PCT',
help='Drop path rate (default: None)')
parser.add_argument('--drop-block', type=float, default=None, metavar='PCT',
help='Drop block rate (default: None)')
parser.add_argument('--jsd', action='store_true', default=False, parser.add_argument('--jsd', action='store_true', default=False,
help='Enable Jensen-Shannon Divergence + CE loss. Use with `--aug-splits`.') help='Enable Jensen-Shannon Divergence + CE loss. Use with `--aug-splits`.')
# Optimizer parameters # Optimizer parameters
@ -242,7 +246,9 @@ def main():
pretrained=args.pretrained, pretrained=args.pretrained,
num_classes=args.num_classes, num_classes=args.num_classes,
drop_rate=args.drop, drop_rate=args.drop,
drop_connect_rate=args.drop_connect, drop_connect_rate=args.drop_connect, # DEPRECATED, use drop_path
drop_path_rate=args.drop_path,
drop_block_rate=args.drop_block,
global_pool=args.gp, global_pool=args.gp,
bn_tf=args.bn_tf, bn_tf=args.bn_tf,
bn_momentum=args.bn_momentum, bn_momentum=args.bn_momentum,

Loading…
Cancel
Save