Bring EfficientNet SE layer in line with others, pull se_ratio outside of blocks. Allows swapping w/ other attn layers.

more_attn
Ross Wightman 4 years ago
parent 9611458e19
commit bcec14d3b5
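
A minimal standalone sketch of the idea behind this commit (the classes below are illustrative re-implementations, not the timm modules themselves): once the reduction ratio is bound outside the block via functools.partial, a block only ever calls se_layer(channels), so any attention layer with an (in_chs, ...) constructor can be swapped in.

from functools import partial

import torch
import torch.nn as nn


class SqueezeExcite(nn.Module):
    # Sketch of the refactored layer: the reduction ratio (rd_ratio) is an
    # ordinary constructor arg, so a builder can pre-bind it with partial().
    def __init__(self, in_chs, rd_ratio=0.25, rd_channels=None,
                 act_layer=nn.ReLU, gate_layer=nn.Sigmoid):
        super().__init__()
        if rd_channels is None:
            rd_channels = round(in_chs * rd_ratio)
        self.conv_reduce = nn.Conv2d(in_chs, rd_channels, 1, bias=True)
        self.act1 = act_layer(inplace=True)
        self.conv_expand = nn.Conv2d(rd_channels, in_chs, 1, bias=True)
        self.gate = gate_layer()

    def forward(self, x):
        x_se = x.mean((2, 3), keepdim=True)
        x_se = self.conv_expand(self.act1(self.conv_reduce(x_se)))
        return x * self.gate(x_se)


class SimpleSpatialAttn(nn.Module):
    # A stand-in "other" attention layer that takes no reduction ratio at all.
    def __init__(self, in_chs):
        super().__init__()
        self.conv = nn.Conv2d(in_chs, 1, 1, bias=True)

    def forward(self, x):
        return x * torch.sigmoid(self.conv(x))


# The builder binds the ratio once, outside the block...
se_layer = partial(SqueezeExcite, rd_ratio=0.25)
# ...so a block only ever calls se_layer(channels), and any attention layer
# with an (in_chs, ...) constructor can be dropped in instead.
for attn in (se_layer, SimpleSpatialAttn):
    layer = attn(64)
    out = layer(torch.randn(2, 64, 7, 7))
    assert out.shape == (2, 64, 7, 7)
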

@@ -7,7 +7,7 @@ import torch
 import torch.nn as nn
 from torch.nn import functional as F
-from .layers import create_conv2d, drop_path, make_divisible, get_act_fn, create_act_layer
+from .layers import create_conv2d, drop_path, make_divisible, create_act_layer
 from .layers.activations import sigmoid
 __all__ = [
@@ -19,31 +19,32 @@ class SqueezeExcite(nn.Module):
     Args:
         in_chs (int): input channels to layer
-        se_ratio (float): ratio of squeeze reduction
+        rd_ratio (float): ratio of squeeze reduction
         act_layer (nn.Module): activation layer of containing block
-        gate_fn (Callable): attention gate function
+        gate_layer (Callable): attention gate function
         force_act_layer (nn.Module): override block's activation fn if this is set/bound
-        round_chs_fn (Callable): specify a fn to calculate rounding of reduced chs
+        rd_round_fn (Callable): specify a fn to calculate rounding of reduced chs
     """
     def __init__(
-            self, in_chs, se_ratio=0.25, act_layer=nn.ReLU, gate_fn=sigmoid,
-            force_act_layer=None, round_chs_fn=None):
+            self, in_chs, rd_ratio=0.25, rd_channels=None, act_layer=nn.ReLU,
+            gate_layer=nn.Sigmoid, force_act_layer=None, rd_round_fn=None):
         super(SqueezeExcite, self).__init__()
-        round_chs_fn = round_chs_fn or round
-        reduced_chs = round_chs_fn(in_chs * se_ratio)
+        if rd_channels is None:
+            rd_round_fn = rd_round_fn or round
+            rd_channels = rd_round_fn(in_chs * rd_ratio)
         act_layer = force_act_layer or act_layer
-        self.conv_reduce = nn.Conv2d(in_chs, reduced_chs, 1, bias=True)
+        self.conv_reduce = nn.Conv2d(in_chs, rd_channels, 1, bias=True)
         self.act1 = create_act_layer(act_layer, inplace=True)
-        self.conv_expand = nn.Conv2d(reduced_chs, in_chs, 1, bias=True)
-        self.gate_fn = get_act_fn(gate_fn)
+        self.conv_expand = nn.Conv2d(rd_channels, in_chs, 1, bias=True)
+        self.gate = create_act_layer(gate_layer)
     def forward(self, x):
         x_se = x.mean((2, 3), keepdim=True)
         x_se = self.conv_reduce(x_se)
         x_se = self.act1(x_se)
         x_se = self.conv_expand(x_se)
-        return x * self.gate_fn(x_se)
+        return x * self.gate(x_se)
 class ConvBnAct(nn.Module):
@@ -85,10 +86,9 @@ class DepthwiseSeparableConv(nn.Module):
     """
     def __init__(
             self, in_chs, out_chs, dw_kernel_size=3, stride=1, dilation=1, pad_type='',
-            noskip=False, pw_kernel_size=1, pw_act=False, se_ratio=0.,
-            act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, se_layer=None, drop_path_rate=0.):
+            noskip=False, pw_kernel_size=1, pw_act=False, act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d,
+            se_layer=None, drop_path_rate=0.):
         super(DepthwiseSeparableConv, self).__init__()
-        has_se = se_layer is not None and se_ratio > 0.
         self.has_residual = (stride == 1 and in_chs == out_chs) and not noskip
         self.has_pw_act = pw_act  # activation after point-wise conv
         self.drop_path_rate = drop_path_rate
@@ -99,7 +99,7 @@ class DepthwiseSeparableConv(nn.Module):
         self.act1 = act_layer(inplace=True)
         # Squeeze-and-excitation
-        self.se = se_layer(in_chs, se_ratio=se_ratio, act_layer=act_layer) if has_se else nn.Identity()
+        self.se = se_layer(in_chs, act_layer=act_layer) if se_layer else nn.Identity()
         self.conv_pw = create_conv2d(in_chs, out_chs, pw_kernel_size, padding=pad_type)
         self.bn2 = norm_layer(out_chs)
@@ -144,12 +144,11 @@ class InvertedResidual(nn.Module):
     def __init__(
             self, in_chs, out_chs, dw_kernel_size=3, stride=1, dilation=1, pad_type='',
-            noskip=False, exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1, se_ratio=0.,
-            act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, se_layer=None, conv_kwargs=None, drop_path_rate=0.):
+            noskip=False, exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1, act_layer=nn.ReLU,
+            norm_layer=nn.BatchNorm2d, se_layer=None, conv_kwargs=None, drop_path_rate=0.):
         super(InvertedResidual, self).__init__()
         conv_kwargs = conv_kwargs or {}
         mid_chs = make_divisible(in_chs * exp_ratio)
-        has_se = se_layer is not None and se_ratio > 0.
         self.has_residual = (in_chs == out_chs and stride == 1) and not noskip
         self.drop_path_rate = drop_path_rate
@@ -166,7 +165,7 @@ class InvertedResidual(nn.Module):
         self.act2 = act_layer(inplace=True)
         # Squeeze-and-excitation
-        self.se = se_layer(mid_chs, se_ratio=se_ratio, act_layer=act_layer) if has_se else nn.Identity()
+        self.se = se_layer(mid_chs, act_layer=act_layer) if se_layer else nn.Identity()
         # Point-wise linear projection
         self.conv_pwl = create_conv2d(mid_chs, out_chs, pw_kernel_size, padding=pad_type, **conv_kwargs)
@@ -212,8 +211,8 @@ class CondConvResidual(InvertedResidual):
     def __init__(
             self, in_chs, out_chs, dw_kernel_size=3, stride=1, dilation=1, pad_type='',
-            noskip=False, exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1, se_ratio=0.,
-            act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, se_layer=None, num_experts=0, drop_path_rate=0.):
+            noskip=False, exp_ratio=1.0, exp_kernel_size=1, pw_kernel_size=1, act_layer=nn.ReLU,
+            norm_layer=nn.BatchNorm2d, se_layer=None, num_experts=0, drop_path_rate=0.):
         self.num_experts = num_experts
         conv_kwargs = dict(num_experts=self.num_experts)
@@ -221,8 +220,8 @@ class CondConvResidual(InvertedResidual):
         super(CondConvResidual, self).__init__(
             in_chs, out_chs, dw_kernel_size=dw_kernel_size, stride=stride, dilation=dilation, pad_type=pad_type,
             act_layer=act_layer, noskip=noskip, exp_ratio=exp_ratio, exp_kernel_size=exp_kernel_size,
-            pw_kernel_size=pw_kernel_size, se_ratio=se_ratio, se_layer=se_layer,
-            norm_layer=norm_layer, conv_kwargs=conv_kwargs, drop_path_rate=drop_path_rate)
+            pw_kernel_size=pw_kernel_size, se_layer=se_layer, norm_layer=norm_layer, conv_kwargs=conv_kwargs,
+            drop_path_rate=drop_path_rate)
         self.routing_fn = nn.Linear(in_chs, self.num_experts)
@@ -271,8 +270,8 @@ class EdgeResidual(nn.Module):
     def __init__(
             self, in_chs, out_chs, exp_kernel_size=3, stride=1, dilation=1, pad_type='',
-            force_in_chs=0, noskip=False, exp_ratio=1.0, pw_kernel_size=1, se_ratio=0.,
-            act_layer=nn.ReLU, norm_layer=nn.BatchNorm2d, se_layer=None, drop_path_rate=0.):
+            force_in_chs=0, noskip=False, exp_ratio=1.0, pw_kernel_size=1, act_layer=nn.ReLU,
+            norm_layer=nn.BatchNorm2d, se_layer=None, drop_path_rate=0.):
         super(EdgeResidual, self).__init__()
         if force_in_chs > 0:
             mid_chs = make_divisible(force_in_chs * exp_ratio)
@@ -289,7 +288,7 @@ class EdgeResidual(nn.Module):
         self.act1 = act_layer(inplace=True)
         # Squeeze-and-excitation
-        self.se = SqueezeExcite(mid_chs, se_ratio=se_ratio, act_layer=act_layer) if has_se else nn.Identity()
+        self.se = se_layer(mid_chs, act_layer=act_layer) if se_layer else nn.Identity()
         # Point-wise linear projection
         self.conv_pwl = create_conv2d(mid_chs, out_chs, pw_kernel_size, padding=pad_type)

@@ -10,11 +10,12 @@ import logging
 import math
 import re
 from copy import deepcopy
+from functools import partial
 import torch.nn as nn
 from .efficientnet_blocks import *
-from .layers import CondConv2d, get_condconv_initializer, get_act_layer, make_divisible
+from .layers import CondConv2d, get_condconv_initializer, get_act_layer, get_attn, make_divisible
 __all__ = ["EfficientNetBuilder", "decode_arch_def", "efficientnet_init_weights",
            'resolve_bn_args', 'resolve_act_layer', 'round_channels', 'BN_MOMENTUM_TF_DEFAULT', 'BN_EPS_TF_DEFAULT']
@@ -120,7 +121,9 @@ def _decode_block_str(block_str):
             elif v == 'hs':
                 value = get_act_layer('hard_swish')
             elif v == 'sw':
-                value = get_act_layer('swish')
+                value = get_act_layer('swish')  # aka SiLU
+            elif v == 'mi':
+                value = get_act_layer('mish')
             else:
                 continue
             options[key] = value
@@ -273,7 +276,12 @@ class EfficientNetBuilder:
         self.se_from_exp = se_from_exp  # calculate se channel reduction from expanded (mid) chs
         self.act_layer = act_layer
         self.norm_layer = norm_layer
-        self.se_layer = se_layer
+        self.se_layer = get_attn(se_layer)
+        try:  # probe whether the attn layer accepts an rd_ratio arg before binding one
+            self.se_layer(8, rd_ratio=1.0)
+            self.se_has_ratio = True
+        except TypeError:
+            self.se_has_ratio = False
         self.drop_path_rate = drop_path_rate
         if feature_location == 'depthwise':
             # old 'depthwise' mode renamed 'expansion' to match TF impl, old expansion mode didn't make sense
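
A minimal sketch of what the capability probe above does, with illustrative stand-in layers (not timm classes): constructing the attention layer once with rd_ratio reveals, via TypeError, whether the builder can pre-bind that argument.

from functools import partial

import torch.nn as nn


class RatioAttn(nn.Module):
    # illustrative layer that accepts a reduction ratio
    def __init__(self, channels, rd_ratio=0.25):
        super().__init__()
        self.fc = nn.Conv2d(channels, max(1, round(channels * rd_ratio)), 1)


class NoRatioAttn(nn.Module):
    # illustrative layer with no notion of a reduction ratio
    def __init__(self, channels):
        super().__init__()
        self.fc = nn.Conv2d(channels, 1, 1)


def se_has_ratio(layer_cls):
    # mirror of the builder's probe: an unexpected kwarg raises TypeError
    try:
        layer_cls(8, rd_ratio=1.0)
        return True
    except TypeError:
        return False


for cls in (RatioAttn, NoRatioAttn):
    if se_has_ratio(cls):
        se_layer = partial(cls, rd_ratio=0.125)  # ratio decided by the builder
    else:
        se_layer = cls  # layer handles its own defaults
    print(cls.__name__, se_layer(64))
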
@@ -300,18 +308,21 @@ class EfficientNetBuilder:
         ba['act_layer'] = ba['act_layer'] if ba['act_layer'] is not None else self.act_layer
         assert ba['act_layer'] is not None
         ba['norm_layer'] = self.norm_layer
+        ba['drop_path_rate'] = drop_path_rate
         if bt != 'cn':
-            ba['se_layer'] = self.se_layer
-            if not self.se_from_exp and ba['se_ratio']:
-                ba['se_ratio'] /= ba.get('exp_ratio', 1.0)
-        ba['drop_path_rate'] = drop_path_rate
+            se_ratio = ba.pop('se_ratio')
+            if se_ratio and self.se_layer is not None:
+                if not self.se_from_exp:
+                    # adjust se_ratio by expansion ratio if calculating se channels from block input
+                    se_ratio /= ba.get('exp_ratio', 1.0)
+                if self.se_has_ratio:
+                    ba['se_layer'] = partial(self.se_layer, rd_ratio=se_ratio)
+                else:
+                    ba['se_layer'] = self.se_layer
         if bt == 'ir':
             _log_info_if('  InvertedResidual {}, Args: {}'.format(block_idx, str(ba)), self.verbose)
-            if ba.get('num_experts', 0) > 0:
-                block = CondConvResidual(**ba)
-            else:
-                block = InvertedResidual(**ba)
+            block = CondConvResidual(**ba) if ba.get('num_experts', 0) else InvertedResidual(**ba)
         elif bt == 'ds' or bt == 'dsa':
             _log_info_if('  DepthwiseSeparable {}, Args: {}'.format(block_idx, str(ba)), self.verbose)
             block = DepthwiseSeparableConv(**ba)

@@ -40,7 +40,7 @@ default_cfgs = {
 }
-_SE_LAYER = partial(SqueezeExcite, gate_fn='hard_sigmoid', round_chs_fn=partial(make_divisible, divisor=4))
+_SE_LAYER = partial(SqueezeExcite, gate_layer='hard_sigmoid', rd_round_fn=partial(make_divisible, divisor=4))
 class GhostModule(nn.Module):
@@ -92,7 +92,7 @@ class GhostBottleneck(nn.Module):
             self.bn_dw = None
         # Squeeze-and-excitation
-        self.se = _SE_LAYER(mid_chs, se_ratio=se_ratio) if has_se else None
+        self.se = _SE_LAYER(mid_chs, rd_ratio=se_ratio) if has_se else None
         # Point-wise linear projection
         self.ghost2 = GhostModule(mid_chs, out_chs, relu=False)

@@ -39,8 +39,7 @@ def _gen_hardcorenas(pretrained, variant, arch_def, **kwargs):
     """
     num_features = 1280
-    se_layer = partial(
-        SqueezeExcite, gate_fn=get_act_fn('hard_sigmoid'), force_act_layer=nn.ReLU, round_chs_fn=round_channels)
+    se_layer = partial(SqueezeExcite, gate_layer='hard_sigmoid', force_act_layer=nn.ReLU, rd_round_fn=round_channels)
     model_kwargs = dict(
         block_args=decode_arch_def(arch_def),
         num_features=num_features,

@@ -266,7 +266,7 @@ def _gen_mobilenet_v3_rw(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
         round_chs_fn=partial(round_channels, multiplier=channel_multiplier),
         norm_layer=partial(nn.BatchNorm2d, **resolve_bn_args(kwargs)),
         act_layer=resolve_act_layer(kwargs, 'hard_swish'),
-        se_layer=partial(SqueezeExcite, gate_fn=get_act_fn('hard_sigmoid')),
+        se_layer=partial(SqueezeExcite, gate_layer='hard_sigmoid'),
         **kwargs,
     )
     model = _create_mnv3(variant, pretrained, **model_kwargs)
@@ -354,8 +354,7 @@ def _gen_mobilenet_v3(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
             # stage 6, 7x7 in
             ['cn_r1_k1_s1_c960'],  # hard-swish
         ]
-    se_layer = partial(
-        SqueezeExcite, gate_fn=get_act_fn('hard_sigmoid'), force_act_layer=nn.ReLU, round_chs_fn=round_channels)
+    se_layer = partial(SqueezeExcite, gate_layer='hard_sigmoid', force_act_layer=nn.ReLU, rd_round_fn=round_channels)
     model_kwargs = dict(
         block_args=decode_arch_def(arch_def),
         num_features=num_features,
@@ -372,67 +371,48 @@ def _gen_mobilenet_v3(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
 def _gen_fbnetv3(variant, channel_multiplier=1.0, pretrained=False, **kwargs):
     """ FBNetV3
+    Paper: `FBNetV3: Joint Architecture-Recipe Search using Predictor Pretraining`
+        - https://arxiv.org/abs/2006.02049
     FIXME untested, this is a preliminary impl of some FBNet-V3 variants.
     """
     vl = variant.split('_')[-1]
     if vl in ('a', 'b'):
         stem_size = 16
         arch_def = [
-            # stage 0, 112x112 in
             ['ds_r2_k3_s1_e1_c16'],
-            # stage 1, 112x112 in
             ['ir_r1_k5_s2_e4_c24', 'ir_r3_k5_s1_e2_c24'],
-            # stage 2, 56x56 in
             ['ir_r1_k5_s2_e5_c40_se0.25', 'ir_r4_k5_s1_e3_c40_se0.25'],
-            # stage 3, 28x28 in
             ['ir_r1_k5_s2_e5_c72', 'ir_r4_k3_s1_e3_c72'],
-            # stage 4, 14x14in
             ['ir_r1_k3_s1_e5_c120_se0.25', 'ir_r5_k5_s1_e3_c120_se0.25'],
-            # stage 5, 14x14in
             ['ir_r1_k3_s2_e6_c184_se0.25', 'ir_r5_k5_s1_e4_c184_se0.25', 'ir_r1_k5_s1_e6_c224_se0.25'],
-            # stage 6, 7x7 in
             ['cn_r1_k1_s1_c1344'],
         ]
     elif vl == 'd':
         stem_size = 24
         arch_def = [
-            # stage 0, 112x112 in
             ['ds_r2_k3_s1_e1_c16'],
-            # stage 1, 112x112 in
             ['ir_r1_k3_s2_e5_c24', 'ir_r5_k3_s1_e2_c24'],
-            # stage 2, 56x56 in
             ['ir_r1_k5_s2_e4_c40_se0.25', 'ir_r4_k3_s1_e3_c40_se0.25'],
-            # stage 3, 28x28 in
             ['ir_r1_k3_s2_e5_c72', 'ir_r4_k3_s1_e3_c72'],
-            # stage 4, 14x14in
             ['ir_r1_k3_s1_e5_c128_se0.25', 'ir_r6_k5_s1_e3_c128_se0.25'],
-            # stage 5, 14x14in
             ['ir_r1_k3_s2_e6_c208_se0.25', 'ir_r5_k5_s1_e5_c208_se0.25', 'ir_r1_k5_s1_e6_c240_se0.25'],
-            # stage 6, 7x7 in
             ['cn_r1_k1_s1_c1440'],
         ]
     elif vl == 'g':
         stem_size = 32
         arch_def = [
-            # stage 0, 112x112 in
             ['ds_r3_k3_s1_e1_c24'],
-            # stage 1, 112x112 in
             ['ir_r1_k5_s2_e4_c40', 'ir_r4_k5_s1_e2_c40'],
-            # stage 2, 56x56 in
             ['ir_r1_k5_s2_e4_c56_se0.25', 'ir_r4_k5_s1_e3_c56_se0.25'],
-            # stage 3, 28x28 in
             ['ir_r1_k5_s2_e5_c104', 'ir_r4_k3_s1_e3_c104'],
-            # stage 4, 14x14in
             ['ir_r1_k3_s1_e5_c160_se0.25', 'ir_r8_k5_s1_e3_c160_se0.25'],
-            # stage 5, 14x14in
             ['ir_r1_k3_s2_e6_c264_se0.25', 'ir_r6_k5_s1_e5_c264_se0.25', 'ir_r2_k5_s1_e6_c288_se0.25'],
-            # stage 6, 7x7 in
-            ['cn_r1_k1_s1_c1728'],  # hard-swish
+            ['cn_r1_k1_s1_c1728'],
         ]
     else:
         raise NotImplemented
     round_chs_fn = partial(round_channels, multiplier=channel_multiplier, round_limit=0.95)
-    se_layer = partial(SqueezeExcite, gate_fn=get_act_fn('hard_sigmoid'), round_chs_fn=round_chs_fn)
+    se_layer = partial(SqueezeExcite, gate_layer='hard_sigmoid', rd_round_fn=round_chs_fn)
     act_layer = resolve_act_layer(kwargs, 'hard_swish')
     model_kwargs = dict(
         block_args=decode_arch_def(arch_def),
