diff --git a/tests/test_models.py b/tests/test_models.py index a62625d9..f406555a 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -15,7 +15,7 @@ if hasattr(torch._C, '_jit_set_profiling_executor'): if 'GITHUB_ACTIONS' in os.environ: # and 'Linux' in platform.system(): # GitHub Linux runner is slower and hits memory limits sooner than MacOS, exclude bigger models - EXCLUDE_FILTERS = ['*efficientnet_l2*', '*resnext101_32x48d', 'vit_*'] + EXCLUDE_FILTERS = ['*efficientnet_l2*', '*resnext101_32x48d', 'vit_*', '*in21k', '*152x4_bitm'] else: EXCLUDE_FILTERS = ['vit_*'] MAX_FWD_SIZE = 384 diff --git a/timm/models/resnetv2.py b/timm/models/resnetv2.py index 3ce0605a..731f5dca 100644 --- a/timm/models/resnetv2.py +++ b/timm/models/resnetv2.py @@ -331,7 +331,7 @@ def create_stem(in_chs, out_chs, stem_type='', preact=True, conv_layer=None, nor if 'fixed' in stem_type: # 'fixed' SAME padding approximation that is used in BiT models - stem['pad'] = nn.ConstantPad2d(1, 0) + stem['pad'] = nn.ConstantPad2d(1, 0.) stem['pool'] = nn.MaxPool2d(kernel_size=3, stride=2, padding=0) elif 'same' in stem_type: # full, input size based 'SAME' padding, used in ViT Hybrid model @@ -421,7 +421,12 @@ class ResNetV2(nn.Module): import numpy as np weights = np.load(checkpoint_path) with torch.no_grad(): - self.stem.conv.weight.copy_(tf2th(weights[f'{prefix}root_block/standardized_conv2d/kernel'])) + stem_conv_w = tf2th(weights[f'{prefix}root_block/standardized_conv2d/kernel']) + if self.stem.conv.weight.shape[1] == 1: + self.stem.conv.weight.copy_(stem_conv_w.sum(dim=1, keepdim=True)) + # FIXME handle > 3 in_chans? + else: + self.stem.conv.weight.copy_(stem_conv_w) self.norm.weight.copy_(tf2th(weights[f'{prefix}group_norm/gamma'])) self.norm.bias.copy_(tf2th(weights[f'{prefix}group_norm/beta'])) self.head.fc.weight.copy_(tf2th(weights[f'{prefix}head/conv2d/kernel']))