diff --git a/timm/data/textdataset.py b/timm/data/textdataset.py
index 895b7c2d..a1861d2d 100644
--- a/timm/data/textdataset.py
+++ b/timm/data/textdataset.py
@@ -24,9 +24,9 @@ class TextDataset(Dataset):
         # Go to file idx//32
         # Get label(1x1) based on file name
         # Get vector(1x4096) at idx%32 in the file
-        #return a tensor x*y (x*y = 4096) and target tensor (1,) //Use x,y =64
-
+        #return a tensor x*y (x*y = 4096) and target tensor (1,) //Use x,y = 16,256
+        #print("idx=", idx)

         def listdir_nohidden(AllVideos_Path):  # To ignore hidden files
             file_dir_extension = os.path.join(AllVideos_Path, '*.txt')
             for f in glob.glob(file_dir_extension):
@@ -44,17 +44,19 @@ class TextDataset(Dataset):
             words = f.read().split()
             features = np.float32(words[feat * 4096:feat * 4096 + 4096])
             features = torch.tensor(features)
-            features = torch.reshape(features, (16, 256))
-            print(VideoPath)
+            # features = torch.reshape(features, (16, 256))
+            # features = torch.reshape(features, (196, 768))
+            features = torch.reshape(features, (1, 4096))
+            #print(VideoPath)

         if VideoPath.find('Normal') == -1:
             label = 0
         else:
             label = 1
         label = torch.tensor(label)
-        #print(features.shape)
+        print(features.shape)
         #print(features)
-        #print(label.shape)
-        print(label)
+        print(label.shape)
+        #print(label)

         return features, label
diff --git a/timm/models/helpers.py b/timm/models/helpers.py
index 880fcc63..0e3d304f 100644
--- a/timm/models/helpers.py
+++ b/timm/models/helpers.py
@@ -190,6 +190,15 @@ def load_pretrained(model, default_cfg=None, num_classes=1000, in_chans=3, filte
     elif hf_hub_id and has_hf_hub(necessary=True):
         _logger.info(f'Loading pretrained weights from Hugging Face hub ({hf_hub_id})')
         state_dict = load_state_dict_from_hf(hf_hub_id)
+    print("pretrain state_dict:")
+    print(type(state_dict))
+    print(len(state_dict))
+    for key in list(state_dict.keys()):  # drop conv-stem weights; the patched model feeds tokens in directly
+        if key.startswith('stem'):
+            del state_dict[key]
+    for param_tensor in state_dict:
+        print(param_tensor, "\t", state_dict[param_tensor].size())
+
     if filter_fn is not None:
         # for backwards compat with filter fn that take one arg, try one first, the two
         try:
@@ -232,7 +241,9 @@ def load_pretrained(model, default_cfg=None, num_classes=1000, in_chans=3, filte
             classifier_bias = state_dict[classifier_name + '.bias']
             state_dict[classifier_name + '.bias'] = classifier_bias[label_offset:]
 
-    model.load_state_dict(state_dict, strict=strict)
+    #print(state_dict.shape)
+    #model.load_state_dict(state_dict, strict=strict)
+    model.load_state_dict(state_dict, strict=False)  # strict=False: the stem keys were removed above
 
 
 def extract_layer(model, layer):
@@ -462,6 +473,7 @@ def build_model_with_cfg(
     if pretrained_custom_load:
         load_custom_pretrained(model)
     else:
+        print("num_classes_pretrained=", num_classes_pretrained)
         load_pretrained(
             model,
             num_classes=num_classes_pretrained,
diff --git a/timm/models/mlp_mixer.py b/timm/models/mlp_mixer.py
index c008bfb5..0444c59a 100644
--- a/timm/models/mlp_mixer.py
+++ b/timm/models/mlp_mixer.py
@@ -85,7 +85,7 @@ default_cfgs = dict(
     # Mixer ImageNet-21K-P pretraining
     mixer_b16_224_miil_in21k=_cfg(
         url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil_in21k.pth',
-        mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=1, #11221
+        mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=11221,
     ),
     mixer_b16_224_miil=_cfg(
         url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil.pth',
@@ -264,7 +264,7 @@ class MlpMixer(nn.Module):
         super().__init__()
         self.num_classes = num_classes
         self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
-
+        self.initial_fc = nn.Linear(4096, 150528)  # 4096-d feature vector -> 196*768 token grid (150528 = 196 * 768)
         self.stem = PatchEmbed(
             img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim,
             norm_layer=norm_layer if stem_norm else None)
@@ -274,7 +274,7 @@ class MlpMixer(nn.Module):
         self.blocks = nn.Sequential(*[
             block_layer(
                 embed_dim
-                , 16 #self.stem.num_patches
+                , 196 #self.stem.num_patches; must match the 196-token grid built in forward()
                 , mlp_ratio, mlp_layer=mlp_layer, norm_layer=norm_layer, act_layer=act_layer,
                 drop=drop_rate, drop_path=drop_path_rate)
             for _ in range(num_blocks)])
@@ -286,24 +286,24 @@ class MlpMixer(nn.Module):
             for _ in range(num_blocks)])
         """
         self.norm = norm_layer(embed_dim)
-        self.head = nn.Linear(embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()
-        #self.head = nn.Sequential(
-        #    nn.Linear(embed_dim, self.num_classes),
-        #    nn.ReLU(),
-        #    nn.Dropout(p=0.3),
-        #    nn.Linear(self.num_classes, 1024),
-        #    nn.ReLU(),
-        #    nn.Dropout(p=0.3),
-        #    nn.Linear(1024, 512),
-        #    nn.ReLU(),
-        #    nn.Dropout(p=0.3),
-        #    nn.Linear(512, 256),
-        #    nn.ReLU(),
-        #    nn.Dropout(p=0.3),
-        #    nn.Linear(256, 1)
-        #    )
+        # self.head = nn.Linear(embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()
+        self.head = nn.Sequential(
+            nn.Linear(embed_dim, self.num_classes),
+            nn.ReLU(),
+            nn.Dropout(p=0.3),
+            nn.Linear(self.num_classes, 1024),
+            nn.ReLU(),
+            nn.Dropout(p=0.3),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Dropout(p=0.3),
+            nn.Linear(512, 256),
+            nn.ReLU(),
+            nn.Dropout(p=0.3),
+            nn.Linear(256, 2)
+        )
         self.sigmoid = nn.Sigmoid()
-        #self.init_weights(nlhb=nlhb)
+        self.init_weights(nlhb=nlhb)
 
     def init_weights(self, nlhb=False):
         head_bias = -math.log(self.num_classes) if nlhb else 0.
@@ -318,7 +318,6 @@ class MlpMixer(nn.Module):
 
     def forward_features(self, x):
         #x = self.stem(x)
-        #print(x.shape)
         print("In_Model")
         x = self.blocks(x)
         print(x)
@@ -329,6 +328,8 @@ class MlpMixer(nn.Module):
         return x
 
     def forward(self, x):
+        x = self.initial_fc(x)
+        x = torch.reshape(x, (-1, 196, 768))  # keep the batch dimension: (B, 196, 768)
         x = self.forward_features(x)
         x = self.head(x)
         print(x)
@@ -384,7 +385,11 @@ def checkpoint_filter_fn(state_dict, model):
             if k.endswith('.alpha') or k.endswith('.beta'):
                 v = v.reshape(1, 1, -1)
             out_dict[k] = v
+        #print("checkpoint_filter_out_dict")
+        #print(out_dict)
         return out_dict
+    #print("checkpoint_filter_state_dict")
+    #print(state_dict)
     return state_dict
 
 
@@ -392,6 +397,9 @@ def _create_mixer(variant, pretrained=False, **kwargs):
     if kwargs.get('features_only', None):
         raise RuntimeError('features_only not implemented for MLP-Mixer models.')
 
+    print("_create_mixer")
+    print("Pretrained=", pretrained)
+    print("default_Cfgs=", default_cfgs[variant])
     model = build_model_with_cfg(
         MlpMixer, variant, pretrained,
         default_cfg=default_cfgs[variant],
@@ -495,7 +503,8 @@ def mixer_b16_224_miil_in21k(pretrained=False, **kwargs):
     """ Mixer-B/16 224x224. ImageNet-1k pretrained weights.
     Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
     """
-    model_args = dict(patch_size=16, num_blocks=12, embed_dim=256, **kwargs)
+    model_args = dict(patch_size=16, num_blocks=12, embed_dim=768, **kwargs)
+    #model_args = dict(patch_size=16, num_blocks=12, embed_dim=256, **kwargs)
     model = _create_mixer('mixer_b16_224_miil_in21k', pretrained=pretrained, **model_args)
     return model
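
Reviewer note: the three changed files work together. TextDataset now emits a (1, 4096) feature vector per sample instead of a (16, 256) grid; MlpMixer bypasses its conv stem, projecting that vector through initial_fc into the 196x768 token grid that Mixer-B/16 expects; and load_pretrained drops the stem.* keys and loads the in21k checkpoint with strict=False, so the pretrained blocks line up while initial_fc and the new MLP head train from scratch. Below is a minimal, self-contained sketch of that forward path, not the actual patched MlpMixer: FeatureMixerSketch is a hypothetical name, a single token/channel-mixing pair stands in for the 12 pretrained blocks, and a slimmed head stands in for the deep head added in the diff.

import torch
import torch.nn as nn

class FeatureMixerSketch(nn.Module):
    """Illustrative stand-in for the patched MlpMixer forward path."""
    def __init__(self, feat_dim=4096, num_tokens=196, embed_dim=768, num_classes=2):
        super().__init__()
        self.num_tokens, self.embed_dim = num_tokens, embed_dim
        self.initial_fc = nn.Linear(feat_dim, num_tokens * embed_dim)  # replaces the conv stem
        self.token_mix = nn.Linear(num_tokens, num_tokens)    # mixes across the 196 tokens
        self.channel_mix = nn.Linear(embed_dim, embed_dim)    # mixes across the 768 channels
        self.norm = nn.LayerNorm(embed_dim)
        self.head = nn.Sequential(                            # slimmed stand-in for the deep head
            nn.Linear(embed_dim, 256), nn.ReLU(), nn.Dropout(p=0.3),
            nn.Linear(256, num_classes))

    def forward(self, x):                                     # x: (B, 1, 4096), as TextDataset returns it
        x = self.initial_fc(x)                                # (B, 1, 150528)
        x = x.reshape(-1, self.num_tokens, self.embed_dim)    # (B, 196, 768) token grid
        x = x + self.token_mix(x.transpose(1, 2)).transpose(1, 2)
        x = x + self.channel_mix(x)
        x = self.norm(x).mean(dim=1)                          # global average pool -> (B, 768)
        return self.head(x)                                   # (B, 2) logits

if __name__ == '__main__':
    feats = torch.randn(8, 1, 4096)                           # batch of 8 video-segment feature vectors
    print(FeatureMixerSketch()(feats).shape)                  # torch.Size([8, 2])

With a 2-logit head, CrossEntropyLoss on the raw logits is the usual pairing; the sigmoid registered in the diff would instead suit a single-logit head trained with BCEWithLogitsLoss.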