From 450b0d57f012f5525829e4043db5542c09c257c4 Mon Sep 17 00:00:00 2001
From: kira7005
Date: Thu, 21 Apr 2022 17:06:32 +0000
Subject: [PATCH] model_freeze_and_pretraining

---
 timm/data/textdataset.py |  4 ++--
 timm/models/mlp_mixer.py | 42 ++++++++++++++++++++++------------------
 train.py                 |  4 ++++
 3 files changed, 29 insertions(+), 21 deletions(-)

diff --git a/timm/data/textdataset.py b/timm/data/textdataset.py
index 30f11309..8caa5c8d 100644
--- a/timm/data/textdataset.py
+++ b/timm/data/textdataset.py
@@ -49,9 +49,9 @@ class TextDataset(Dataset):
         if len(features) == 0:
             print(idx)
             print(VideoPath)
-        features = torch.reshape(features, (16, 256))
+        #features = torch.reshape(features, (16, 256))
         # features = torch.reshape(features, (196, 768))
-        #features = torch.reshape(features, (1, 4096))
+        features = torch.reshape(features, (1, 4096))
         #print(VideoPath)
         if VideoPath.find('Normal') == -1:
             label = 0
diff --git a/timm/models/mlp_mixer.py b/timm/models/mlp_mixer.py
index 5632a537..98767036 100644
--- a/timm/models/mlp_mixer.py
+++ b/timm/models/mlp_mixer.py
@@ -266,16 +266,18 @@ class MlpMixer(nn.Module):
         self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
 
         ##initial_fc and stem not needed
-        #self.initial_fc =nn.Linear(4096, 150528)
+        self.initial_fc = nn.Linear(4096, 150528)
+
         #self.stem = PatchEmbed(
         #    img_size=img_size, patch_size=patch_size, in_chans=in_chans,
         #    embed_dim=embed_dim, norm_layer=norm_layer if stem_norm else None)
         # FIXME drop_path (stochastic depth scaling rule or all the same?)
         #print("num_classes:",self.num_classes, "embed_dim:", embed_dim)
+
         self.blocks = nn.Sequential(*[
             block_layer(
                 embed_dim
-                ,16 #196 #self.stem.num_patches
+                ,196 #16 #self.stem.num_patches
                 , mlp_ratio, mlp_layer=mlp_layer, norm_layer=norm_layer, act_layer=act_layer,
                 drop=drop_rate, drop_path=drop_path_rate)
             for _ in range(num_blocks)])
@@ -288,21 +290,21 @@
         """
         self.norm = norm_layer(embed_dim)
         self.head = nn.Linear(embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()
-        # self.head = nn.Sequential(
-        #     nn.Linear(embed_dim, self.num_classes),
-        #     nn.ReLU(),
-        #     nn.Dropout(p=0.3),
-        #     nn.Linear(self.num_classes, 1024),
-        #     nn.ReLU(),
-        #     nn.Dropout(p=0.3),
-        #     nn.Linear(1024, 512),
-        #     nn.ReLU(),
-        #     nn.Dropout(p=0.3),
-        #     nn.Linear(512, 256),
-        #     nn.ReLU(),
-        #     nn.Dropout(p=0.3),
-        #     nn.Linear(256, 2)
-        # )
+        self.final_head = nn.Sequential(
+            # nn.Linear(embed_dim, self.num_classes),
+            # nn.ReLU(),
+            # nn.Dropout(p=0.3),
+            nn.Linear(self.num_classes, 1024),
+            nn.ReLU(),
+            nn.Dropout(p=0.3),
+            nn.Linear(1024, 512),
+            nn.ReLU(),
+            nn.Dropout(p=0.3),
+            nn.Linear(512, 256),
+            nn.ReLU(),
+            nn.Dropout(p=0.3),
+            nn.Linear(256, 2)
+        )
         #self.sigmoid = nn.Sigmoid()
         self.sm = nn.Softmax(dim=1)
         self.init_weights(nlhb=nlhb)
@@ -330,10 +332,12 @@ class MlpMixer(nn.Module):
         return x
 
     def forward(self, x):
-        #x = self.initial_fc(x)
-        #x = torch.reshape(x, (196, 768))
+        x = self.initial_fc(x)
+        x = torch.relu(x)
+        x = torch.reshape(x, (-1, 196, 768))
         x = self.forward_features(x)
         x = self.head(x)
+        x = self.final_head(x)
         #print(x)
         #x = self.sigmoid(x)
         #print(x)
diff --git a/train.py b/train.py
index c9116938..edf8f7d2 100644
--- a/train.py
+++ b/train.py
@@ -668,6 +668,10 @@ def train_one_epoch(
         lr_scheduler=None, saver=None, output_dir=None, amp_autocast=suppress,
         loss_scaler=None, model_ema=None, mixup_fn=None):
 
+    # Freeze the pretrained mixer blocks; the remaining layers stay trainable.
+    for param in model.blocks.parameters():
+        param.requires_grad = False
+    print(model)
     if args.mixup_off_epoch and epoch >= args.mixup_off_epoch:
         if args.prefetcher and loader.mixup_enabled:
             loader.mixup_enabled = False
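
For reference, a minimal runnable sketch of the freeze-and-finetune pattern the train.py change applies. The backbone/classifier modules below are illustrative stand-ins, not code from this repository: freeze the pretrained trunk through its module attribute, then give the optimizer only the parameters that remain trainable.

    import torch
    import torch.nn as nn

    # Illustrative stand-ins for the pretrained trunk and the newly added head.
    backbone = nn.Sequential(nn.Linear(4096, 768), nn.GELU(), nn.Linear(768, 768))
    classifier = nn.Sequential(nn.Linear(768, 256), nn.ReLU(), nn.Dropout(p=0.3), nn.Linear(256, 2))
    model = nn.Sequential(backbone, classifier)

    # Freeze the trunk; its parameters receive no gradients during training.
    for param in backbone.parameters():
        param.requires_grad = False

    # Hand the optimizer only the still-trainable parameters.
    optimizer = torch.optim.AdamW(
        (p for p in model.parameters() if p.requires_grad), lr=1e-4)

    # Sanity check: count trainable vs. frozen parameters.
    n_trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
    n_frozen = sum(p.numel() for p in model.parameters() if not p.requires_grad)
    print(f"trainable: {n_trainable:,}  frozen: {n_frozen:,}")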