@@ -63,7 +63,7 @@ def _cfg(url='', **kwargs):
 default_cfgs = dict(
-    mixer_s32_224=_cfg(),
+    mixer_s32_224=_cfg(num_classes=2),
     mixer_s16_224=_cfg(),
     mixer_b32_224=_cfg(),
     mixer_b16_224=_cfg(
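
The one-line change above works because _cfg, defined just before this hunk, merges keyword overrides into a default pretrained-config dict, so two-class output becomes the default for mixer_s32_224. A minimal sketch of that pattern, with illustrative field values rather than timm's exact defaults:

    # Sketch only: default values here are illustrative, not timm's exact config.
    def _cfg_sketch(url='', **kwargs):
        cfg = dict(url=url, num_classes=1000, input_size=(3, 224, 224))
        cfg.update(kwargs)  # caller overrides win
        return cfg

    assert _cfg_sketch(num_classes=2)['num_classes'] == 2
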
@@ -264,12 +264,13 @@ class MlpMixer(nn.Module):
         super().__init__()
         self.num_classes = num_classes
         self.num_features = self.embed_dim = embed_dim  # num_features for consistency with other models
-        self.initial_fc = nn.Linear(4096, 150528)
-        self.stem = PatchEmbed(
-            img_size=img_size, patch_size=patch_size, in_chans=in_chans,
-            embed_dim=embed_dim, norm_layer=norm_layer if stem_norm else None)
+        # initial_fc and stem not needed
+        # self.initial_fc = nn.Linear(4096, 150528)
+        # self.stem = PatchEmbed(
+        #     img_size=img_size, patch_size=patch_size, in_chans=in_chans,
+        #     embed_dim=embed_dim, norm_layer=norm_layer if stem_norm else None)
         # FIXME drop_path (stochastic depth scaling rule or all the same?)
         # embed_dim = 256
         # print("num_classes:", self.num_classes, "embed_dim:", embed_dim)
         self.blocks = nn.Sequential(*[
             block_layer(
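
The magic numbers in the now-disabled initial_fc are consistent: it projected a 4096-d vector to 150528 values, which is both 196 * 768 (one 768-d token per 16x16 patch of a 224x224 image) and 3 * 224 * 224 (a flattened RGB image). With initial_fc and the PatchEmbed stem both commented out, the model no longer embeds raw inputs itself; as the forward hunks below show, inputs must already be token embeddings of shape (batch, num_tokens, embed_dim).

    # Quick arithmetic check on the disabled initial_fc dimensions.
    assert 196 * 768 == 3 * 224 * 224 == 150528
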
@@ -286,23 +287,24 @@ class MlpMixer(nn.Module):
             for _ in range(num_blocks)])
         """
         self.norm = norm_layer(embed_dim)
-        # self.head = nn.Linear(embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()
-        self.head = nn.Sequential(
-            nn.Linear(embed_dim, self.num_classes),
-            nn.ReLU(),
-            nn.Dropout(p=0.3),
-            nn.Linear(self.num_classes, 1024),
-            nn.ReLU(),
-            nn.Dropout(p=0.3),
-            nn.Linear(1024, 512),
-            nn.ReLU(),
-            nn.Dropout(p=0.3),
-            nn.Linear(512, 256),
-            nn.ReLU(),
-            nn.Dropout(p=0.3),
-            nn.Linear(256, 2)
-        )
-        self.sigmoid = nn.Sigmoid()
+        self.head = nn.Linear(embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()
+        # self.head = nn.Sequential(
+        #     nn.Linear(embed_dim, self.num_classes),
+        #     nn.ReLU(),
+        #     nn.Dropout(p=0.3),
+        #     nn.Linear(self.num_classes, 1024),
+        #     nn.ReLU(),
+        #     nn.Dropout(p=0.3),
+        #     nn.Linear(1024, 512),
+        #     nn.ReLU(),
+        #     nn.Dropout(p=0.3),
+        #     nn.Linear(512, 256),
+        #     nn.ReLU(),
+        #     nn.Dropout(p=0.3),
+        #     nn.Linear(256, 2)
+        # )
+        # self.sigmoid = nn.Sigmoid()
+        self.sm = nn.Softmax(dim=1)
         self.init_weights(nlhb=nlhb)
 
     def init_weights(self, nlhb=False):
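
This hunk retires the deep MLP head in favor of timm's stock single linear head, and swaps the sigmoid for a softmax over the class dimension. A minimal sketch of the resulting head path, assuming embed_dim=256 and num_classes=2 as set elsewhere in this diff:

    import torch
    import torch.nn as nn

    embed_dim, num_classes = 256, 2           # this diff's values, not timm defaults
    head = nn.Linear(embed_dim, num_classes)  # replaces the deep MLP head
    sm = nn.Softmax(dim=1)                    # replaces the sigmoid

    feats = torch.randn(8, embed_dim)   # pooled features, one row per sample
    probs = sm(head(feats))             # shape (8, 2); each row sums to 1

One design note: if this model is trained with nn.CrossEntropyLoss, that loss applies log-softmax internally, so feeding it softmax outputs applies softmax twice; returning the raw head logits is the usual alternative.
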
@@ -318,23 +320,24 @@ class MlpMixer(nn.Module):
     def forward_features(self, x):
         # x = self.stem(x)
-        print("In_Model")
+        # print("In_Model")
         x = self.blocks(x)
-        print(x)
+        # print(x)
         x = self.norm(x)
-        print(x)
+        # print(x)
         x = x.mean(dim=1)
-        print(x)
+        # print(x)
         return x
 
     def forward(self, x):
-        x = self.initial_fc(x)
-        x = torch.reshape(x, (196, 768))
+        # x = self.initial_fc(x)
+        # x = torch.reshape(x, (196, 768))
         x = self.forward_features(x)
         x = self.head(x)
-        print(x)
-        x = self.sigmoid(x)
-        print(x)
+        # print(x)
+        # x = self.sigmoid(x)
+        # print(x)
+        x = self.sm(x)
         return x
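
With the stem and initial_fc disabled, the forward path is now blocks -> norm -> mean over the token dimension -> head -> softmax. The mean pooling is the one shape change worth spelling out; a small sketch with this diff's assumed sizes (196 tokens, 256 channels):

    import torch

    feats = torch.randn(8, 196, 256)  # (batch, tokens, embed_dim), post-blocks
    pooled = feats.mean(dim=1)        # -> (8, 256), matches x.mean(dim=1) above
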
@@ -413,7 +416,8 @@ def mixer_s32_224(pretrained=False, **kwargs):
     """ Mixer-S/32 224x224
     Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601
     """
-    model_args = dict(patch_size=32, num_blocks=8, embed_dim=512, **kwargs)
+    # model_args = dict(patch_size=32, num_blocks=8, embed_dim=512, **kwargs)
+    model_args = dict(patch_size=16, num_blocks=8, embed_dim=256, **kwargs)
     model = _create_mixer('mixer_s32_224', pretrained=pretrained, **model_args)
     return model
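
Hypothetical usage of the patched factory, assuming the file being patched is timm's mlp_mixer.py and is importable as below: despite the s32 name it now builds a patch-16, embed-256 configuration, and because the stem is disabled it consumes pre-embedded tokens rather than images.

    import torch
    from timm.models.mlp_mixer import mixer_s32_224  # import path is an assumption

    model = mixer_s32_224(num_classes=2)
    tokens = torch.randn(8, 196, 256)  # (batch, tokens, embed_dim)
    probs = model(tokens)              # (8, 2); rows sum to 1 via Softmax(dim=1)
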