Supports Read from c3d_embeddings

3 years ago · 6c376d8139
parent fa754db940
commit 6c376d8139
12 changed files with 125 additions and 5 deletions
--- a/avg_checkpoints.py
+++ b/avg_checkpoints.py
--- a/benchmark.py
+++ b/benchmark.py
--- a/clean_checkpoint.py
+++ b/clean_checkpoint.py
--- a/distributed_train.sh
+++ b/distributed_train.sh
--- a/inference.py
+++ b/inference.py
--- a/timm/data/custom_dataset.py
+++ b/timm/data/custom_dataset.py
@ -0,0 +1,40 @@
+import glob
+import cv2
+import numpy as np
+import torch
+from torch.utils.data import Dataset, DataLoader
+
+
+class CustomDataset(Dataset):
+	"""Image-folder dataset for a two-class dogs/cats layout.
+
+	Every entry directly under ``imgs_path`` is treated as a class folder and
+	all ``*.jpeg`` files inside it become samples labelled with the folder
+	name. Folder names are mapped to integer ids through ``class_map``.
+
+	Args:
+		imgs_path: Root directory holding one sub-folder per class.
+			Defaults to the original hard-coded ``"Dog_Cat_Dataset/"``.
+	"""
+
+	def __init__(self, imgs_path="Dog_Cat_Dataset/"):
+		# Local import: this module's import block does not bring in ``os``.
+		import os
+
+		self.imgs_path = imgs_path
+		self.data = []
+		for class_path in glob.glob(os.path.join(self.imgs_path, "*")):
+			# basename(normpath(...)) copes with trailing separators and with
+			# platform-specific separators, unlike splitting on "/".
+			class_name = os.path.basename(os.path.normpath(class_path))
+			for img_path in glob.glob(os.path.join(class_path, "*.jpeg")):
+				self.data.append([img_path, class_name])
+		self.class_map = {"dogs" : 0, "cats": 1}
+		self.img_dim = (416, 416)
+
+	def __len__(self):
+		"""Return the number of (image path, class name) pairs collected."""
+		return len(self.data)
+
+	def __getitem__(self, idx):
+		"""Load sample ``idx``.
+
+		Returns:
+			A tuple ``(img_tensor, class_id)``: the image resized to
+			``img_dim`` and permuted from (H, W, C) to (C, H, W), and a
+			one-element tensor holding the integer class id.
+
+		Raises:
+			OSError: If the image file cannot be read.
+			KeyError: If the sample's folder name is not in ``class_map``.
+		"""
+		img_path, class_name = self.data[idx]
+		img = cv2.imread(img_path)
+		if img is None:
+			# cv2.imread signals failure by returning None rather than raising;
+			# fail here with the offending path instead of inside cv2.resize.
+			raise OSError("could not read image: {}".format(img_path))
+		img = cv2.resize(img, self.img_dim)
+		class_id = self.class_map[class_name]
+		img_tensor = torch.from_numpy(img).permute(2, 0, 1)
+		return img_tensor, torch.tensor([class_id])
+
+if __name__ == "__main__":
+	# Smoke test: iterate the dataset in shuffled batches of 4 and report
+	# the shape of every image batch and label batch.
+	loader = DataLoader(CustomDataset(), batch_size=4, shuffle=True)
+	for image_batch, label_batch in loader:
+		print("Batch of images has shape: ",image_batch.shape)
+		print("Batch of labels has shape: ", label_batch.shape)
--- a/timm/data/dataset_factory.py
+++ b/timm/data/dataset_factory.py
@ -17,6 +17,7 @@ except ImportError:
    has_inaturalist = False

 from .dataset import IterableImageDataset, ImageDataset
+from .textdataset import TextDataset

 _TORCH_BASIC_DS = dict(
    cifar10=CIFAR10,
@ -134,6 +135,11 @@ def create_dataset(
        ds = IterableImageDataset(
            root, parser=name, split=split, is_training=is_training,
            download=download, batch_size=batch_size, repeats=repeats, **kwargs)
+    elif name == 'embeddings':
+        if search_split and os.path.isdir(root):
+            # look for split specific sub-folder in root
+            root = _search_split(root, split)
+        ds = TextDataset(root, split)
    else:
        # FIXME support more advance split cfg for ImageFolder/Tar datasets in the future
        if search_split and os.path.isdir(root):
--- a/timm/data/loader.py
+++ b/timm/data/loader.py
@ -225,6 +225,7 @@ def create_loader(
    if use_multi_epochs_loader:
        loader_class = MultiEpochsDataLoader

+    print(loader_class)
    loader_args = dict(
        batch_size=batch_size,
        shuffle=not isinstance(dataset, torch.utils.data.IterableDataset) and sampler is None and is_training,
@ -257,6 +258,7 @@ def create_loader(
    return loader
    

+
 class MultiEpochsDataLoader(torch.utils.data.DataLoader):

    def __init__(self, *args, **kwargs):
--- a/timm/data/textdataset.py
+++ b/timm/data/textdataset.py
@ -0,0 +1,60 @@
+from torch.utils.data import Dataset, DataLoader
+import os
+import torch
+import glob
+import numpy as np
+
+
+class TextDataset(Dataset):
+    """Dataset of fixed-size feature vectors stored in whitespace-separated text files.
+
+    Every ``*.txt`` file directly inside ``dir_path`` is read as one flat
+    sequence of numbers containing ``SEGMENTS_PER_FILE`` consecutive vectors
+    of ``FEATURE_DIM`` values each. Sample ``idx`` is vector
+    ``idx % SEGMENTS_PER_FILE`` of file ``idx // SEGMENTS_PER_FILE`` (files
+    ordered by name), reshaped to ``FEATURE_SHAPE``.
+
+    Args:
+        dir_path: Directory containing the ``.txt`` feature files.
+        split: Split name; stored on the instance but not otherwise used here.
+    """
+
+    SEGMENTS_PER_FILE = 32
+    FEATURE_DIM = 4096
+    FEATURE_SHAPE = (16, 256)  # 16 * 256 == FEATURE_DIM
+
+    def __init__(self, dir_path, split):
+        self.path = dir_path
+        self.split = split
+        # Sorted feature-file names; listed lazily so construction does no I/O.
+        self._files = None
+
+    def _list_files(self):
+        """Return the sorted names of the non-hidden ``*.txt`` files in ``self.path``."""
+        if self._files is None:
+            pattern = os.path.join(self.path, '*.txt')
+            names = (os.path.basename(p) for p in glob.glob(pattern))
+            # Test the file name, not the full path: a directory such as
+            # "./data" must not cause every file to be treated as hidden.
+            self._files = sorted(n for n in names if not n.startswith('.'))
+        return self._files
+
+    def __len__(self):
+        """Return the number of samples: ``SEGMENTS_PER_FILE`` per feature file.
+
+        Uses the same file list as ``__getitem__`` so every index below the
+        returned length is valid.
+        """
+        return len(self._list_files()) * self.SEGMENTS_PER_FILE
+
+    def __getitem__(self, idx):
+        """Load sample ``idx``.
+
+        Returns:
+            A tuple ``(features, label)``: a float32 tensor of shape
+            ``FEATURE_SHAPE`` and a scalar integer tensor that is 1 when the
+            file path contains ``'Normal'`` and 0 otherwise.
+
+        Raises:
+            IndexError: If ``idx`` addresses a file beyond the file list.
+            ValueError: If the file holds fewer values than the requested
+                segment needs.
+        """
+        files = self._list_files()
+        file_idx, segment = divmod(idx, self.SEGMENTS_PER_FILE)
+        video_path = os.path.join(self.path, files[file_idx])
+
+        # Context manager so the handle is closed even if parsing fails.
+        with open(video_path, "r") as f:
+            words = f.read().split()
+
+        start = segment * self.FEATURE_DIM
+        values = words[start:start + self.FEATURE_DIM]
+        if len(values) != self.FEATURE_DIM:
+            raise ValueError(
+                "{}: segment {} has {} values, expected {}".format(
+                    video_path, segment, len(values), self.FEATURE_DIM))
+
+        features = torch.tensor(np.float32(values))
+        features = torch.reshape(features, self.FEATURE_SHAPE)
+
+        # NOTE(review): the match runs against the full path, so a 'Normal'
+        # anywhere in dir_path labels every sample 1 - confirm this is intended.
+        label = 0 if video_path.find('Normal') == -1 else 1
+
+        return features, torch.tensor(label)
--- a/timm/models/mlp_mixer.py
+++ b/timm/models/mlp_mixer.py
@ -85,7 +85,7 @@ default_cfgs = dict(
    # Mixer ImageNet-21K-P pretraining
    mixer_b16_224_miil_in21k=_cfg(
        url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil_in21k.pth',
-        mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=11221,
+        mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=2,
    ),
    mixer_b16_224_miil=_cfg(
        url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil.pth',
@ -269,11 +269,22 @@ class MlpMixer(nn.Module):
            img_size=img_size, patch_size=patch_size, in_chans=in_chans,
            embed_dim=embed_dim, norm_layer=norm_layer if stem_norm else None)
        # FIXME drop_path (stochastic depth scaling rule or all the same?)
+        #embed_dim=256
+        #print("num_classes:",self.num_classes, "embed_dim:", embed_dim)
+        self.blocks = nn.Sequential(*[
+            block_layer(
+                embed_dim
+                , 16 #self.stem.num_patches
+                , mlp_ratio, mlp_layer=mlp_layer, norm_layer=norm_layer,
+                act_layer=act_layer, drop=drop_rate, drop_path=drop_path_rate)
+            for _ in range(num_blocks)])
+        """
        self.blocks = nn.Sequential(*[
            block_layer(
                embed_dim, self.stem.num_patches, mlp_ratio, mlp_layer=mlp_layer, norm_layer=norm_layer,
                act_layer=act_layer, drop=drop_rate, drop_path=drop_path_rate)
            for _ in range(num_blocks)])
+        """
        self.norm = norm_layer(embed_dim)
        self.head = nn.Linear(embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()
        
@ -291,7 +302,8 @@ class MlpMixer(nn.Module):
        self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()

    def forward_features(self, x):
-        x = self.stem(x)
+        #x = self.stem(x)
+        #print(x.shape)
        x = self.blocks(x)
        x = self.norm(x)
        x = x.mean(dim=1)
@ -461,7 +473,7 @@ def mixer_b16_224_miil_in21k(pretrained=False, **kwargs):
    """ Mixer-B/16 224x224. ImageNet-1k pretrained weights.
    Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
    """
-    model_args = dict(patch_size=16, num_blocks=12, embed_dim=768, **kwargs)
+    model_args = dict(patch_size=16, num_blocks=12, embed_dim=256, **kwargs)
    model = _create_mixer('mixer_b16_224_miil_in21k', pretrained=pretrained, **model_args)
    return model

--- a/train.py
+++ b/train.py
--- a/validate.py
+++ b/validate.py