Supports Read from c3d_embeddings

3 years ago · 6c376d8139
parent fa754db940
commit 6c376d8139
12 changed files with 125 additions and 5 deletions
--- a/avg_checkpoints.py
+++ b/avg_checkpoints.py
--- a/benchmark.py
+++ b/benchmark.py
--- a/clean_checkpoint.py
+++ b/clean_checkpoint.py
--- a/distributed_train.sh
+++ b/distributed_train.sh
--- a/inference.py
+++ b/inference.py
--- a/timm/data/custom_dataset.py
+++ b/timm/data/custom_dataset.py
@ -0,0 +1,40 @@
 import glob
 import cv2
 import numpy as np
 import torch
 from torch.utils.data import Dataset, DataLoader
 class CustomDataset(Dataset):
     """Image-folder dataset yielding ``(image_tensor, class_id)`` pairs.

     The root directory is scanned for sub-directories; every ``*.jpeg`` file
     inside a sub-directory becomes one sample whose class name is the
     sub-directory name. The class name is translated to an integer through
     ``class_map`` when the sample is fetched.

     Args:
         imgs_path: Root directory to scan. Defaults to ``"Dog_Cat_Dataset/"``.
         class_map: Mapping from sub-directory name to integer class id.
             Defaults to ``{"dogs": 0, "cats": 1}``.
         img_dim: Target size handed to ``cv2.resize``. Defaults to
             ``(416, 416)``.

     Attributes:
         data: List of ``[img_path, class_name]`` entries, sorted by class
             directory and then by file path so indices are reproducible.
     """

     def __init__(self, imgs_path="Dog_Cat_Dataset/", class_map=None, img_dim=(416, 416)):
         # Local import: the file-level imports of this module do not include ``os``.
         import os

         self.imgs_path = imgs_path
         # Copy the mapping so later caller-side mutation cannot change labels.
         self.class_map = {"dogs": 0, "cats": 1} if class_map is None else dict(class_map)
         self.img_dim = tuple(img_dim)
         self.data = []
         # glob order is filesystem-dependent; sort for a deterministic index.
         for class_path in sorted(glob.glob(os.path.join(self.imgs_path, "*"))):
             # basename/normpath instead of split("/") so this also works with
             # platform-specific separators and trailing slashes.
             class_name = os.path.basename(os.path.normpath(class_path))
             for img_path in sorted(glob.glob(os.path.join(class_path, "*.jpeg"))):
                 self.data.append([img_path, class_name])

     def __len__(self):
         """Return the number of image files discovered at construction."""
         return len(self.data)

     def __getitem__(self, idx):
         """Load, resize and return sample ``idx``.

         Returns:
             A tuple ``(img_tensor, class_id)``: the resized image with its last
             axis moved to the front via ``permute(2, 0, 1)``, and a
             one-element tensor holding the class id.

         Raises:
             OSError: If the image file cannot be read.
             KeyError: If the sample's directory name is not in ``class_map``.
         """
         img_path, class_name = self.data[idx]
         img = cv2.imread(img_path)
         if img is None:
             # cv2.imread signals failure by returning None rather than
             # raising; fail here with the offending path instead of inside
             # cv2.resize.
             raise OSError(f"Could not read image file: {img_path}")
         img = cv2.resize(img, self.img_dim)
         # NOTE(review): cv2 normally yields BGR channel order - confirm that
         # downstream consumers expect it.
         img_tensor = torch.from_numpy(img).permute(2, 0, 1)
         class_id = torch.tensor([self.class_map[class_name]])
         return img_tensor, class_id
 if __name__ == "__main__":
     # Manual smoke test: build the dataset with its defaults, draw shuffled
     # batches of four and report the shape of every batch.
     loader = DataLoader(CustomDataset(), batch_size=4, shuffle=True)
     for batch_imgs, batch_labels in loader:
         print("Batch of images has shape: ", batch_imgs.shape)
         print("Batch of labels has shape: ", batch_labels.shape)
--- a/timm/data/dataset_factory.py
+++ b/timm/data/dataset_factory.py
@ -17,6 +17,7 @@ except ImportError:
    has_inaturalist = False
 from .dataset import IterableImageDataset, ImageDataset
 from .textdataset import TextDataset
 _TORCH_BASIC_DS = dict(
    cifar10=CIFAR10,
@ -134,6 +135,11 @@ def create_dataset(
        ds = IterableImageDataset(
            root, parser=name, split=split, is_training=is_training,
            download=download, batch_size=batch_size, repeats=repeats, **kwargs)
    elif name == 'embeddings':
        if search_split and os.path.isdir(root):
            # look for split specific sub-folder in root
            root = _search_split(root, split)
        ds = TextDataset(root, split)
    else:
        # FIXME support more advance split cfg for ImageFolder/Tar datasets in the future
        if search_split and os.path.isdir(root):
--- a/timm/data/loader.py
+++ b/timm/data/loader.py
@ -225,6 +225,7 @@ def create_loader(
    if use_multi_epochs_loader:
        loader_class = MultiEpochsDataLoader
    print(loader_class)
    loader_args = dict(
        batch_size=batch_size,
        shuffle=not isinstance(dataset, torch.utils.data.IterableDataset) and sampler is None and is_training,
@ -257,6 +258,7 @@ def create_loader(
    return loader
 class MultiEpochsDataLoader(torch.utils.data.DataLoader):
    def __init__(self, *args, **kwargs):
--- a/timm/data/textdataset.py
+++ b/timm/data/textdataset.py
@ -0,0 +1,60 @@
 from torch.utils.data import Dataset, DataLoader
 import os
 import torch
 import glob
 import numpy as np
 class TextDataset(Dataset):
     """Dataset over per-video feature files stored as whitespace-separated text.

     Every non-hidden ``*.txt`` file directly inside ``dir_path`` contributes
     ``SEGMENTS_PER_FILE`` samples. Sample ``idx`` is taken from file
     ``idx // SEGMENTS_PER_FILE`` (files sorted by name) and consists of the
     ``idx % SEGMENTS_PER_FILE``-th run of ``FEATURE_DIM`` values in that file,
     reshaped to ``FEATURE_SHAPE``.

     The label is ``1`` when the file name contains ``"Normal"`` and ``0``
     otherwise.

     Args:
         dir_path: Directory holding the feature files.
         split: Split name; stored on the instance but not otherwise used here.
     """

     # Layout of a feature file: 32 segments of 4096 values each, and each
     # segment is presented to the model as a 16 x 256 matrix.
     SEGMENTS_PER_FILE = 32
     FEATURE_DIM = 4096
     FEATURE_SHAPE = (16, 256)

     def __init__(self, dir_path, split):
         self.path = dir_path
         self.split = split
         # Filled on first use so that construction stays free of disk access.
         self._files = None

     def _file_names(self):
         """Return the sorted, cached names of the feature files in ``self.path``."""
         names = getattr(self, "_files", None)
         if names is None:
             # Escape the directory so glob metacharacters in it are literal.
             pattern = os.path.join(glob.escape(self.path), "*.txt")
             # Test the *file name* for a leading dot: testing the full path
             # would discard every file when the root is relative ("./data").
             names = sorted(
                 name
                 for name in map(os.path.basename, glob.glob(pattern))
                 if not name.startswith(".")
             )
             self._files = names
         return names

     def __len__(self):
         """Return the number of samples: indexable files times segments per file."""
         # Count exactly the files __getitem__ can reach so every index in
         # range(len(self)) is valid.
         return len(self._file_names()) * self.SEGMENTS_PER_FILE

     def __getitem__(self, idx):
         """Return ``(features, label)`` for sample ``idx``.

         Returns:
             ``features``: float32 tensor of shape ``FEATURE_SHAPE``.
             ``label``: zero-dimensional integer tensor, ``1`` if the file name
             contains ``"Normal"`` and ``0`` otherwise.

         Raises:
             IndexError: If ``idx`` points past the last file.
             ValueError: If the file holds too few values for the segment.
         """
         file_idx, segment = divmod(idx, self.SEGMENTS_PER_FILE)
         file_name = self._file_names()[file_idx]
         video_path = os.path.join(self.path, file_name)

         # Context manager so the handle is closed even if parsing fails.
         with open(video_path, "r") as f:
             words = f.read().split()

         start = segment * self.FEATURE_DIM
         chunk = words[start:start + self.FEATURE_DIM]
         if len(chunk) != self.FEATURE_DIM:
             raise ValueError(
                 f"{video_path}: segment {segment} has {len(chunk)} values, "
                 f"expected {self.FEATURE_DIM}"
             )

         features = torch.from_numpy(np.asarray(chunk, dtype=np.float32))
         features = features.reshape(self.FEATURE_SHAPE)

         # Decide on the file name only; a parent directory that happens to
         # contain "Normal" must not turn every sample into label 1.
         label = torch.tensor(1 if "Normal" in file_name else 0)
         return features, label
--- a/timm/models/mlp_mixer.py
+++ b/timm/models/mlp_mixer.py
@ -85,7 +85,7 @@ default_cfgs = dict(
    # Mixer ImageNet-21K-P pretraining
    mixer_b16_224_miil_in21k=_cfg(
        url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil_in21k.pth',
-        mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=11221,
+        mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=2,
    ),
    mixer_b16_224_miil=_cfg(
        url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil.pth',
@ -269,11 +269,22 @@ class MlpMixer(nn.Module):
            img_size=img_size, patch_size=patch_size, in_chans=in_chans,
            embed_dim=embed_dim, norm_layer=norm_layer if stem_norm else None)
        # FIXME drop_path (stochastic depth scaling rule or all the same?)
        #embed_dim=256
        #print("num_classes:",self.num_classes, "embed_dim:", embed_dim)
        self.blocks = nn.Sequential(*[
            block_layer(
                embed_dim
                , 16 #self.stem.num_patches
                , mlp_ratio, mlp_layer=mlp_layer, norm_layer=norm_layer,
                act_layer=act_layer, drop=drop_rate, drop_path=drop_path_rate)
            for _ in range(num_blocks)])
        """
        self.blocks = nn.Sequential(*[
            block_layer(
                embed_dim, self.stem.num_patches, mlp_ratio, mlp_layer=mlp_layer, norm_layer=norm_layer,
                act_layer=act_layer, drop=drop_rate, drop_path=drop_path_rate)
            for _ in range(num_blocks)])
        """
        self.norm = norm_layer(embed_dim)
        self.head = nn.Linear(embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()
@ -291,7 +302,8 @@ class MlpMixer(nn.Module):
        self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
    def forward_features(self, x):
-        x = self.stem(x)
+        #x = self.stem(x)
        #print(x.shape)
        x = self.blocks(x)
        x = self.norm(x)
        x = x.mean(dim=1)
@ -461,7 +473,7 @@ def mixer_b16_224_miil_in21k(pretrained=False, **kwargs):
    """ Mixer-B/16 224x224. ImageNet-1k pretrained weights.
    Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
    """
-    model_args = dict(patch_size=16, num_blocks=12, embed_dim=768, **kwargs)
+    model_args = dict(patch_size=16, num_blocks=12, embed_dim=256, **kwargs)
    model = _create_mixer('mixer_b16_224_miil_in21k', pretrained=pretrained, **model_args)
    return model
--- a/train.py
+++ b/train.py
--- a/validate.py
+++ b/validate.py