diff --git a/avg_checkpoints.py b/avg_checkpoints.py
old mode 100755
new mode 100644
diff --git a/benchmark.py b/benchmark.py
old mode 100755
new mode 100644
diff --git a/clean_checkpoint.py b/clean_checkpoint.py
old mode 100755
new mode 100644
diff --git a/distributed_train.sh b/distributed_train.sh
old mode 100755
new mode 100644
diff --git a/inference.py b/inference.py
old mode 100755
new mode 100644
diff --git a/timm/data/custom_dataset.py b/timm/data/custom_dataset.py
new file mode 100644
index 00000000..e5e34847
--- /dev/null
+++ b/timm/data/custom_dataset.py
@@ -0,0 +1,40 @@
+import glob
+import cv2
+import numpy as np
+import torch
+from torch.utils.data import Dataset, DataLoader
+
+
+class CustomDataset(Dataset):
+    def __init__(self):
+        self.imgs_path = "Dog_Cat_Dataset/"
+        file_list = glob.glob(self.imgs_path + "*")
+        print(file_list)
+        self.data = []
+        for class_path in file_list:
+            class_name = class_path.split("/")[-1]
+            for img_path in glob.glob(class_path + "/*.jpeg"):
+                self.data.append([img_path, class_name])
+        print(self.data)
+        self.class_map = {"dogs": 0, "cats": 1}
+        self.img_dim = (416, 416)
+
+    def __len__(self):
+        return len(self.data)
+
+    def __getitem__(self, idx):
+        img_path, class_name = self.data[idx]
+        img = cv2.imread(img_path)
+        img = cv2.resize(img, self.img_dim)
+        class_id = self.class_map[class_name]
+        img_tensor = torch.from_numpy(img)
+        img_tensor = img_tensor.permute(2, 0, 1)
+        class_id = torch.tensor([class_id])
+        return img_tensor, class_id
+
+if __name__ == "__main__":
+    dataset = CustomDataset()
+    data_loader = DataLoader(dataset, batch_size=4, shuffle=True)
+    for imgs, labels in data_loader:
+        print("Batch of images has shape: ", imgs.shape)
+        print("Batch of labels has shape: ", labels.shape)
\ No newline at end of file
diff --git a/timm/data/dataset_factory.py b/timm/data/dataset_factory.py
index 194a597e..03df81de 100644
--- a/timm/data/dataset_factory.py
+++ b/timm/data/dataset_factory.py
@@ -17,6 +17,7 @@ except ImportError:
     has_inaturalist = False
 
 from .dataset import IterableImageDataset, ImageDataset
+from .textdataset import TextDataset
 
 _TORCH_BASIC_DS = dict(
     cifar10=CIFAR10,
@@ -134,6 +135,11 @@ def create_dataset(
         ds = IterableImageDataset(
             root, parser=name, split=split, is_training=is_training,
             download=download, batch_size=batch_size, repeats=repeats, **kwargs)
+    elif name == 'embeddings':
+        if search_split and os.path.isdir(root):
+            # look for split specific sub-folder in root
+            root = _search_split(root, split)
+        ds = TextDataset(root, split)
     else:
         # FIXME support more advance split cfg for ImageFolder/Tar datasets in the future
         if search_split and os.path.isdir(root):
diff --git a/timm/data/loader.py b/timm/data/loader.py
index 67d8cd83..ea490f38 100644
--- a/timm/data/loader.py
+++ b/timm/data/loader.py
@@ -225,6 +225,7 @@ def create_loader(
     if use_multi_epochs_loader:
         loader_class = MultiEpochsDataLoader
 
+    print(loader_class)
     loader_args = dict(
         batch_size=batch_size,
         shuffle=not isinstance(dataset, torch.utils.data.IterableDataset) and sampler is None and is_training,
@@ -255,6 +256,7 @@ def create_loader(
     )
     return loader
 
+
 class MultiEpochsDataLoader(torch.utils.data.DataLoader):
 
     def __init__(self, *args, **kwargs):
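The 'embeddings' branch added to create_dataset() above hands off to the TextDataset introduced in the next file. A minimal sketch of the intended call path, assuming a placeholder root 'feats/' laid out as one .txt file of 32x4096 floats per video (the path is illustrative, not part of the patch):

    from timm.data import create_dataset

    # 'embeddings' routes to TextDataset; the root is first searched for a
    # split sub-folder, just like the image-folder path
    ds = create_dataset('embeddings', root='feats/', split='train')
    features, label = ds[0]  # features: (16, 256) float32 tensor, label: 0 or 1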
diff --git a/timm/data/textdataset.py b/timm/data/textdataset.py
new file mode 100644
index 00000000..895b7c2d
--- /dev/null
+++ b/timm/data/textdataset.py
@@ -0,0 +1,43 @@
+from torch.utils.data import Dataset
+import os
+import torch
+import glob
+import numpy as np
+
+
+class TextDataset(Dataset):
+    def __init__(self, dir_path, split):
+        self.path = dir_path
+        self.split = split
+
+    def __len__(self):
+        # each feature file stores 32 vectors, so the dataset is 32x the file count
+        count = 0
+        for root_dir, cur_dir, files in os.walk(self.path):
+            count += len(files)
+        return count * 32
+
+    def __getitem__(self, idx):
+        # Index sequentially over the sorted file list: open file idx // 32,
+        # take the (idx % 32)-th 4096-float vector in it, reshape it to
+        # (16, 256), and derive the binary label from the file name.
+
+        def listdir_nohidden(AllVideos_Path):  # ignore hidden files
+            file_dir_extension = os.path.join(AllVideos_Path, '*.txt')
+            for f in glob.glob(file_dir_extension):
+                if not f.startswith('.'):
+                    yield os.path.basename(f)
+
+        All_Videos = sorted(listdir_nohidden(self.path))
+        VideoPath = os.path.join(self.path, All_Videos[idx // 32])
+        feat = idx % 32
+        with open(VideoPath, "r") as f:
+            words = f.read().split()
+        features = np.float32(words[feat * 4096:feat * 4096 + 4096])
+        features = torch.tensor(features)
+        features = torch.reshape(features, (16, 256))
+
+        # file names containing 'Normal' are labelled 1, everything else 0
+        label = torch.tensor(0 if VideoPath.find('Normal') == -1 else 1)
+
+        return features, label
diff --git a/timm/models/mlp_mixer.py b/timm/models/mlp_mixer.py
index 727b655b..4b5683d2 100644
--- a/timm/models/mlp_mixer.py
+++ b/timm/models/mlp_mixer.py
@@ -85,7 +85,7 @@ default_cfgs = dict(
     # Mixer ImageNet-21K-P pretraining
     mixer_b16_224_miil_in21k=_cfg(
         url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil_in21k.pth',
-        mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=11221,
+        mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=2,
     ),
     mixer_b16_224_miil=_cfg(
         url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil.pth',
@@ -269,14 +269,25 @@ class MlpMixer(nn.Module):
         self.stem = PatchEmbed(
             img_size=img_size, patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim,
             norm_layer=norm_layer if stem_norm else None)
         # FIXME drop_path (stochastic depth scaling rule or all the same?)
+        # stem is bypassed below, so the token count is fixed at 16 for the
+        # pre-computed (16, 256) embeddings instead of self.stem.num_patches
+        self.blocks = nn.Sequential(*[
+            block_layer(
+                embed_dim
+                , 16  # self.stem.num_patches
+                , mlp_ratio, mlp_layer=mlp_layer, norm_layer=norm_layer,
+                act_layer=act_layer, drop=drop_rate, drop_path=drop_path_rate)
+            for _ in range(num_blocks)])
+        """
         self.blocks = nn.Sequential(*[
             block_layer(
                 embed_dim, self.stem.num_patches, mlp_ratio, mlp_layer=mlp_layer, norm_layer=norm_layer,
                 act_layer=act_layer, drop=drop_rate, drop_path=drop_path_rate)
             for _ in range(num_blocks)])
+        """
         self.norm = norm_layer(embed_dim)
         self.head = nn.Linear(embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()
-
+        
         self.init_weights(nlhb=nlhb)
 
@@ -291,7 +302,8 @@ class MlpMixer(nn.Module):
         self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
 
     def forward_features(self, x):
-        x = self.stem(x)
+        #x = self.stem(x)  # stem bypassed: x is already a (B, 16, 256) embedding
+        #print(x.shape)
         x = self.blocks(x)
         x = self.norm(x)
         x = x.mean(dim=1)
@@ -461,7 +473,7 @@ def mixer_b16_224_miil_in21k(pretrained=False, **kwargs):
     """ Mixer-B/16 224x224. ImageNet-1k pretrained weights.
     Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
     """
-    model_args = dict(patch_size=16, num_blocks=12, embed_dim=768, **kwargs)
+    model_args = dict(patch_size=16, num_blocks=12, embed_dim=256, **kwargs)
     model = _create_mixer('mixer_b16_224_miil_in21k', pretrained=pretrained, **model_args)
     return model
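With the stem bypassed and the sequence length pinned to 16, the patched mixer_b16_224_miil_in21k consumes pre-computed (batch, 16, 256) embeddings directly. A quick shape check under those assumptions (pretrained=False, since the released 768-dim weights no longer fit embed_dim=256):

    import torch
    from timm import create_model

    model = create_model('mixer_b16_224_miil_in21k', pretrained=False, num_classes=2)
    x = torch.randn(2, 16, 256)  # two pre-computed embedding "images"
    logits = model(x)            # blocks -> norm -> mean(dim=1) -> head
    print(logits.shape)          # torch.Size([2, 2])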
diff --git a/train.py b/train.py
old mode 100755
new mode 100644
index 849f40e3..73b5e7e0
--- a/train.py
+++ b/train.py
@@ -555,7 +555,7 @@ def main():
         use_multi_epochs_loader=args.use_multi_epochs_loader,
         worker_seeding=args.worker_seeding,
     )
-
+    
     loader_eval = create_loader(
         dataset_eval,
         input_size=data_config['input_size'],
diff --git a/validate.py b/validate.py
old mode 100755
new mode 100644
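Taken together, a sketch of one training-style step through the new path; 'feats/train' is a placeholder directory of .txt feature files as described above:

    import torch
    from torch.utils.data import DataLoader
    from timm import create_model
    from timm.data.textdataset import TextDataset

    dataset = TextDataset('feats/train', split='train')
    loader = DataLoader(dataset, batch_size=4, shuffle=True)
    model = create_model('mixer_b16_224_miil_in21k', pretrained=False, num_classes=2)
    loss_fn = torch.nn.CrossEntropyLoss()

    features, labels = next(iter(loader))    # (4, 16, 256) float32, (4,) int64
    loss = loss_fn(model(features), labels)  # logits (4, 2) vs. class indices
    loss.backward()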