Support reading from c3d_embeddings

pull/1229/head
kira7005 3 years ago
parent fa754db940
commit 6c376d8139

@ -0,0 +1,40 @@
import glob
import os

import cv2
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
class CustomDataset(Dataset):
    """Image dataset read from a directory with one sub-folder per class.

    Every ``*.jpeg`` file found in ``<imgs_path>/<class_name>/`` becomes one
    sample. ``self.data`` holds ``[image_path, class_name]`` pairs and
    ``self.class_map`` translates a class name into its integer id.
    """

    def __init__(self, imgs_path="Dog_Cat_Dataset/"):
        """Index the image files below *imgs_path*.

        Args:
            imgs_path: Root folder containing one sub-folder per class.
                Defaults to ``"Dog_Cat_Dataset/"``, the location that was
                previously hard-coded.
        """
        self.imgs_path = imgs_path
        self.class_map = {"dogs": 0, "cats": 1}
        # Passed to cv2.resize as the target size of every returned image.
        self.img_dim = (416, 416)
        self.data = []
        # Sorting makes the sample order independent of the order in which
        # the filesystem happens to list directory entries.
        for class_path in sorted(glob.glob(os.path.join(self.imgs_path, "*"))):
            # basename() handles whichever path separator glob produced,
            # unlike splitting on a literal "/".
            class_name = os.path.basename(class_path)
            for img_path in sorted(glob.glob(os.path.join(class_path, "*.jpeg"))):
                self.data.append([img_path, class_name])

    def __len__(self):
        """Return the number of indexed images."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sample at position *idx*.

        Returns:
            A ``(img_tensor, class_id)`` tuple: the image resized to
            ``self.img_dim`` with its last axis moved to the front, and a
            one-element tensor holding the class id.

        Raises:
            OSError: If the image file cannot be read or decoded.
            KeyError: If the image's folder name is not in ``class_map``.
        """
        img_path, class_name = self.data[idx]
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising;
            # fail here with the offending path rather than inside cv2.resize.
            raise OSError(f"Could not read image file: {img_path}")
        img = cv2.resize(img, self.img_dim)
        class_id = self.class_map[class_name]
        # Move the last axis (channels, for a colour image) to the front.
        img_tensor = torch.from_numpy(img).permute(2, 0, 1)
        return img_tensor, torch.tensor([class_id])
if __name__ == "__main__":
    # Smoke test: iterate the dataset in shuffled batches of four and report
    # the shape of every image batch and label batch.
    loader = DataLoader(CustomDataset(), batch_size=4, shuffle=True)
    for batch_imgs, batch_labels in loader:
        print("Batch of images has shape: ", batch_imgs.shape)
        print("Batch of labels has shape: ", batch_labels.shape)

@ -17,6 +17,7 @@ except ImportError:
has_inaturalist = False
from .dataset import IterableImageDataset, ImageDataset
from .textdataset import TextDataset
_TORCH_BASIC_DS = dict(
cifar10=CIFAR10,
@ -134,6 +135,11 @@ def create_dataset(
ds = IterableImageDataset(
root, parser=name, split=split, is_training=is_training,
download=download, batch_size=batch_size, repeats=repeats, **kwargs)
elif name == 'embeddings':
if search_split and os.path.isdir(root):
# look for split specific sub-folder in root
root = _search_split(root, split)
ds = TextDataset(root, split)
else:
# FIXME support more advanced split cfg for ImageFolder/Tar datasets in the future
if search_split and os.path.isdir(root):

@ -225,6 +225,7 @@ def create_loader(
if use_multi_epochs_loader:
loader_class = MultiEpochsDataLoader
print(loader_class)
loader_args = dict(
batch_size=batch_size,
shuffle=not isinstance(dataset, torch.utils.data.IterableDataset) and sampler is None and is_training,
@ -257,6 +258,7 @@ def create_loader(
return loader
class MultiEpochsDataLoader(torch.utils.data.DataLoader):
def __init__(self, *args, **kwargs):

@ -0,0 +1,60 @@
from torch.utils.data import Dataset, DataLoader
import os
import torch
import glob
import numpy as np
class TextDataset(Dataset):
    """Dataset of feature vectors stored as whitespace-separated text files.

    Each non-hidden ``*.txt`` file directly inside ``dir_path`` is read as
    ``SEGMENTS_PER_FILE`` consecutive vectors of ``FEATURE_DIM`` numbers.
    Files are ordered by name, and sample ``idx`` maps to vector
    ``idx % SEGMENTS_PER_FILE`` of file ``idx // SEGMENTS_PER_FILE``.
    """

    # Number of feature vectors read from each file.
    SEGMENTS_PER_FILE = 32
    # Number of values in one feature vector.
    FEATURE_DIM = 4096
    # Shape each vector is returned in; 16 * 256 == FEATURE_DIM.
    FEATURE_SHAPE = (16, 256)

    def __init__(self, dir_path, split):
        """Index the feature files found in *dir_path*.

        Args:
            dir_path: Directory holding the ``*.txt`` feature files.
            split: Name of the split; stored on the instance but not used
                by this class itself.
        """
        self.path = dir_path
        self.split = split
        # Listed once here so __len__ and __getitem__ agree on the same file
        # set and no directory scan is needed per sample.
        self._files = self._list_feature_files(dir_path)

    @staticmethod
    def _list_feature_files(dir_path):
        """Return the sorted names of the non-hidden ``*.txt`` files in *dir_path*."""
        names = (os.path.basename(p) for p in glob.glob(os.path.join(dir_path, "*.txt")))
        # The hidden-file test must look at the file name only: a root such
        # as "./data" would otherwise make every path look hidden.
        return sorted(name for name in names if not name.startswith("."))

    def __len__(self):
        """Return the number of samples (files times vectors per file)."""
        return len(self._files) * self.SEGMENTS_PER_FILE

    def __getitem__(self, idx):
        """Load one feature vector and its label.

        Returns:
            A ``(features, label)`` tuple. ``features`` is a float32 tensor
            of shape ``FEATURE_SHAPE``. ``label`` is a scalar tensor: 1 when
            the file name contains ``"Normal"``, otherwise 0.

        Raises:
            IndexError: If *idx* refers to a file beyond those indexed.
        """
        file_name = self._files[idx // self.SEGMENTS_PER_FILE]
        segment = idx % self.SEGMENTS_PER_FILE
        # The context manager closes the handle once the file has been read.
        with open(os.path.join(self.path, file_name), "r") as f:
            words = f.read().split()
        start = segment * self.FEATURE_DIM
        values = np.asarray(words[start:start + self.FEATURE_DIM], dtype=np.float32)
        features = torch.reshape(torch.tensor(values), self.FEATURE_SHAPE)
        # Label from the file name only, so a parent directory that happens
        # to contain "Normal" cannot flip every label.
        label = torch.tensor(1 if "Normal" in file_name else 0)
        return features, label

@ -85,7 +85,7 @@ default_cfgs = dict(
# Mixer ImageNet-21K-P pretraining
mixer_b16_224_miil_in21k=_cfg(
url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil_in21k.pth',
mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=11221,
mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=2,
),
mixer_b16_224_miil=_cfg(
url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil.pth',
@ -269,11 +269,22 @@ class MlpMixer(nn.Module):
img_size=img_size, patch_size=patch_size, in_chans=in_chans,
embed_dim=embed_dim, norm_layer=norm_layer if stem_norm else None)
# FIXME drop_path (stochastic depth scaling rule or all the same?)
#embed_dim=256
#print("num_classes:",self.num_classes, "embed_dim:", embed_dim)
self.blocks = nn.Sequential(*[
block_layer(
embed_dim
, 16 #self.stem.num_patches
, mlp_ratio, mlp_layer=mlp_layer, norm_layer=norm_layer,
act_layer=act_layer, drop=drop_rate, drop_path=drop_path_rate)
for _ in range(num_blocks)])
"""
self.blocks = nn.Sequential(*[
block_layer(
embed_dim, self.stem.num_patches, mlp_ratio, mlp_layer=mlp_layer, norm_layer=norm_layer,
act_layer=act_layer, drop=drop_rate, drop_path=drop_path_rate)
for _ in range(num_blocks)])
"""
self.norm = norm_layer(embed_dim)
self.head = nn.Linear(embed_dim, self.num_classes) if num_classes > 0 else nn.Identity()
@ -291,7 +302,8 @@ class MlpMixer(nn.Module):
self.head = nn.Linear(self.embed_dim, num_classes) if num_classes > 0 else nn.Identity()
def forward_features(self, x):
x = self.stem(x)
#x = self.stem(x)
#print(x.shape)
x = self.blocks(x)
x = self.norm(x)
x = x.mean(dim=1)
@ -461,7 +473,7 @@ def mixer_b16_224_miil_in21k(pretrained=False, **kwargs):
""" Mixer-B/16 224x224. ImageNet-1k pretrained weights.
Weights taken from: https://github.com/Alibaba-MIIL/ImageNet21K
"""
model_args = dict(patch_size=16, num_blocks=12, embed_dim=768, **kwargs)
model_args = dict(patch_size=16, num_blocks=12, embed_dim=256, **kwargs)
model = _create_mixer('mixer_b16_224_miil_in21k', pretrained=pretrained, **model_args)
return model

Loading…
Cancel
Save