diff --git a/sultani_code.py b/sultani_code.py
new file mode 100644
index 00000000..2bebdf61
--- /dev/null
+++ b/sultani_code.py
@@ -0,0 +1,318 @@
+from keras.models import Sequential, model_from_json
+from keras.layers import Dense, Dropout
+from keras.regularizers import l2
+from keras.optimizers import Adagrad
+from scipy.io import loadmat, savemat
+import theano.tensor as T
+import theano
+import os
+from os import listdir
+import numpy as np
+from datetime import datetime
+import glob
+import theano.sandbox
+theano.sandbox.cuda.use('gpu0')
+
+
+print("Create Model")
+model = Sequential()
+model.add(Dense(512, input_dim=4096, init='glorot_normal', W_regularizer=l2(0.001), activation='relu'))
+model.add(Dropout(0.6))
+model.add(Dense(32, init='glorot_normal', W_regularizer=l2(0.001)))
+model.add(Dropout(0.6))
+model.add(Dense(1, init='glorot_normal', W_regularizer=l2(0.001), activation='sigmoid'))
+
+
+def load_model(json_path):  # Load the model architecture from a JSON file
+    model = model_from_json(open(json_path).read())
+    return model
+
+
+def load_weights(model, weight_path):  # Load the model weights from a .mat file
+    dict2 = loadmat(weight_path)
+    weights_dict = conv_dict(dict2)
+    for i, layer in enumerate(model.layers):
+        weights = weights_dict[str(i)]
+        layer.set_weights(weights)
+    return model
+
+
+def conv_dict(dict2):  # Convert loadmat output back into per-layer weight lists
+    weights_dict = {}
+    for i in range(len(dict2)):
+        if str(i) in dict2:
+            if dict2[str(i)].shape == (0, 0):
+                weights_dict[str(i)] = dict2[str(i)]
+            else:
+                weights = dict2[str(i)][0]
+                weights2 = []
+                for weight in weights:
+                    if weight.shape in [(1, x) for x in range(0, 5000)]:
+                        weights2.append(weight[0])
+                    else:
+                        weights2.append(weight)
+                weights_dict[str(i)] = weights2
+    return weights_dict
+
+
+def save_model(model, json_path, weight_path):  # Save the model architecture (JSON) and weights (.mat)
+    json_string = model.to_json()
+    open(json_path, 'w').write(json_string)
+    weights_dict = {}
+    for i, layer in enumerate(model.layers):
+        weights = layer.get_weights()
+        my_list = np.zeros(len(weights), dtype=np.object)
+        my_list[:] = weights
+        weights_dict[str(i)] = my_list
+    savemat(weight_path, weights_dict)
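+
+# Illustrative round trip with the helpers above (the paths are placeholders,
+# not part of the training pipeline):
+#
+#   save_model(model, 'demo_model.json', 'demo_weights.mat')
+#   restored = load_model('demo_model.json')
+#   restored = load_weights(restored, 'demo_weights.mat')
+#   scores = restored.predict(np.random.rand(32, 4096))  # one 32-segment video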
+
+
+# Load Training Dataset
+
+def load_dataset_Train_batch(AbnormalPath, NormalPath):
+
+    batchsize = 60          # Each batch contains 60 videos.
+    n_exp = batchsize / 2   # Number of abnormal (and of normal) videos per batch
+
+    Num_abnormal = 810      # Total number of abnormal videos in the training set.
+    Num_Normal = 800        # Total number of normal videos in the training set.
+
+    # We assume the features of abnormal and normal videos are located in two different folders.
+    Abnor_list_iter = np.random.permutation(Num_abnormal)
+    Abnor_list_iter = Abnor_list_iter[Num_abnormal - n_exp:]  # Indices of randomly selected abnormal videos
+    Norm_list_iter = np.random.permutation(Num_Normal)
+    Norm_list_iter = Norm_list_iter[Num_Normal - n_exp:]      # Indices of randomly selected normal videos
+
+    AllVideos_Path = AbnormalPath
+
+    def listdir_nohidden(AllVideos_Path):  # List feature files, ignoring hidden files
+        file_dir_extension = os.path.join(AllVideos_Path, '*_C.txt')
+        for f in glob.glob(file_dir_extension):
+            if not f.startswith('.'):
+                yield os.path.basename(f)
+
+    All_Videos = sorted(listdir_nohidden(AllVideos_Path))
+    AllFeatures = []  # To store the C3D features of a batch
+    print("Loading Abnormal videos Features...")
+
+    Video_count = -1
+    for iv in Abnor_list_iter:
+        Video_count = Video_count + 1
+        VideoPath = os.path.join(AllVideos_Path, All_Videos[iv])
+        f = open(VideoPath, "r")
+        words = f.read().split()
+        num_feat = len(words) / 4096
+        # Number of features per video. In our case num_feat=32, as we divide each video into
+        # 32 segments. Note that C3D features are computed for the whole video first and then
+        # divided into 32 segments; please see Save_C3DFeatures_32Segments.m as well.
+
+        count = -1
+        VideoFeatues = []
+        for feat in xrange(0, num_feat):
+            feat_row1 = np.float32(words[feat * 4096:feat * 4096 + 4096])
+            count = count + 1
+            if count == 0:
+                VideoFeatues = feat_row1
+            if count > 0:
+                VideoFeatues = np.vstack((VideoFeatues, feat_row1))
+
+        if Video_count == 0:
+            AllFeatures = VideoFeatues
+        if Video_count > 0:
+            AllFeatures = np.vstack((AllFeatures, VideoFeatues))
+    print("Abnormal Features loaded")
+
+    print("Loading Normal videos...")
+    AllVideos_Path = NormalPath
+
+    All_Videos = sorted(listdir_nohidden(AllVideos_Path))
+
+    for iv in Norm_list_iter:
+        VideoPath = os.path.join(AllVideos_Path, All_Videos[iv])
+        f = open(VideoPath, "r")
+        words = f.read().split()
+        num_feat = len(words) / 4096  # num_feat=32, as each video is divided into 32 segments.
+
+        count = -1
+        VideoFeatues = []
+        for feat in xrange(0, num_feat):
+            feat_row1 = np.float32(words[feat * 4096:feat * 4096 + 4096])
+            count = count + 1
+            if count == 0:
+                VideoFeatues = feat_row1
+            if count > 0:
+                VideoFeatues = np.vstack((VideoFeatues, feat_row1))
+        AllFeatures = np.vstack((AllFeatures, VideoFeatues))
+
+    print("Features loaded")
+
+    AllLabels = np.zeros(32 * batchsize, dtype='uint8')
+    th_loop1 = n_exp * 32
+    th_loop2 = n_exp * 32 - 1
+
+    for iv in xrange(0, 32 * batchsize):
+        if iv < th_loop1:
+            AllLabels[iv] = int(0)  # All instances of abnormal videos are labeled 0. Used in custom_objective to keep track of normal and abnormal video indices.
+        if iv > th_loop2:
+            AllLabels[iv] = int(1)  # All instances of normal videos are labeled 1. Used in custom_objective to keep track of normal and abnormal video indices.
+
+    return AllFeatures, AllLabels
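+
+# Expected batch layout from load_dataset_Train_batch (shapes follow from the
+# constants above rather than from running the code):
+#
+#   inputs : (60 * 32, 4096) = (1920, 4096) -- 30 abnormal videos followed by
+#            30 normal videos, each contributing 32 segment feature vectors
+#   targets: (1920,) uint8 -- 0 for the first 30 * 32 abnormal instances,
+#            1 for the last 30 * 32 normal instances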
+
+
+def custom_objective(y_true, y_pred):
+    """Custom objective: MIL ranking loss with sparsity and smoothness terms."""
+
+    y_true = T.flatten(y_true)
+    y_pred = T.flatten(y_pred)
+
+    n_seg = 32  # Because we have 32 segments per video.
+    nvid = 60
+    n_exp = nvid / 2
+    Num_d = 32 * nvid
+
+    sub_max = T.ones_like(y_pred)         # Collects the highest-scoring instance in each bag (video).
+    sub_sum_labels = T.ones_like(y_true)  # Sums the labels per bag, to distinguish normal from abnormal videos.
+    sub_sum_l1 = T.ones_like(y_true)      # Holds the concatenated sums of scores per bag (sparsity term).
+    sub_l2 = T.ones_like(y_true)          # Holds the concatenated L2 of score differences per bag (smoothness term).
+
+    for ii in xrange(0, nvid, 1):
+        # For labels
+        mm = y_true[ii * n_seg:ii * n_seg + n_seg]
+        sub_sum_labels = T.concatenate([sub_sum_labels, T.stack(T.sum(mm))])  # Just to keep track of abnormal and normal videos
+
+        # For feature scores
+        Feat_Score = y_pred[ii * n_seg:ii * n_seg + n_seg]
+        sub_max = T.concatenate([sub_max, T.stack(T.max(Feat_Score))])        # Keep the maximum score over all instances in a bag (video)
+        sub_sum_l1 = T.concatenate([sub_sum_l1, T.stack(T.sum(Feat_Score))])  # Keep the sum of scores of all instances in a bag (video)
+
+        # Temporal smoothness: sum of squared differences between consecutive segment scores
+        z1 = T.ones_like(Feat_Score)
+        z2 = T.concatenate([z1, Feat_Score])
+        z3 = T.concatenate([Feat_Score, z1])
+        z_22 = z2[31:]
+        z_44 = z3[:33]
+        z = z_22 - z_44
+        z = z[1:32]
+        z = T.sum(T.sqr(z))
+        sub_l2 = T.concatenate([sub_l2, T.stack(z)])
+
+    # Slicing with [Num_d:] drops the T.ones_like padding, e.g. for
+    # x = [2, 4, 3, 9, 6, 12, 7, 18, 9, 14], x[4:] = [6, 12, 7, 18, 9, 14].
+    sub_score = sub_max[Num_d:]        # Needed because we initialized with T.ones_like
+    F_labels = sub_sum_labels[Num_d:]  # Needed because we initialized with T.ones_like
+    # F_labels contains 32 for a normal video and 0 for an abnormal video, because of the
+    # labeling done at the end of load_dataset_Train_batch.
+
+    sub_sum_l1 = sub_sum_l1[Num_d:]    # Needed because we initialized with T.ones_like
+    sub_sum_l1 = sub_sum_l1[:n_exp]    # Keep only the abnormal bags (first half of the batch)
+    sub_l2 = sub_l2[Num_d:]            # Needed because we initialized with T.ones_like
+    sub_l2 = sub_l2[:n_exp]            # Keep only the abnormal bags (first half of the batch)
+
+    indx_nor = theano.tensor.eq(F_labels, 32).nonzero()[0]  # Indices of normal videos: each of the 32 segments of a normal video is labeled 1, so F_labels = 32
+    indx_abn = theano.tensor.eq(F_labels, 0).nonzero()[0]   # Indices of abnormal videos
+
+    n_Nor = n_exp
+
+    Sub_Nor = sub_score[indx_nor]  # Maximum score for each normal video
+    Sub_Abn = sub_score[indx_abn]  # Maximum score for each abnormal video
+
+    z = T.ones_like(y_true)
+    for ii in xrange(0, n_Nor, 1):
+        sub_z = T.maximum(1 - Sub_Abn + Sub_Nor[ii], 0)  # Hinge ranking loss between every abnormal bag and this normal bag
+        z = T.concatenate([z, T.stack(T.sum(sub_z))])
+
+    z = z[Num_d:]  # Drop the T.ones_like padding
+    z = T.mean(z, axis=-1) + 0.00008 * T.sum(sub_sum_l1) + 0.00008 * T.sum(sub_l2)  # Final loss: ranking term + sparsity + smoothness
+
+    return z
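+
+# A plain-NumPy sketch of what custom_objective computes for one batch
+# (illustrative only; the scores and labels below are synthetic stand-ins
+# for y_pred and y_true):
+#
+#   scores = np.random.rand(60, 32)            # 60 bags x 32 segment scores
+#   labels = np.array([0] * 30 + [1] * 30)     # 0 = abnormal bag, 1 = normal bag
+#   max_abn = scores[labels == 0].max(axis=1)  # top instance per abnormal bag
+#   max_nor = scores[labels == 1].max(axis=1)  # top instance per normal bag
+#   hinge = np.maximum(1 - max_abn[:, None] + max_nor[None, :], 0).sum(axis=0).mean()
+#   sparsity = 0.00008 * scores[labels == 0].sum()
+#   smooth = 0.00008 * (np.diff(scores[labels == 0], axis=1) ** 2).sum()
+#   loss = hinge + sparsity + smooth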
+
+
+adagrad = Adagrad(lr=0.01, epsilon=1e-08)
+
+model.compile(loss=custom_objective, optimizer=adagrad)
+
+print("Starting training...")
+
+AllClassPath = '/content/drive/MyDrive/DL_project/embedding_data'
+# AllClassPath contains the C3D features (one .txt file per video). Each text file
+# contains 32 feature vectors, each of dimension 4096.
+output_dir = '/content/output/'
+# output_dir is the directory where the trained weights are saved.
+weights_path = output_dir + 'weights.mat'
+# weights.mat holds the model weights obtained after (or during) training.
+model_path = output_dir + 'model.json'
+
+if not os.path.exists(output_dir):
+    os.makedirs(output_dir)
+
+All_class_files = listdir(AllClassPath)
+All_class_files.sort()
+loss_graph = []
+num_iters = 20000
+total_iterations = 0
+batchsize = 60
+time_before = datetime.now()
+
+for it_num in range(num_iters):
+    AbnormalPath = os.path.join(AllClassPath, All_class_files[0])  # Path of precomputed abnormal C3D features
+    NormalPath = os.path.join(AllClassPath, All_class_files[1])    # Path of precomputed normal C3D features
+    inputs, targets = load_dataset_Train_batch(AbnormalPath, NormalPath)  # Load normal and abnormal video C3D features
+    batch_loss = model.train_on_batch(inputs, targets)
+    loss_graph = np.hstack((loss_graph, batch_loss))
+    total_iterations += 1
+    if total_iterations % 20 == 1:
+        # print("Iteration " + str(total_iterations) + " took: " + str(datetime.now() - time_before) + ", with loss " + str(batch_loss))
+        iteration_path = output_dir + 'Iterations_graph_' + str(total_iterations) + '.mat'
+        savemat(iteration_path, dict(loss_graph=loss_graph))
+    if total_iterations % 1000 == 0:  # Save the model every 1000 iterations.
+        weights_path = output_dir + 'weightsAnomalyL1L2_' + str(total_iterations) + '.mat'
+        save_model(model, model_path, weights_path)
+
+save_model(model, model_path, weights_path)
\ No newline at end of file
diff --git a/timm/data/bag_sampler.py b/timm/data/bag_sampler.py
new file mode 100644
index 00000000..c1c3d8c9
--- /dev/null
+++ b/timm/data/bag_sampler.py
@@ -0,0 +1,17 @@
+import random
+from torch.utils.data.sampler import Sampler
+
+
+class BagSampler(Sampler):
+    def __init__(self, dataset):
+        # The dataset is assumed to hold all abnormal bags in its first half and
+        # all normal bags in its second half; shuffle within each half only.
+        halfway_point = int(len(dataset) / 2)
+        self.first_half_indices = list(range(halfway_point))
+        self.second_half_indices = list(range(halfway_point, len(dataset)))
+
+    def __iter__(self):
+        random.shuffle(self.first_half_indices)
+        random.shuffle(self.second_half_indices)
+        return iter(self.first_half_indices + self.second_half_indices)
+
+    def __len__(self):
+        return len(self.first_half_indices) + len(self.second_half_indices)
\ No newline at end of file
diff --git a/timm/data/loader.py b/timm/data/loader.py
index ea490f38..e0d5a81d 100644
--- a/timm/data/loader.py
+++ b/timm/data/loader.py
@@ -15,6 +15,7 @@ import numpy as np
 from .transforms_factory import create_transform
 from .constants import IMAGENET_DEFAULT_MEAN, IMAGENET_DEFAULT_STD
 from .distributed_sampler import OrderedDistributedSampler, RepeatAugSampler
+from .bag_sampler import BagSampler
 from .random_erasing import RandomErasing
 from .mixup import FastCollateMixup
@@ -148,6 +149,7 @@ def create_loader(
         dataset,
         input_size,
         batch_size,
+        custom_sampler=False,
         is_training=False,
         use_prefetcher=True,
         no_aug=False,
@@ -205,6 +207,10 @@ def create_loader(
     )
     sampler = None
+    print("dataset_len=", len(dataset))
+    print(custom_sampler)
+    # if custom_sampler:
+    #     sampler = BagSampler(dataset)
     if distributed and not isinstance(dataset, torch.utils.data.IterableDataset):
         if is_training:
             if num_aug_repeats:
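
Note on BagSampler usage: the wiring in create_loader is still commented out, but the intended use matches the following standalone sketch (the synthetic dataset, bag counts, and batch size are stand-ins, assuming the new timm.data.bag_sampler module is importable):

    import torch
    from torch.utils.data import DataLoader, TensorDataset
    from timm.data.bag_sampler import BagSampler

    # 4 abnormal bags followed by 4 normal bags; each bag holds 32 segment features.
    feats = torch.rand(8, 32, 4096)
    labels = torch.tensor([0] * 4 + [1] * 4)
    dataset = TensorDataset(feats, labels)

    # BagSampler shuffles within each half but always yields the abnormal half
    # before the normal half, the layout assumed by the MIL ranking loss.
    loader = DataLoader(dataset, batch_size=2, sampler=BagSampler(dataset))
    for x, y in loader:
        print(x.shape, y)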
diff --git a/timm/loss/mil_ranking.py b/timm/loss/mil_ranking.py
index 1df7f6b4..7bdc8ee4 100644
--- a/timm/loss/mil_ranking.py
+++ b/timm/loss/mil_ranking.py
@@ -14,15 +14,17 @@ class MilRankingLoss(nn.Module):
         y_true = torch.flatten(y_true)
         y_pred = torch.flatten(y_pred)
 
-
-        #print(y_true.shape)
+        print("MIL_Ranking")
+        print(y_true)
         #print(y_true.type)
-        #print(y_pred.shape)
+        print(y_pred)
         #print(y_pred.type)
 
         n_seg = 32 # Because we have 32 segments per video.
-        nvid = 60
-        n_exp = nvid / 2
+        #nvid = 60
+        nvid = 1
+        #n_exp = nvid / 2
+        n_exp = nvid // 2
         Num_d=32*nvid
@@ -34,7 +36,9 @@ class MilRankingLoss(nn.Module):
         for ii in range(0, nvid, 1):
             # For Labels
             mm = y_true[ii * n_seg:ii * n_seg + n_seg]
-            sub_sum_labels = torch.cat([sub_sum_labels, torch.stack((torch.sum(mm)))]) # Just to keep track of abnormal and normal vidoes
+
+            print(torch.sum(mm))
+            sub_sum_labels = torch.cat([sub_sum_labels, torch.sum(mm).unsqueeze(0)]) # Keep track of abnormal and normal videos; torch.sum gives a 0-dim tensor, so unsqueeze before cat
 
             # For Features scores
             Feat_Score = y_pred[ii * n_seg:ii * n_seg + n_seg]
diff --git a/timm/models/mlp_mixer.py b/timm/models/mlp_mixer.py
index 818c02e5..9aba4b7e 100644
--- a/timm/models/mlp_mixer.py
+++ b/timm/models/mlp_mixer.py
@@ -85,7 +85,7 @@ default_cfgs = dict(
     # Mixer ImageNet-21K-P pretraining
     mixer_b16_224_miil_in21k=_cfg(
         url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil_in21k.pth',
-        mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=1,
+        mean=(0, 0, 0), std=(1, 1, 1), crop_pct=0.875, interpolation='bilinear', num_classes=2,
     ),
     mixer_b16_224_miil=_cfg(
         url='https://miil-public-eu.oss-eu-central-1.aliyuncs.com/model-zoo/ImageNet_21K_P/models/timm/mixer_b16_224_miil.pth',
@@ -312,6 +312,9 @@ class MlpMixer(nn.Module):
     def forward(self, x):
         x = self.forward_features(x)
         x = self.head(x)
+        #print("In_model")
+        #print(x.shape)
+        #print(x)
         return x
diff --git a/train.py b/train.py
index bd164972..b9f38ced 100644
--- a/train.py
+++ b/train.py
@@ -530,6 +530,7 @@ def main():
         train_interpolation = data_config['interpolation']
     loader_train = create_loader(
         dataset_train,
+        custom_sampler=True,
         input_size=data_config['input_size'],
         batch_size=args.batch_size,
         is_training=True,
@@ -681,9 +682,11 @@ def train_one_epoch(
     model.train()
 
     end = time.time()
+    print("loader_length=", len(loader))
     last_idx = len(loader) - 1
     num_updates = epoch * len(loader)
     for batch_idx, (input, target) in enumerate(loader):
+        print("batch=", batch_idx)
         last_batch = batch_idx == last_idx
         data_time_m.update(time.time() - end)
         if not args.prefetcher:
@@ -698,7 +701,9 @@ def train_one_epoch(
             output = model(input)
             print(output.shape)
             print(target.shape)
-            print(loss_fn)
+            #print(output)
+            #print(target)
+            #print(loss_fn)
             loss = loss_fn(output, target)
 
             if not args.distributed:
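
Note on the torch.cat change in timm/loss/mil_ranking.py: torch.sum over a 1-D slice returns a zero-dimensional tensor, and torch.cat rejects zero-dimensional inputs, so the concatenation only works once the scalar is promoted to shape (1,). A minimal self-contained check (variable names are illustrative):

    import torch

    acc = torch.ones(3)            # running buffer, as in the loss
    s = torch.sum(torch.rand(32))  # 0-dim tensor produced by the reduction
    # torch.cat([acc, s]) raises: zero-dimensional tensor cannot be concatenated
    acc = torch.cat([acc, s.unsqueeze(0)])  # promote to shape (1,) first
    print(acc.shape)               # torch.Size([4])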