diff --git a/out_log b/out_log new file mode 100644 index 00000000..40955796 --- /dev/null +++ b/out_log @@ -0,0 +1,2011 @@ +_create_mixer +Pretrained= False +default_Cfgs= {'url': '', 'num_classes': 2, 'input_size': (3, 224, 224), 'pool_size': None, 'crop_pct': 0.875, 'interpolation': 'bicubic', 'fixed_input_size': True, 'mean': (0.5, 0.5, 0.5), 'std': (0.5, 0.5, 0.5), 'first_conv': 'stem.proj', 'classifier': 'head'} +dataset_len= 288 +True + +dataset_len= 32 +False + +------Training------- +batch= 0 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, + 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, + 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, + 0.6931, 0.6931, 0.6931, 0.6931, 0.6931], device='cuda:0', + grad_fn=) 0.1 tensor([0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, + 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, + 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, + 0.6931, 0.6931, 0.6931, 0.6931, 0.6931], device='cuda:0', + grad_fn=) +loss= tensor(0.6931, device='cuda:0', grad_fn=) +batch= 1 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.6927, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, + 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, + 0.6926, 0.6926, 0.6926, 0.6925, 0.6926, 0.6926, 0.6926, 0.6926, 0.6926, + 0.6926, 0.6926, 0.6925, 0.6926, 0.6937], device='cuda:0', + grad_fn=) 0.1 tensor([0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, + 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, + 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, + 0.6931, 0.6931, 0.6931, 0.6931, 0.6931], device='cuda:0', + grad_fn=) +loss= tensor(0.6927, device='cuda:0', grad_fn=) +batch= 2 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.6916, 0.6916, 0.6918, 0.6917, 0.6918, 0.6916, 0.6916, 0.6918, 0.6918, + 0.6917, 0.6917, 0.6917, 0.6917, 0.6917, 0.6916, 0.6917, 0.6917, 0.6916, + 0.6947, 0.6917, 0.6917, 0.6917, 0.6917, 0.6917, 0.6917, 0.6917, 0.6918, + 0.6916, 0.6916, 0.6916, 0.6917, 0.6918], device='cuda:0', + grad_fn=) 0.1 tensor([0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, + 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, + 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, 0.6931, + 0.6931, 0.6931, 0.6931, 0.6931, 0.6931], device='cuda:0', + grad_fn=) +loss= tensor(0.6919, device='cuda:0', grad_fn=) +batch= 3 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.6903, 0.6907, 0.6903, 0.6904, 0.6907, 0.6906, 0.6907, 0.6903, 0.6904, + 0.6905, 0.6903, 0.6959, 0.6904, 0.6906, 0.6905, 0.6906, 0.6905, 0.6905, + 0.6906, 0.6904, 0.6906, 0.6905, 0.6906, 0.6960, 0.6903, 0.6907, 0.6905, + 0.6906, 0.6905, 0.6907, 0.6907, 0.6907], device='cuda:0', + grad_fn=) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0', + grad_fn=) +loss= tensor(0.6911, device='cuda:0', grad_fn=) +batch= 4 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.6894, 0.6894, 0.6889, 0.6894, 0.6889, 0.6975, 0.6893, 0.6889, 0.6891, + 0.6892, 0.6896, 0.6973, 0.6976, 0.6890, 0.6893, 0.6889, 0.6888, 0.6889, + 0.6894, 0.6888, 0.6974, 0.6974, 0.6890, 0.6896, 0.6972, 0.6887, 0.6892, + 0.6889, 0.6890, 0.6888, 0.6893, 0.6892], device='cuda:0', + grad_fn=) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0', + grad_fn=) +loss= tensor(0.6909, device='cuda:0', grad_fn=) +batch= 5 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.6876, 0.6878, 0.6880, 0.6876, 0.6877, 0.6878, 0.6880, 0.6989, 0.6879, + 0.6873, 0.6884, 0.6877, 0.6876, 0.6876, 0.6879, 0.6877, 0.6876, 0.6882, + 0.6875, 0.6875, 0.6877, 0.6881, 0.6877, 0.6991, 0.6880, 0.6875, 0.6881, + 0.6987, 0.6874, 0.6877, 0.6880, 0.6884], device='cuda:0', + grad_fn=) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0', + grad_fn=) +loss= tensor(0.6893, device='cuda:0', grad_fn=) +batch= 6 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.6862, 0.6858, 0.6861, 0.7007, 0.6857, 0.6864, 0.6866, 0.6860, 0.6861, + 0.6862, 0.6862, 0.6863, 0.6864, 0.6863, 0.6857, 0.6863, 0.6859, 0.6862, + 0.6857, 0.7008, 0.6860, 0.6869, 0.6860, 0.7005, 0.6861, 0.6857, 0.6866, + 0.7008, 0.6857, 0.6856, 0.6855, 0.6864], device='cuda:0', + grad_fn=) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0', + grad_fn=) +loss= tensor(0.6884, device='cuda:0', grad_fn=) +batch= 7 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.6848, 0.7023, 0.7028, 0.6848, 0.6849, 0.6848, 0.7029, 0.6839, 0.6837, + 0.6838, 0.6837, 0.6848, 0.6839, 0.6842, 0.6838, 0.6844, 0.6843, 0.6847, + 0.6836, 0.6840, 0.7021, 0.6838, 0.6836, 0.6836, 0.7027, 0.6849, 0.7023, + 0.7023, 0.6842, 0.6848, 0.6843, 0.6844], device='cuda:0', + grad_fn=) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0', + grad_fn=) +loss= tensor(0.6887, device='cuda:0', grad_fn=) +batch= 8 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.6833, 0.6820, 0.6809, 0.7045, 0.6832, 0.6825, 0.6827, 0.6829, 0.7038, + 0.7045, 0.6832, 0.6833, 0.6839, 0.6829, 0.6834, 0.6840, 0.6829, 0.7037, + 0.6833, 0.6829, 0.6827, 0.6828, 0.6829, 0.6821, 0.6829, 0.6834, 0.6828, + 0.6823, 0.6835, 0.6822, 0.6825, 0.6829], device='cuda:0', + grad_fn=) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0', + grad_fn=) +loss= tensor(0.6863, device='cuda:0', grad_fn=) +------Validating------- +output = tensor([[0.5123, 0.4877], + [0.5117, 0.4883], + [0.5118, 0.4882], + [0.5118, 0.4882], + [0.5124, 0.4876], + [0.5122, 0.4878], + [0.5119, 0.4881], + [0.5127, 0.4873], + [0.5123, 0.4877], + [0.5121, 0.4879], + [0.5120, 0.4880], + [0.5121, 0.4879], + [0.5122, 0.4878], + [0.5125, 0.4875], + [0.5128, 0.4872], + [0.5126, 0.4874], + [0.5124, 0.4876], + [0.5124, 0.4876], + [0.5124, 0.4876], + [0.5124, 0.4876], + [0.5123, 0.4877], + [0.5122, 0.4878], + [0.5120, 0.4880], + [0.5124, 0.4876], + [0.5120, 0.4880], + [0.5119, 0.4881], + [0.5117, 0.4883], + [0.5119, 0.4881], + [0.5118, 0.4882], + [0.5119, 0.4881], + [0.5117, 0.4883], + [0.5124, 0.4876]], device='cuda:0') +target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0') +eval_loss= tensor(0.7054, device='cuda:0') +eval_acc1= tensor(0., device='cuda:0') +------Training------- +batch= 0 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.6801, 0.6811, 0.6807, 0.6810, 0.6798, 0.6802, 0.6811, 0.6811, 0.6799, + 0.6807, 0.7065, 0.6818, 0.6802, 0.6824, 0.6808, 0.6806, 0.6810, 0.6797, + 0.6803, 0.6806, 0.7064, 0.6804, 0.6800, 0.7054, 0.6813, 0.6809, 0.6811, + 0.6816, 0.6820, 0.6803, 0.6800, 0.6813], device='cuda:0', + grad_fn=) 0.1 tensor([0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, 0.6932, + 0.6932, 0.6932, 0.6932, 0.6932, 0.6932], device='cuda:0', + grad_fn=) +loss= tensor(0.6841, device='cuda:0', grad_fn=) +batch= 1 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.4440, 0.4431, 0.4423, 1.0512, 0.4354, 0.4477, 0.4348, 0.4372, 0.4326, + 0.4353, 0.4288, 1.0622, 0.4329, 0.4263, 0.4323, 0.4431, 0.4356, 0.4367, + 0.4443, 0.4378, 0.4332, 0.4242, 0.4547, 0.4266, 0.4205, 0.4291, 0.4446, + 0.4455, 0.4256, 0.4609, 0.4424, 0.4271], device='cuda:0', + grad_fn=) 0.1 tensor([0.7349, 0.7352, 0.7356, 0.7406, 0.7383, 0.7334, 0.7386, 0.7376, 0.7395, + 0.7384, 0.7412, 0.7432, 0.7394, 0.7422, 0.7396, 0.7352, 0.7383, 0.7378, + 0.7348, 0.7373, 0.7393, 0.7432, 0.7308, 0.7421, 0.7449, 0.7410, 0.7346, + 0.7343, 0.7426, 0.7285, 0.7355, 0.7419], device='cuda:0', + grad_fn=) +loss= tensor(0.5018, device='cuda:0', grad_fn=) +batch= 2 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([1.2373, 0.3479, 1.2251, 0.3466, 0.3493, 0.3496, 0.3549, 0.3486, 0.3413, + 0.3564, 0.3540, 0.3417, 0.3526, 0.3504, 0.3545, 0.3465, 0.3580, 0.3578, + 0.3433, 0.3538, 0.3425, 0.3502, 1.2251, 1.2219, 0.3512, 0.3503, 0.3460, + 0.3590, 0.3446, 0.3562, 0.3484, 1.2262], device='cuda:0', + grad_fn=) 0.1 tensor([0.7900, 0.7863, 0.7864, 0.7872, 0.7854, 0.7852, 0.7815, 0.7858, 0.7910, + 0.7805, 0.7821, 0.7907, 0.7831, 0.7846, 0.7818, 0.7873, 0.7795, 0.7796, + 0.7896, 0.7823, 0.7901, 0.7847, 0.7864, 0.7855, 0.7841, 0.7847, 0.7877, + 0.7788, 0.7887, 0.7806, 0.7860, 0.7868], device='cuda:0', + grad_fn=) +loss= tensor(0.5170, device='cuda:0', grad_fn=) +batch= 3 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3278, 0.3256, 0.3231, 0.3233, 0.3251, 0.3253, 0.3262, 0.3278, 0.3243, + 0.3225, 0.3259, 0.3254, 0.3295, 0.3260, 0.3256, 0.3262, 0.3250, 0.3260, + 0.3236, 0.3283, 0.3268, 0.3250, 0.3323, 0.3233, 0.3267, 0.3259, 0.3223, + 0.3228, 0.3229, 0.3225, 0.3246, 0.3310], device='cuda:0', + grad_fn=) 0.1 tensor([0.8013, 0.8030, 0.8051, 0.8049, 0.8034, 0.8033, 0.8026, 0.8013, 0.8041, + 0.8055, 0.8028, 0.8032, 0.8000, 0.8027, 0.8031, 0.8026, 0.8035, 0.8027, + 0.8046, 0.8009, 0.8021, 0.8035, 0.7978, 0.8049, 0.8022, 0.8028, 0.8057, + 0.8053, 0.8052, 0.8056, 0.8038, 0.7988], device='cuda:0', + grad_fn=) +loss= tensor(0.3733, device='cuda:0', grad_fn=) +batch= 4 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3171, 0.3167, 0.3177, 0.3159, 0.3175, 1.3035, 0.3192, 0.3186, 0.3171, + 0.3171, 0.3168, 0.3180, 0.3172, 1.3040, 0.3188, 0.3205, 0.3203, 0.3178, + 1.3052, 0.3161, 0.3166, 0.3167, 1.3039, 1.3045, 0.3166, 0.3163, 1.3048, + 1.3016, 1.3018, 0.3161, 0.3171, 0.3150], device='cuda:0', + grad_fn=) 0.1 tensor([0.8100, 0.8104, 0.8095, 0.8110, 0.8096, 0.8102, 0.8083, 0.8087, 0.8100, + 0.8100, 0.8103, 0.8092, 0.8099, 0.8103, 0.8086, 0.8071, 0.8073, 0.8094, + 0.8107, 0.8108, 0.8104, 0.8104, 0.8103, 0.8105, 0.8104, 0.8107, 0.8106, + 0.8096, 0.8097, 0.8108, 0.8100, 0.8117], device='cuda:0', + grad_fn=) +loss= tensor(0.5885, device='cuda:0', grad_fn=) +batch= 5 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3142, 0.3149, 0.3155, 0.3143, 1.3105, 0.3147, 1.3086, 0.3151, 1.3093, + 1.3094, 0.3152, 0.3146, 0.3145, 0.3144, 0.3151, 0.3150, 1.3094, 0.3147, + 0.3158, 0.3148, 0.3149, 0.3150, 0.3157, 0.3144, 0.3144, 0.3155, 0.3151, + 0.3148, 0.3144, 0.3150, 0.3159, 0.3147], device='cuda:0', + grad_fn=) 0.1 tensor([0.8125, 0.8118, 0.8114, 0.8123, 0.8124, 0.8120, 0.8118, 0.8117, 0.8120, + 0.8120, 0.8116, 0.8122, 0.8122, 0.8122, 0.8117, 0.8118, 0.8121, 0.8120, + 0.8111, 0.8120, 0.8119, 0.8118, 0.8112, 0.8123, 0.8123, 0.8113, 0.8117, + 0.8119, 0.8123, 0.8118, 0.8110, 0.8120], device='cuda:0', + grad_fn=) +loss= tensor(0.5045, device='cuda:0', grad_fn=) +batch= 6 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3139, 0.3137, 0.3137, 0.3138, 0.3138, 1.3116, 0.3138, 0.3139, 0.3140, + 0.3139, 1.3113, 0.3137, 0.3137, 0.3140, 0.3139, 0.3138, 0.3143, 0.3143, + 0.3139, 0.3140, 0.3138, 0.3138, 0.3138, 0.3140, 0.3139, 1.3122, 0.3137, + 0.3138, 0.3141, 0.3138, 1.3120, 0.3139], device='cuda:0', + grad_fn=) 0.1 tensor([0.8127, 0.8129, 0.8129, 0.8128, 0.8128, 0.8127, 0.8128, 0.8127, 0.8126, + 0.8127, 0.8126, 0.8129, 0.8129, 0.8126, 0.8127, 0.8128, 0.8124, 0.8124, + 0.8127, 0.8127, 0.8128, 0.8128, 0.8128, 0.8126, 0.8127, 0.8129, 0.8129, + 0.8128, 0.8126, 0.8128, 0.8129, 0.8127], device='cuda:0', + grad_fn=) +loss= tensor(0.4760, device='cuda:0', grad_fn=) +batch= 7 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3135, 0.3134, 0.3135, 0.3136, 0.3136, 0.3135, 0.3139, 0.3135, 0.3134, + 0.3134, 0.3134, 0.3135, 0.3136, 0.3135, 0.3134, 0.3135, 1.3126, 0.3137, + 0.3135, 0.3138, 0.3134, 0.3135, 0.3138, 0.3134, 0.3135, 0.3136, 0.3134, + 0.3135, 1.3127, 0.3135, 0.3134, 0.3135], device='cuda:0', + grad_fn=) 0.1 tensor([0.8130, 0.8131, 0.8131, 0.8130, 0.8130, 0.8131, 0.8127, 0.8131, 0.8131, + 0.8131, 0.8131, 0.8131, 0.8130, 0.8131, 0.8131, 0.8131, 0.8131, 0.8129, + 0.8131, 0.8128, 0.8131, 0.8130, 0.8128, 0.8131, 0.8131, 0.8130, 0.8131, + 0.8131, 0.8131, 0.8130, 0.8131, 0.8131], device='cuda:0', + grad_fn=) +loss= tensor(0.4197, device='cuda:0', grad_fn=) +batch= 8 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3135, 0.3134, 0.3134, 0.3133, 0.3134, 1.3129, 1.3130, 0.3133, 0.3135, + 0.3133, 0.3134, 0.3134, 0.3134, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3130, 0.3134, 0.3133, 0.3134, 0.3134, 0.3134, 0.3134, 0.3133, 0.3133, + 0.3134, 0.3134, 0.3134, 0.3134, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8131, 0.8132, 0.8132, 0.8132, 0.8131, 0.8131, 0.8132, 0.8132, 0.8130, + 0.8132, 0.8131, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, + 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8131, 0.8131, 0.8132, 0.8132, + 0.8131, 0.8131, 0.8131, 0.8132, 0.8132], device='cuda:0', + grad_fn=) +loss= tensor(0.4477, device='cuda:0', grad_fn=) +------Validating------- +output = tensor([[9.9987e-01, 1.3297e-04], + [9.9981e-01, 1.9078e-04], + [9.9982e-01, 1.7943e-04], + [9.9982e-01, 1.7781e-04], + [9.9988e-01, 1.2462e-04], + [9.9986e-01, 1.3894e-04], + [9.9983e-01, 1.7157e-04], + [9.9989e-01, 1.0836e-04], + [9.9987e-01, 1.3451e-04], + [9.9985e-01, 1.4906e-04], + [9.9984e-01, 1.6087e-04], + [9.9984e-01, 1.5916e-04], + [9.9985e-01, 1.4523e-04], + [9.9988e-01, 1.1763e-04], + [9.9990e-01, 9.5136e-05], + [9.9989e-01, 1.0943e-04], + [9.9987e-01, 1.3151e-04], + [9.9987e-01, 1.2863e-04], + [9.9988e-01, 1.2315e-04], + [9.9988e-01, 1.2338e-04], + [9.9986e-01, 1.3644e-04], + [9.9986e-01, 1.4150e-04], + [9.9984e-01, 1.6069e-04], + [9.9988e-01, 1.2354e-04], + [9.9984e-01, 1.5766e-04], + [9.9983e-01, 1.7292e-04], + [9.9981e-01, 1.9048e-04], + [9.9982e-01, 1.7739e-04], + [9.9981e-01, 1.8760e-04], + [9.9983e-01, 1.7279e-04], + [9.9981e-01, 1.9314e-04], + [9.9987e-01, 1.2820e-04]], device='cuda:0') +target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0') +eval_loss= tensor(1.3130, device='cuda:0') +eval_acc1= tensor(0., device='cuda:0') +------Training------- +batch= 0 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([1.3131, 1.3131, 0.3133, 0.3133, 0.3133, 1.3130, 0.3133, 0.3133, 0.3134, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3134, 0.3133, 0.3133, 1.3131, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3134, 0.3133, 0.3133, 0.3133, 1.3131], device='cuda:0', + grad_fn=) 0.1 tensor([0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8131, + 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, + 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, + 0.8132, 0.8132, 0.8132, 0.8132, 0.8132], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 1 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 1.3132, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3132, 0.3133, 0.3133, + 0.3133, 1.3132, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8132, 0.8133, 0.8132, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8132, 0.8132, 0.8132, 0.8133, 0.8133, + 0.8132, 0.8133, 0.8133, 0.8132, 0.8133, 0.8132, 0.8132, 0.8132, 0.8133, + 0.8132, 0.8133, 0.8133, 0.8132, 0.8132], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 2 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3132, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8132, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 3 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 4 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 5 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5601, device='cuda:0', grad_fn=) +batch= 6 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.3633, device='cuda:0', grad_fn=) +batch= 7 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 8 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +------Validating------- +output = tensor([[1.0000e+00, 3.4283e-08], + [1.0000e+00, 6.0943e-08], + [1.0000e+00, 5.2414e-08], + [1.0000e+00, 5.2234e-08], + [1.0000e+00, 2.9134e-08], + [1.0000e+00, 3.5439e-08], + [1.0000e+00, 5.2968e-08], + [1.0000e+00, 2.4628e-08], + [1.0000e+00, 3.3891e-08], + [1.0000e+00, 3.9833e-08], + [1.0000e+00, 4.6143e-08], + [1.0000e+00, 4.5376e-08], + [1.0000e+00, 3.9571e-08], + [1.0000e+00, 2.7552e-08], + [1.0000e+00, 1.9134e-08], + [1.0000e+00, 2.4356e-08], + [1.0000e+00, 3.3188e-08], + [1.0000e+00, 3.1166e-08], + [1.0000e+00, 2.8297e-08], + [1.0000e+00, 2.8663e-08], + [1.0000e+00, 3.4569e-08], + [1.0000e+00, 3.7047e-08], + [1.0000e+00, 4.4655e-08], + [1.0000e+00, 2.7564e-08], + [1.0000e+00, 4.3321e-08], + [1.0000e+00, 5.1786e-08], + [1.0000e+00, 6.0503e-08], + [1.0000e+00, 5.4386e-08], + [1.0000e+00, 5.9361e-08], + [1.0000e+00, 5.2789e-08], + [1.0000e+00, 6.3719e-08], + [1.0000e+00, 3.1184e-08]], device='cuda:0') +target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0') +eval_loss= tensor(1.3133, device='cuda:0') +eval_acc1= tensor(0., device='cuda:0') +------Training------- +batch= 0 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 1 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 2 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 3 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 4 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 5 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 6 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 7 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 8 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +------Validating------- +output = tensor([[1.0000e+00, 3.4280e-08], + [1.0000e+00, 6.0938e-08], + [1.0000e+00, 5.2409e-08], + [1.0000e+00, 5.2230e-08], + [1.0000e+00, 2.9132e-08], + [1.0000e+00, 3.5436e-08], + [1.0000e+00, 5.2964e-08], + [1.0000e+00, 2.4626e-08], + [1.0000e+00, 3.3888e-08], + [1.0000e+00, 3.9830e-08], + [1.0000e+00, 4.6140e-08], + [1.0000e+00, 4.5372e-08], + [1.0000e+00, 3.9568e-08], + [1.0000e+00, 2.7549e-08], + [1.0000e+00, 1.9132e-08], + [1.0000e+00, 2.4354e-08], + [1.0000e+00, 3.3185e-08], + [1.0000e+00, 3.1164e-08], + [1.0000e+00, 2.8295e-08], + [1.0000e+00, 2.8660e-08], + [1.0000e+00, 3.4566e-08], + [1.0000e+00, 3.7044e-08], + [1.0000e+00, 4.4652e-08], + [1.0000e+00, 2.7561e-08], + [1.0000e+00, 4.3318e-08], + [1.0000e+00, 5.1782e-08], + [1.0000e+00, 6.0497e-08], + [1.0000e+00, 5.4381e-08], + [1.0000e+00, 5.9356e-08], + [1.0000e+00, 5.2785e-08], + [1.0000e+00, 6.3714e-08], + [1.0000e+00, 3.1181e-08]], device='cuda:0') +target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0') +eval_loss= tensor(1.3133, device='cuda:0') +eval_acc1= tensor(0., device='cuda:0') +------Training------- +batch= 0 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 1.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 1 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 2 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 3 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 4 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.3914, device='cuda:0', grad_fn=) +batch= 5 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5320, device='cuda:0', grad_fn=) +batch= 6 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 7 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.3914, device='cuda:0', grad_fn=) +batch= 8 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +------Validating------- +output = tensor([[1.0000e+00, 3.4279e-08], + [1.0000e+00, 6.0936e-08], + [1.0000e+00, 5.2408e-08], + [1.0000e+00, 5.2228e-08], + [1.0000e+00, 2.9131e-08], + [1.0000e+00, 3.5435e-08], + [1.0000e+00, 5.2962e-08], + [1.0000e+00, 2.4625e-08], + [1.0000e+00, 3.3888e-08], + [1.0000e+00, 3.9828e-08], + [1.0000e+00, 4.6138e-08], + [1.0000e+00, 4.5371e-08], + [1.0000e+00, 3.9567e-08], + [1.0000e+00, 2.7549e-08], + [1.0000e+00, 1.9132e-08], + [1.0000e+00, 2.4353e-08], + [1.0000e+00, 3.3184e-08], + [1.0000e+00, 3.1163e-08], + [1.0000e+00, 2.8294e-08], + [1.0000e+00, 2.8660e-08], + [1.0000e+00, 3.4565e-08], + [1.0000e+00, 3.7043e-08], + [1.0000e+00, 4.4650e-08], + [1.0000e+00, 2.7560e-08], + [1.0000e+00, 4.3316e-08], + [1.0000e+00, 5.1780e-08], + [1.0000e+00, 6.0495e-08], + [1.0000e+00, 5.4379e-08], + [1.0000e+00, 5.9354e-08], + [1.0000e+00, 5.2783e-08], + [1.0000e+00, 6.3712e-08], + [1.0000e+00, 3.1180e-08]], device='cuda:0') +target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0') +eval_loss= tensor(1.3133, device='cuda:0') +eval_acc1= tensor(0., device='cuda:0') +------Training------- +batch= 0 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 1 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 2 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 1.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 3 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +batch= 4 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 5 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +batch= 6 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 7 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 8 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +------Validating------- +output = tensor([[1.0000e+00, 3.4278e-08], + [1.0000e+00, 6.0935e-08], + [1.0000e+00, 5.2407e-08], + [1.0000e+00, 5.2227e-08], + [1.0000e+00, 2.9130e-08], + [1.0000e+00, 3.5434e-08], + [1.0000e+00, 5.2961e-08], + [1.0000e+00, 2.4625e-08], + [1.0000e+00, 3.3887e-08], + [1.0000e+00, 3.9828e-08], + [1.0000e+00, 4.6137e-08], + [1.0000e+00, 4.5370e-08], + [1.0000e+00, 3.9566e-08], + [1.0000e+00, 2.7548e-08], + [1.0000e+00, 1.9131e-08], + [1.0000e+00, 2.4353e-08], + [1.0000e+00, 3.3183e-08], + [1.0000e+00, 3.1162e-08], + [1.0000e+00, 2.8294e-08], + [1.0000e+00, 2.8659e-08], + [1.0000e+00, 3.4565e-08], + [1.0000e+00, 3.7043e-08], + [1.0000e+00, 4.4650e-08], + [1.0000e+00, 2.7560e-08], + [1.0000e+00, 4.3316e-08], + [1.0000e+00, 5.1779e-08], + [1.0000e+00, 6.0495e-08], + [1.0000e+00, 5.4379e-08], + [1.0000e+00, 5.9353e-08], + [1.0000e+00, 5.2782e-08], + [1.0000e+00, 6.3711e-08], + [1.0000e+00, 3.1179e-08]], device='cuda:0') +target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0') +eval_loss= tensor(1.3133, device='cuda:0') +eval_acc1= tensor(0., device='cuda:0') +------Training------- +batch= 0 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 1 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 2 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 3 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +batch= 4 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5320, device='cuda:0', grad_fn=) +batch= 5 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5320, device='cuda:0', grad_fn=) +batch= 6 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 7 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 8 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.3914, device='cuda:0', grad_fn=) +------Validating------- +output = tensor([[1.0000e+00, 3.4278e-08], + [1.0000e+00, 6.0935e-08], + [1.0000e+00, 5.2407e-08], + [1.0000e+00, 5.2227e-08], + [1.0000e+00, 2.9130e-08], + [1.0000e+00, 3.5434e-08], + [1.0000e+00, 5.2961e-08], + [1.0000e+00, 2.4625e-08], + [1.0000e+00, 3.3887e-08], + [1.0000e+00, 3.9828e-08], + [1.0000e+00, 4.6137e-08], + [1.0000e+00, 4.5370e-08], + [1.0000e+00, 3.9566e-08], + [1.0000e+00, 2.7548e-08], + [1.0000e+00, 1.9131e-08], + [1.0000e+00, 2.4353e-08], + [1.0000e+00, 3.3183e-08], + [1.0000e+00, 3.1162e-08], + [1.0000e+00, 2.8294e-08], + [1.0000e+00, 2.8659e-08], + [1.0000e+00, 3.4564e-08], + [1.0000e+00, 3.7043e-08], + [1.0000e+00, 4.4649e-08], + [1.0000e+00, 2.7560e-08], + [1.0000e+00, 4.3316e-08], + [1.0000e+00, 5.1779e-08], + [1.0000e+00, 6.0495e-08], + [1.0000e+00, 5.4378e-08], + [1.0000e+00, 5.9353e-08], + [1.0000e+00, 5.2782e-08], + [1.0000e+00, 6.3711e-08], + [1.0000e+00, 3.1179e-08]], device='cuda:0') +target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0') +eval_loss= tensor(1.3133, device='cuda:0') +eval_acc1= tensor(0., device='cuda:0') +------Training------- +batch= 0 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 1 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 2 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 1.3133, 1.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5320, device='cuda:0', grad_fn=) +batch= 3 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +batch= 4 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 1.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 5 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 6 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 1.3133, 1.3133, 1.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5320, device='cuda:0', grad_fn=) +batch= 7 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.3914, device='cuda:0', grad_fn=) +batch= 8 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.3914, device='cuda:0', grad_fn=) +------Validating------- +output = tensor([[1.0000e+00, 3.4278e-08], + [1.0000e+00, 6.0935e-08], + [1.0000e+00, 5.2407e-08], + [1.0000e+00, 5.2227e-08], + [1.0000e+00, 2.9130e-08], + [1.0000e+00, 3.5434e-08], + [1.0000e+00, 5.2961e-08], + [1.0000e+00, 2.4625e-08], + [1.0000e+00, 3.3887e-08], + [1.0000e+00, 3.9828e-08], + [1.0000e+00, 4.6137e-08], + [1.0000e+00, 4.5370e-08], + [1.0000e+00, 3.9566e-08], + [1.0000e+00, 2.7548e-08], + [1.0000e+00, 1.9131e-08], + [1.0000e+00, 2.4353e-08], + [1.0000e+00, 3.3183e-08], + [1.0000e+00, 3.1162e-08], + [1.0000e+00, 2.8294e-08], + [1.0000e+00, 2.8659e-08], + [1.0000e+00, 3.4564e-08], + [1.0000e+00, 3.7042e-08], + [1.0000e+00, 4.4649e-08], + [1.0000e+00, 2.7560e-08], + [1.0000e+00, 4.3316e-08], + [1.0000e+00, 5.1779e-08], + [1.0000e+00, 6.0494e-08], + [1.0000e+00, 5.4378e-08], + [1.0000e+00, 5.9353e-08], + [1.0000e+00, 5.2782e-08], + [1.0000e+00, 6.3711e-08], + [1.0000e+00, 3.1179e-08]], device='cuda:0') +target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0') +eval_loss= tensor(1.3133, device='cuda:0') +eval_acc1= tensor(0., device='cuda:0') +------Training------- +batch= 0 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([1.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5320, device='cuda:0', grad_fn=) +batch= 1 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +batch= 2 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +batch= 3 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 4 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +batch= 5 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5320, device='cuda:0', grad_fn=) +batch= 6 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([1.3133, 1.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 7 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 8 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +------Validating------- +output = tensor([[1.0000e+00, 3.4278e-08], + [1.0000e+00, 6.0935e-08], + [1.0000e+00, 5.2407e-08], + [1.0000e+00, 5.2227e-08], + [1.0000e+00, 2.9130e-08], + [1.0000e+00, 3.5434e-08], + [1.0000e+00, 5.2961e-08], + [1.0000e+00, 2.4625e-08], + [1.0000e+00, 3.3887e-08], + [1.0000e+00, 3.9828e-08], + [1.0000e+00, 4.6137e-08], + [1.0000e+00, 4.5370e-08], + [1.0000e+00, 3.9566e-08], + [1.0000e+00, 2.7548e-08], + [1.0000e+00, 1.9131e-08], + [1.0000e+00, 2.4353e-08], + [1.0000e+00, 3.3183e-08], + [1.0000e+00, 3.1162e-08], + [1.0000e+00, 2.8294e-08], + [1.0000e+00, 2.8659e-08], + [1.0000e+00, 3.4564e-08], + [1.0000e+00, 3.7042e-08], + [1.0000e+00, 4.4649e-08], + [1.0000e+00, 2.7560e-08], + [1.0000e+00, 4.3316e-08], + [1.0000e+00, 5.1779e-08], + [1.0000e+00, 6.0494e-08], + [1.0000e+00, 5.4378e-08], + [1.0000e+00, 5.9353e-08], + [1.0000e+00, 5.2782e-08], + [1.0000e+00, 6.3711e-08], + [1.0000e+00, 3.1179e-08]], device='cuda:0') +target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0') +eval_loss= tensor(1.3133, device='cuda:0') +eval_acc1= tensor(0., device='cuda:0') +------Training------- +batch= 0 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 1 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +batch= 2 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 3 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +batch= 4 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +batch= 5 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 6 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 7 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 1.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 8 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 1.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4758, device='cuda:0', grad_fn=) +------Validating------- +output = tensor([[1.0000e+00, 3.4278e-08], + [1.0000e+00, 6.0935e-08], + [1.0000e+00, 5.2407e-08], + [1.0000e+00, 5.2227e-08], + [1.0000e+00, 2.9130e-08], + [1.0000e+00, 3.5434e-08], + [1.0000e+00, 5.2961e-08], + [1.0000e+00, 2.4625e-08], + [1.0000e+00, 3.3887e-08], + [1.0000e+00, 3.9828e-08], + [1.0000e+00, 4.6137e-08], + [1.0000e+00, 4.5370e-08], + [1.0000e+00, 3.9566e-08], + [1.0000e+00, 2.7548e-08], + [1.0000e+00, 1.9131e-08], + [1.0000e+00, 2.4353e-08], + [1.0000e+00, 3.3183e-08], + [1.0000e+00, 3.1162e-08], + [1.0000e+00, 2.8294e-08], + [1.0000e+00, 2.8659e-08], + [1.0000e+00, 3.4564e-08], + [1.0000e+00, 3.7042e-08], + [1.0000e+00, 4.4649e-08], + [1.0000e+00, 2.7560e-08], + [1.0000e+00, 4.3316e-08], + [1.0000e+00, 5.1779e-08], + [1.0000e+00, 6.0494e-08], + [1.0000e+00, 5.4378e-08], + [1.0000e+00, 5.9353e-08], + [1.0000e+00, 5.2782e-08], + [1.0000e+00, 6.3711e-08], + [1.0000e+00, 3.1179e-08]], device='cuda:0') +target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0') +eval_loss= tensor(1.3133, device='cuda:0') +eval_acc1= tensor(0., device='cuda:0') +------Training------- +batch= 0 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, 1.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 1 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +batch= 2 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.3914, device='cuda:0', grad_fn=) +batch= 3 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4195, device='cuda:0', grad_fn=) +batch= 4 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 1.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 1.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 5 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +batch= 6 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 1.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, 1.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5320, device='cuda:0', grad_fn=) +batch= 7 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 1.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.5039, device='cuda:0', grad_fn=) +batch= 8 +torch.Size([32, 16, 256]) +torch.Size([32, 2]) +torch.Size([32]) +LabelSmoothingCrossEntropy() +confidence, nll_loss, self.smoothing, smooth_loss +0.9 tensor([0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 0.3133, 1.3133, 0.3133, 0.3133, 0.3133, 0.3133, 0.3133, + 0.3133, 0.3133, 1.3133, 0.3133, 0.3133], device='cuda:0', + grad_fn=) 0.1 tensor([0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, 0.8133, + 0.8133, 0.8133, 0.8133, 0.8133, 0.8133], device='cuda:0', + grad_fn=) +loss= tensor(0.4476, device='cuda:0', grad_fn=) +------Validating------- +output = tensor([[1.0000e+00, 3.4278e-08], + [1.0000e+00, 6.0935e-08], + [1.0000e+00, 5.2407e-08], + [1.0000e+00, 5.2227e-08], + [1.0000e+00, 2.9130e-08], + [1.0000e+00, 3.5434e-08], + [1.0000e+00, 5.2961e-08], + [1.0000e+00, 2.4625e-08], + [1.0000e+00, 3.3887e-08], + [1.0000e+00, 3.9828e-08], + [1.0000e+00, 4.6137e-08], + [1.0000e+00, 4.5370e-08], + [1.0000e+00, 3.9566e-08], + [1.0000e+00, 2.7548e-08], + [1.0000e+00, 1.9131e-08], + [1.0000e+00, 2.4353e-08], + [1.0000e+00, 3.3183e-08], + [1.0000e+00, 3.1162e-08], + [1.0000e+00, 2.8294e-08], + [1.0000e+00, 2.8659e-08], + [1.0000e+00, 3.4564e-08], + [1.0000e+00, 3.7042e-08], + [1.0000e+00, 4.4649e-08], + [1.0000e+00, 2.7560e-08], + [1.0000e+00, 4.3316e-08], + [1.0000e+00, 5.1779e-08], + [1.0000e+00, 6.0494e-08], + [1.0000e+00, 5.4378e-08], + [1.0000e+00, 5.9353e-08], + [1.0000e+00, 5.2782e-08], + [1.0000e+00, 6.3711e-08], + [1.0000e+00, 3.1179e-08]], device='cuda:0') +target= tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1], device='cuda:0') +eval_loss= tensor(1.3133, device='cuda:0') +eval_acc1= tensor(0., device='cuda:0') diff --git a/timm/data/textdataset.py b/timm/data/textdataset.py index a1861d2d..30f11309 100644 --- a/timm/data/textdataset.py +++ b/timm/data/textdataset.py @@ -39,14 +39,19 @@ class TextDataset(Dataset): All_Videos.sort() #print(All_Videos) VideoPath = os.path.join(self.path, All_Videos[idx//32]) + #print(VideoPath) f = open(VideoPath, "r") feat = idx%32 words = f.read().split() features = np.float32(words[feat * 4096:feat * 4096 + 4096]) features = torch.tensor(features) - # features = torch.reshape(features, (16, 256)) + #print(features.shape) + if len(features) == 0: + print(idx) + print(VideoPath) + features = torch.reshape(features, (16, 256)) # features = torch.reshape(features, (196, 768)) - features = torch.reshape(features, (1, 4096)) + #features = torch.reshape(features, (1, 4096)) #print(VideoPath) if VideoPath.find('Normal') == -1: label = 0 @@ -54,9 +59,9 @@ class TextDataset(Dataset): label = 1 label = torch.tensor(label) - print(features.shape) + #print(features.shape) #print(features) - print(label.shape) + #print(label.shape) #print(label) return features, label diff --git a/timm/models/mlp_mixer.py b/timm/models/mlp_mixer.py index 0444c59a..5632a537 100644 --- a/timm/models/mlp_mixer.py +++ b/timm/models/mlp_mixer.py @@ -63,7 +63,7 @@ def _cfg(url='', **kwargs): default_cfgs = dict( - mixer_s32_224=_cfg(), + mixer_s32_224=_cfg(num_classes=2), mixer_s16_224=_cfg(), mixer_b32_224=_cfg(), mixer_b16_224=_cfg( @@ -264,12 +264,13 @@ class MlpMixer(nn.Module): super().__init__() self.num_classes = num_classes self.num_features = self.embed_dim = embed_dim # num_features for consistency with other models - self.initial_fc =nn.Linear(4096, 150528) - self.stem = PatchEmbed( - img_size=img_size, patch_size=patch_size, in_chans=in_chans, - embed_dim=embed_dim, norm_layer=norm_layer if stem_norm else None) + + ##initial_fc and stem not needed + #self.initial_fc =nn.Linear(4096, 150528) + #self.stem = PatchEmbed( + # img_size=img_size, patch_size=patch_size, in_chans=in_chans, + # embed_dim=embed_dim, norm_layer=norm_layer if stem_norm else None) # FIXME drop_path (stochastic depth scaling rule or all the same?) - #embed_dim=256 #print("num_classes:",self.num_classes, "embed_dim:", embed_dim) self.blocks = nn.Sequential(*[ block_layer( @@ -286,23 +287,24 @@ class MlpMixer(nn.Module): for _ in range(num_blocks)]) """ self.norm = norm_layer(embed_dim) - # self.head = nn.Linear(embed_dim, self.num_classes) if num_classes > 0 else nn.Identity() - self.head = nn.Sequential( - nn.Linear(embed_dim, self.num_classes), - nn.ReLU(), - nn.Dropout(p=0.3), - nn.Linear(self.num_classes, 1024), - nn.ReLU(), - nn.Dropout(p=0.3), - nn.Linear(1024, 512), - nn.ReLU(), - nn.Dropout(p=0.3), - nn.Linear(512, 256), - nn.ReLU(), - nn.Dropout(p=0.3), - nn.Linear(256, 2) - ) - self.sigmoid = nn.Sigmoid() + self.head = nn.Linear(embed_dim, self.num_classes) if num_classes > 0 else nn.Identity() + # self.head = nn.Sequential( + # nn.Linear(embed_dim, self.num_classes), + # nn.ReLU(), + # nn.Dropout(p=0.3), + # nn.Linear(self.num_classes, 1024), + # nn.ReLU(), + # nn.Dropout(p=0.3), + # nn.Linear(1024, 512), + # nn.ReLU(), + # nn.Dropout(p=0.3), + # nn.Linear(512, 256), + # nn.ReLU(), + # nn.Dropout(p=0.3), + # nn.Linear(256, 2) + # ) + #self.sigmoid = nn.Sigmoid() + self.sm = nn.Softmax(dim=1) self.init_weights(nlhb=nlhb) def init_weights(self, nlhb=False): @@ -318,23 +320,24 @@ class MlpMixer(nn.Module): def forward_features(self, x): #x = self.stem(x) - print("In_Model") + #print("In_Model") x = self.blocks(x) - print(x) + #print(x) x = self.norm(x) - print(x) + #print(x) x = x.mean(dim=1) - print(x) + #print(x) return x def forward(self, x): - x = self.initial_fc(x) - x = torch.reshape(x, (196, 768)) + #x = self.initial_fc(x) + #x = torch.reshape(x, (196, 768)) x = self.forward_features(x) x = self.head(x) - print(x) - x = self.sigmoid(x) - print(x) + #print(x) + #x = self.sigmoid(x) + #print(x) + x = self.sm(x) return x @@ -413,7 +416,8 @@ def mixer_s32_224(pretrained=False, **kwargs): """ Mixer-S/32 224x224 Paper: 'MLP-Mixer: An all-MLP Architecture for Vision' - https://arxiv.org/abs/2105.01601 """ - model_args = dict(patch_size=32, num_blocks=8, embed_dim=512, **kwargs) + #model_args = dict(patch_size=32, num_blocks=8, embed_dim=512, **kwargs) + model_args = dict(patch_size=16, num_blocks=8, embed_dim=256, **kwargs) model = _create_mixer('mixer_s32_224', pretrained=pretrained, **model_args) return model diff --git a/train.py b/train.py index b9f38ced..c9116938 100644 --- a/train.py +++ b/train.py @@ -679,10 +679,11 @@ def train_one_epoch( data_time_m = AverageMeter() losses_m = AverageMeter() + print("------Training-------") model.train() end = time.time() - print("loader_length=",len(loader)) + #print("loader_length=",len(loader)) last_idx = len(loader) - 1 num_updates = epoch * len(loader) for batch_idx, (input, target) in enumerate(loader): @@ -698,13 +699,15 @@ def train_one_epoch( with amp_autocast(): #print(model) + print(input.shape) output = model(input) print(output.shape) print(target.shape) #print(output) #print(target) - #print(loss_fn) + print(loss_fn) loss = loss_fn(output, target) + print("loss=", loss) if not args.distributed: losses_m.update(loss.item(), input.size(0)) @@ -785,6 +788,7 @@ def validate(model, loader, loss_fn, args, amp_autocast=suppress, log_suffix='') top1_m = AverageMeter() top5_m = AverageMeter() + print("------Validating-------") model.eval() end = time.time() @@ -809,8 +813,12 @@ def validate(model, loader, loss_fn, args, amp_autocast=suppress, log_suffix='') output = output.unfold(0, reduce_factor, reduce_factor).mean(dim=2) target = target[0:target.size(0):reduce_factor] + print("output =", output) + print("target=", target) loss = loss_fn(output, target) + print("eval_loss=", loss) acc1, acc5 = accuracy(output, target, topk=(1, 5)) + print("eval_acc1=", acc1) if args.distributed: reduced_loss = reduce_tensor(loss.data, args.world_size)