Remove some redundant requires_grad=True arguments from nn.Parameter calls in third-party code

pull/1363/head
Ross Wightman 2 years ago
parent c5e0d1c700
commit 909705e7ff

@@ -182,8 +182,8 @@ class Block(nn.Module):
self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
if init_values:
self.gamma_1 = nn.Parameter(init_values * torch.ones(dim), requires_grad=True)
self.gamma_2 = nn.Parameter(init_values * torch.ones(dim), requires_grad=True)
self.gamma_1 = nn.Parameter(init_values * torch.ones(dim))
self.gamma_2 = nn.Parameter(init_values * torch.ones(dim))
else:
self.gamma_1, self.gamma_2 = None, None

@@ -122,8 +122,8 @@ class LayerScaleBlockClassAttn(nn.Module):
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = mlp_block(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
self.gamma_1 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True)
self.gamma_2 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True)
self.gamma_1 = nn.Parameter(init_values * torch.ones(dim))
self.gamma_2 = nn.Parameter(init_values * torch.ones(dim))
def forward(self, x, x_cls):
u = torch.cat((x_cls, x), dim=1)
@@ -189,8 +189,8 @@ class LayerScaleBlock(nn.Module):
self.norm2 = norm_layer(dim)
mlp_hidden_dim = int(dim * mlp_ratio)
self.mlp = mlp_block(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop)
self.gamma_1 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True)
self.gamma_2 = nn.Parameter(init_values * torch.ones((dim)), requires_grad=True)
self.gamma_1 = nn.Parameter(init_values * torch.ones(dim))
self.gamma_2 = nn.Parameter(init_values * torch.ones(dim))
def forward(self, x):
x = x + self.drop_path(self.gamma_1 * self.attn(self.norm1(x)))

@@ -117,8 +117,8 @@ class PoolFormerBlock(nn.Module):
self.drop_path2 = DropPath(drop_path) if drop_path > 0. else nn.Identity()
if layer_scale_init_value:
self.layer_scale_1 = nn.Parameter(layer_scale_init_value * torch.ones(dim), requires_grad=True)
self.layer_scale_2 = nn.Parameter(layer_scale_init_value * torch.ones(dim), requires_grad=True)
self.layer_scale_1 = nn.Parameter(layer_scale_init_value * torch.ones(dim))
self.layer_scale_2 = nn.Parameter(layer_scale_init_value * torch.ones(dim))
else:
self.layer_scale_1 = None
self.layer_scale_2 = None

@@ -230,8 +230,8 @@ class ClassAttentionBlock(nn.Module):
self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop)
if eta is not None: # LayerScale Initialization (no layerscale when None)
self.gamma1 = nn.Parameter(eta * torch.ones(dim), requires_grad=True)
self.gamma2 = nn.Parameter(eta * torch.ones(dim), requires_grad=True)
self.gamma1 = nn.Parameter(eta * torch.ones(dim))
self.gamma2 = nn.Parameter(eta * torch.ones(dim))
else:
self.gamma1, self.gamma2 = 1.0, 1.0
@@ -308,9 +308,9 @@ class XCABlock(nn.Module):
self.norm2 = norm_layer(dim)
self.mlp = Mlp(in_features=dim, hidden_features=int(dim * mlp_ratio), act_layer=act_layer, drop=drop)
self.gamma1 = nn.Parameter(eta * torch.ones(dim), requires_grad=True)
self.gamma3 = nn.Parameter(eta * torch.ones(dim), requires_grad=True)
self.gamma2 = nn.Parameter(eta * torch.ones(dim), requires_grad=True)
self.gamma1 = nn.Parameter(eta * torch.ones(dim))
self.gamma3 = nn.Parameter(eta * torch.ones(dim))
self.gamma2 = nn.Parameter(eta * torch.ones(dim))
def forward(self, x, H: int, W: int):
x = x + self.drop_path(self.gamma1 * self.attn(self.norm1(x)))

Loading…
Cancel
Save