parent
3ba6b55cb2
commit
b2c305c2aa
@ -0,0 +1,49 @@
|
|||||||
|
""" MLP module w/ dropout and configurable activation layer
|
||||||
|
|
||||||
|
Hacked together by / Copyright 2020 Ross Wightman
|
||||||
|
"""
|
||||||
|
from torch import nn as nn
|
||||||
|
|
||||||
|
|
||||||
|
class Mlp(nn.Module):
    """ Two-layer MLP: Linear -> activation -> dropout -> Linear -> dropout

    MLP as used in Vision Transformer, MLP-Mixer and related networks.

    Args:
        in_features: input width of the first linear layer.
        hidden_features: output width of the first linear layer; a falsy value
            (None or 0) falls back to `in_features`.
        out_features: output width of the second linear layer; a falsy value
            (None or 0) falls back to `in_features`.
        act_layer: zero-argument callable that builds the activation module.
        drop: probability passed to the single `nn.Dropout` instance, which is
            applied both after the activation and after the second linear layer.
    """
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.):
        super().__init__()
        # Unspecified (falsy) widths default to the input width.
        if not hidden_features:
            hidden_features = in_features
        if not out_features:
            out_features = in_features

        # Creation order is kept as fc1, act, fc2, drop: it determines the
        # submodule registration order and the order weights are initialised in.
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        """ Apply fc1, activation, dropout, fc2 and dropout to `x`, in that order. """
        hidden = self.drop(self.act(self.fc1(x)))
        return self.drop(self.fc2(hidden))
|
||||||
|
|
||||||
|
|
||||||
|
class GluMlp(nn.Module):
    """ MLP w/ GLU style gating

    The first linear layer produces twice `hidden_features` outputs; these are
    split in half along the last dimension, and the first half is multiplied by
    the activation of the second half before the second linear layer.

    See: https://arxiv.org/abs/1612.08083, https://arxiv.org/abs/2002.05202

    Args:
        in_features: input width of the first linear layer.
        hidden_features: width of each half after the split (the first linear
            layer outputs `hidden_features * 2`); a falsy value (None or 0)
            falls back to `in_features`.
        out_features: output width of the second linear layer; a falsy value
            (None or 0) falls back to `in_features`.
        act_layer: zero-argument callable that builds the gate activation module.
        drop: probability passed to the single `nn.Dropout` instance, which is
            applied both after the gating and after the second linear layer.
    """
    def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.Sigmoid, drop=0.):
        super().__init__()
        # Unspecified (falsy) widths default to the input width.
        if not hidden_features:
            hidden_features = in_features
        if not out_features:
            out_features = in_features

        # Creation order is kept as fc1, act, fc2, drop: it determines the
        # submodule registration order and the order weights are initialised in.
        self.fc1 = nn.Linear(in_features, hidden_features * 2)
        self.act = act_layer()
        self.fc2 = nn.Linear(hidden_features, out_features)
        self.drop = nn.Dropout(drop)

    def forward(self, x):
        """ Apply fc1, gate one half by the activated other half, then dropout, fc2, dropout. """
        projected = self.fc1(x)
        # First chunk carries the values, second chunk carries the gates.
        values, gates = projected.chunk(2, dim=-1)
        gated = self.drop(values * self.act(gates))
        return self.drop(self.fc2(gated))
|
@ -0,0 +1,36 @@
|
|||||||
|
""" Image to Patch Embedding using Conv2d
|
||||||
|
|
||||||
|
A convolution based approach to patchifying a 2D image w/ embedding projection.
|
||||||
|
|
||||||
|
Based on the impl in https://github.com/google-research/vision_transformer
|
||||||
|
|
||||||
|
Hacked together by / Copyright 2020 Ross Wightman
|
||||||
|
"""
|
||||||
|
|
||||||
|
from torch import nn as nn
|
||||||
|
|
||||||
|
from .helpers import to_2tuple
|
||||||
|
|
||||||
|
|
||||||
|
class PatchEmbed(nn.Module):
    """ 2D Image to Patch Embedding

    Projects an image with a Conv2d whose kernel size and stride both equal the
    patch size, then flattens the spatial dimensions and moves them ahead of
    the embedding dimension.

    Args:
        img_size: expected input image size, passed through `to_2tuple`.
        patch_size: patch size, passed through `to_2tuple`; used as both the
            kernel size and the stride of the projection.
        in_chans: number of input channels of the projection conv.
        embed_dim: number of output channels of the projection conv.
        norm_layer: optional callable taking `embed_dim` and returning a
            module applied after the projection; `nn.Identity` is used when falsy.
    """
    def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768, norm_layer=None):
        super().__init__()
        self.img_size = to_2tuple(img_size)
        self.patch_size = to_2tuple(patch_size)

        # Number of whole patches that fit along each of the two image axes.
        rows = self.img_size[0] // self.patch_size[0]
        cols = self.img_size[1] // self.patch_size[1]
        self.out_size = (rows, cols)
        self.num_patches = rows * cols

        self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=self.patch_size, stride=self.patch_size)
        if norm_layer:
            self.norm = norm_layer(embed_dim)
        else:
            self.norm = nn.Identity()

    def forward(self, x):
        """ Embed `x` (a 4-D tensor whose last two dims must equal `img_size`) as a patch sequence. """
        _, _, H, W = x.shape
        assert H == self.img_size[0] and W == self.img_size[1], \
            f"Input image size ({H}*{W}) doesn't match model ({self.img_size[0]}*{self.img_size[1]})."
        patches = self.proj(x)
        # Collapse the two spatial dims into one, then swap it with the channel dim.
        patches = patches.flatten(2)
        patches = patches.transpose(1, 2)
        return self.norm(patches)
|
Loading…
Reference in new issue