diff --git a/timm/models/davit.py b/timm/models/davit.py
index cc924aa4..e551cc61 100644
--- a/timm/models/davit.py
+++ b/timm/models/davit.py
@@ -341,88 +341,6 @@ class SpatialBlock(nn.Module):
         
         return x
 
-class SpatialBlockOld(nn.Module):
-    r""" Windows Block.
-    Args:
-        dim (int): Number of input channels.
-        num_heads (int): Number of attention heads.
-        window_size (int): Window size.
-        mlp_ratio (float): Ratio of mlp hidden dim to embedding dim.
-        qkv_bias (bool, optional): If True, add a learnable bias to query, key, value. Default: True
-        drop_path (float, optional): Stochastic depth rate. Default: 0.0
-        act_layer (nn.Module, optional): Activation layer. Default: nn.GELU
-        norm_layer (nn.Module, optional): Normalization layer.  Default: nn.LayerNorm
-    """
-
-    def __init__(self, dim, num_heads, window_size=7,
-                 mlp_ratio=4., qkv_bias=True, drop_path=0.,
-                 act_layer=nn.GELU, norm_layer=nn.LayerNorm,
-                 ffn=True, cpe_act=False):
-        super().__init__()
-        self.dim = dim
-        self.ffn = ffn
-        self.num_heads = num_heads
-        self.window_size = window_size
-        self.mlp_ratio = mlp_ratio
-        
-        self.cpe1 = ConvPosEnc(dim=dim, k=3, act=cpe_act)
-        self.norm1 = norm_layer(dim)
-        self.attn = WindowAttention(
-            dim,
-            window_size=to_2tuple(self.window_size),
-            num_heads=num_heads,
-            qkv_bias=qkv_bias)
-
-        self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity()
-        self.cpe2 = ConvPosEnc(dim=dim, k=3, act=cpe_act)
- 
-        if self.ffn:
-            self.norm2 = norm_layer(dim)
-            mlp_hidden_dim = int(dim * mlp_ratio)
-            self.mlp = Mlp(
-                in_features=dim,
-                hidden_features=mlp_hidden_dim,
-                act_layer=act_layer)
-
-
-    def forward(self, x : Tensor, size: Tuple[int, int]):
-
-        H, W = size
-        B, L, C = x.shape
-
-        shortcut = self.cpe1(x, size)
-        x = self.norm1(shortcut)
-        x = x.view(B, H, W, C)
-
-        pad_l = pad_t = 0
-        pad_r = (self.window_size - W % self.window_size) % self.window_size
-        pad_b = (self.window_size - H % self.window_size) % self.window_size
-        x = F.pad(x, (0, 0, pad_l, pad_r, pad_t, pad_b))
-        _, Hp, Wp, _ = x.shape
-
-        x_windows = window_partition(x, self.window_size)
-        x_windows = x_windows.view(-1, self.window_size * self.window_size, C)
-
-        # W-MSA/SW-MSA
-        attn_windows = self.attn(x_windows)
-
-        # merge windows
-        attn_windows = attn_windows.view(-1,
-                                         self.window_size,
-                                         self.window_size,
-                                         C)
-        x = window_reverse(attn_windows, self.window_size, Hp, Wp)
-
-        #if pad_r > 0 or pad_b > 0:
-        x = x[:, :H, :W, :].contiguous()
-
-        x = x.view(B, H * W, C)
-        x = shortcut + self.drop_path(x)
-
-        x = self.cpe2(x, size)
-        if self.ffn:
-            x = x + self.drop_path(self.mlp(self.norm2(x)))
-        return x, size
         
 class DaViTStage(nn.Module):
     def __init__(