Merge pull request #750 from drjinying/master

Specify "interpolation" mode in vision_transformer's resize_pos_embed
pull/771/head
Ross Wightman 3 years ago committed by GitHub
commit 72b227dcf5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -377,7 +377,7 @@ def resize_pos_embed(posemb, posemb_new):
size_new = int(math.sqrt(num_blocks_new*seq_length_new))
# First change to (1, C, H, W)
posemb = deblockify(posemb, int(math.sqrt(seq_length_old))).permute(0, 3, 1, 2)
posemb = F.interpolate(posemb, size=[size_new, size_new], mode='bilinear')
posemb = F.interpolate(posemb, size=[size_new, size_new], mode='bicubic', align_corners=False)
# Now change to new (1, T, N, C)
posemb = blockify(posemb.permute(0, 2, 3, 1), int(math.sqrt(seq_length_new)))
return posemb

@ -494,7 +494,7 @@ def resize_pos_embed(posemb, posemb_new, num_tokens=1, gs_new=()):
assert len(gs_new) >= 2
_logger.info('Position embedding grid-size from %s to %s', [gs_old, gs_old], gs_new)
posemb_grid = posemb_grid.reshape(1, gs_old, gs_old, -1).permute(0, 3, 1, 2)
posemb_grid = F.interpolate(posemb_grid, size=gs_new, mode='bilinear')
posemb_grid = F.interpolate(posemb_grid, size=gs_new, mode='bicubic', align_corners=False)
posemb_grid = posemb_grid.permute(0, 2, 3, 1).reshape(1, gs_new[0] * gs_new[1], -1)
posemb = torch.cat([posemb_tok, posemb_grid], dim=1)
return posemb

Loading…
Cancel
Save