diff --git a/timm/models/layers/activations.py b/timm/models/layers/activations.py
index 165b7951..6f8d2f89 100644
--- a/timm/models/layers/activations.py
+++ b/timm/models/layers/activations.py
@@ -12,7 +12,7 @@ from torch import nn as nn
 from torch.nn import functional as F
 
 
-_USE_MEM_EFFICIENT_ISH = False
+_USE_MEM_EFFICIENT_ISH = True
 if _USE_MEM_EFFICIENT_ISH:
     # This version reduces memory overhead of Swish during training by
     # recomputing torch.sigmoid(x) in backward instead of saving it.
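
For context, a minimal sketch of the kind of memory-efficient Swish this flag enables: a custom torch.autograd.Function that saves only the input tensor and recomputes torch.sigmoid(x) in the backward pass rather than keeping the sigmoid output alive. The names memswish_fwd, memswish_bwd and MemoryEfficientSwish below are illustrative placeholders, not the identifiers used in activations.py.

# Illustrative sketch; names are hypothetical, not from activations.py
import torch

def memswish_fwd(x):
    # forward: swish(x) = x * sigmoid(x)
    return x.mul(torch.sigmoid(x))

def memswish_bwd(x, grad_output):
    # recompute sigmoid(x) here instead of saving it in forward
    x_sigmoid = torch.sigmoid(x)
    # d/dx [x * sigmoid(x)] = sigmoid(x) * (1 + x * (1 - sigmoid(x)))
    return grad_output * (x_sigmoid * (1 + x * (1 - x_sigmoid)))

class MemoryEfficientSwish(torch.autograd.Function):
    @staticmethod
    def forward(ctx, x):
        ctx.save_for_backward(x)  # only the input is kept for backward
        return memswish_fwd(x)

    @staticmethod
    def backward(ctx, grad_output):
        x = ctx.saved_tensors[0]
        return memswish_bwd(x, grad_output)

Compared with the plain x * torch.sigmoid(x) expression, where autograd retains the intermediate sigmoid output for the backward pass, this approach trades a small amount of extra compute in backward for lower peak memory during training.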