From 23e7f7ad39b465a59744fac2ef94067673592db5 Mon Sep 17 00:00:00 2001
From: Kai Zhang
Date: Sun, 19 Sep 2021 22:59:51 +0000
Subject: [PATCH 1/3] allow custom activation in SqueezeExcitation

---
 torchvision/models/efficientnet.py | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
index 06b2a301b6d..ba97b7adada 100644
--- a/torchvision/models/efficientnet.py
+++ b/torchvision/models/efficientnet.py
@@ -32,15 +32,23 @@
 class SqueezeExcitation(nn.Module):
-    def __init__(self, input_channels: int, squeeze_channels: int):
+    def __init__(
+        self,
+        input_channels: int,
+        squeeze_channels: int,
+        activation: Optional[Callable[..., nn.Module]] = None,
+    ) -> None:
         super().__init__()
         self.fc1 = nn.Conv2d(input_channels, squeeze_channels, 1)
         self.fc2 = nn.Conv2d(squeeze_channels, input_channels, 1)
+        if activation is None:
+            activation = nn.SiLU
+        self.activation = activation(inplace=True)
 
     def _scale(self, input: Tensor) -> Tensor:
         scale = F.adaptive_avg_pool2d(input, 1)
         scale = self.fc1(scale)
-        scale = F.silu(scale, inplace=True)
+        scale = self.activation(scale)
         scale = self.fc2(scale)
         return scale.sigmoid()

From 3933a462bb5f24199917c97e7c116ee34508f4af Mon Sep 17 00:00:00 2001
From: Kai Zhang
Date: Mon, 20 Sep 2021 20:04:51 +0000
Subject: [PATCH 2/3] use ReLU as the default activation

---
 torchvision/models/efficientnet.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
index ba97b7adada..8f49ff9c2f6 100644
--- a/torchvision/models/efficientnet.py
+++ b/torchvision/models/efficientnet.py
@@ -36,14 +36,12 @@ def __init__(
         self,
         input_channels: int,
         squeeze_channels: int,
-        activation: Optional[Callable[..., nn.Module]] = None,
+        activation: Callable[..., nn.Module] = nn.ReLU,
     ) -> None:
         super().__init__()
         self.fc1 = nn.Conv2d(input_channels, squeeze_channels, 1)
         self.fc2 = nn.Conv2d(squeeze_channels, input_channels, 1)
-        if activation is None:
-            activation = nn.SiLU
-        self.activation = activation(inplace=True)
+        self.activation = activation()
 
     def _scale(self, input: Tensor) -> Tensor:
         scale = F.adaptive_avg_pool2d(input, 1)
         scale = self.fc1(scale)
@@ -116,7 +114,7 @@ def __init__(self, cnf: MBConvConfig, stochastic_depth_prob: float, norm_layer:
 
         # squeeze and excitation
         squeeze_channels = max(1, cnf.input_channels // 4)
-        layers.append(se_layer(expanded_channels, squeeze_channels))
+        layers.append(se_layer(expanded_channels, squeeze_channels, activation=partial(nn.SiLU, inplace=True)))
 
         # project
         layers.append(ConvBNActivation(expanded_channels, cnf.out_channels, kernel_size=1, norm_layer=norm_layer,

From 4a94c13b217bca9a8c810e951048c81a2684844b Mon Sep 17 00:00:00 2001
From: Kai Zhang
Date: Tue, 21 Sep 2021 17:25:04 +0000
Subject: [PATCH 3/3] make scale activation parameterizable

---
 torchvision/models/efficientnet.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/torchvision/models/efficientnet.py b/torchvision/models/efficientnet.py
index 8f49ff9c2f6..bad5b57b25b 100644
--- a/torchvision/models/efficientnet.py
+++ b/torchvision/models/efficientnet.py
@@ -37,18 +37,20 @@ def __init__(
         input_channels: int,
         squeeze_channels: int,
         activation: Callable[..., nn.Module] = nn.ReLU,
+        scale_activation: Callable[..., nn.Module] = nn.Sigmoid,
     ) -> None:
         super().__init__()
         self.fc1 = nn.Conv2d(input_channels, squeeze_channels, 1)
         self.fc2 = nn.Conv2d(squeeze_channels, input_channels, 1)
         self.activation = activation()
+        self.scale_activation = scale_activation()
 
     def _scale(self, input: Tensor) -> Tensor:
         scale = F.adaptive_avg_pool2d(input, 1)
         scale = self.fc1(scale)
         scale = self.activation(scale)
         scale = self.fc2(scale)
-        return scale.sigmoid()
+        return self.scale_activation(scale)
 
     def forward(self, input: Tensor) -> Tensor:
         scale = self._scale(input)
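
After the three patches, SqueezeExcitation accepts an activation for the squeeze path (default nn.ReLU) and a scale_activation for the gating path (default nn.Sigmoid), while MBConv preserves the original EfficientNet behaviour by passing SiLU explicitly. A minimal usage sketch of the patched class follows; the channel sizes, input shape, and the Hardsigmoid override are illustrative and not part of the diff:

    from functools import partial

    import torch
    from torch import nn
    from torchvision.models.efficientnet import SqueezeExcitation

    # Defaults after patch 3/3: ReLU between the two 1x1 convs, Sigmoid on the scale branch.
    se_default = SqueezeExcitation(input_channels=64, squeeze_channels=16)

    # EfficientNet wiring from patch 2/3: SiLU as the inner activation, Sigmoid unchanged.
    se_silu = SqueezeExcitation(64, 16, activation=partial(nn.SiLU, inplace=True))

    # Illustrative override of the scale branch, enabled by patch 3/3.
    se_hard = SqueezeExcitation(64, 16, scale_activation=nn.Hardsigmoid)

    x = torch.randn(2, 64, 32, 32)   # illustrative (N, C, H, W) input
    out = se_silu(x)                 # SE re-weights channels; shape is unchanged
    assert out.shape == x.shape

Keeping the default as plain ReLU/Sigmoid and injecting SiLU only at the EfficientNet call site lets other architectures reuse the layer without inheriting EfficientNet-specific activations.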