@@ -24,35 +24,68 @@ def __init__(self, in_channels: List[int], num_anchors: List[int], num_classes:
         self.regression_head = SSDRegressionHead(in_channels, num_anchors)
 
 
-class SSDClassificationHead(nn.Module):
-    def __init__(self, in_channels: List[int], num_anchors: List[int], num_classes: int):
+class SSDScoringHead(nn.Module):
+    def __init__(self, module_list: nn.ModuleList, num_columns: int):
         super().__init__()
-        self.cls_logits = nn.ModuleList()
+        self.module_list = module_list
+        self.num_columns = num_columns
+
+    def get_result_from_module_list(self, x: Tensor, idx: int) -> Tensor:
+        """
+        This is equivalent to self.module_list[idx](x),
+        but torchscript doesn't support this yet
+        """
+        num_blocks = len(self.module_list)
+        if idx < 0:
+            idx += num_blocks
+        i = 0
+        out = x
+        for module in self.module_list:
+            if i == idx:
+                out = module(x)
+            i += 1
+        return out
+
+    def forward(self, x: List[Tensor]) -> Tensor:
+        all_results = []
+
+        for i, features in enumerate(x):
+            results = self.get_result_from_module_list(features, i)
+
+            # Permute output from (N, A * K, H, W) to (N, HWA, K).
+            N, _, H, W = results.shape
+            results = results.view(N, -1, self.num_columns, H, W)
+            results = results.permute(0, 3, 4, 1, 2)
+            results = results.reshape(N, -1, self.num_columns)  # Size=(N, HWA, K)
+
+            all_results.append(results)
+
+        return torch.cat(all_results, dim=1)
+
+
+class SSDClassificationHead(SSDScoringHead):
+    def __init__(self, in_channels: List[int], num_anchors: List[int], num_classes: int):
+        cls_logits = nn.ModuleList()
         for channels, anchors in zip(in_channels, num_anchors):
-            self.cls_logits.append(nn.Conv2d(channels, num_classes * anchors, kernel_size=3, padding=1))
+            cls_logits.append(nn.Conv2d(channels, num_classes * anchors, kernel_size=3, padding=1))
+        super().__init__(cls_logits, num_classes)
 
     def compute_loss(self, targets: List[Dict[str, Tensor]], head_outputs: Dict[str, Tensor],
                      matched_idxs: List[Tensor]) -> Tensor:
         pass
 
-    def forward(self, x: List[Tensor]) -> Tensor:
-        pass
 
-
-class SSDRegressionHead(nn.Module):
+class SSDRegressionHead(SSDScoringHead):
     def __init__(self, in_channels: List[int], num_anchors: List[int]):
-        super().__init__()
-        self.bbox_reg = nn.ModuleList()
+        bbox_reg = nn.ModuleList()
         for channels, anchors in zip(in_channels, num_anchors):
-            self.bbox_reg.append(nn.Conv2d(channels, 4 * anchors, kernel_size=3, padding=1))
+            bbox_reg.append(nn.Conv2d(channels, 4 * anchors, kernel_size=3, padding=1))
+        super().__init__(bbox_reg, 4)
 
     def compute_loss(self, targets: List[Dict[str, Tensor]], head_outputs: Dict[str, Tensor], anchors: List[Tensor],
                      matched_idxs: List[Tensor]) -> Tensor:
         pass
 
-    def forward(self, x: List[Tensor]) -> Tensor:
-        pass
-
 
 class SSD(RetinaNet):
     def __init__(self, backbone: nn.Module, num_classes: int,
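For context on the hunk above: SSDScoringHead.forward rearranges each level's map of shape (N, A * K, H, W) into (N, H*W*A, K) so the per-level results can be concatenated along the box dimension, with K being num_classes for the classification head and 4 for the regression head. A minimal standalone sketch of that reshape, with toy shapes chosen purely for illustration (not part of the patch):

import torch

# Toy shapes: batch of 2, A = 4 anchors per location, K = 21 columns, 8x8 feature map.
N, A, K, H, W = 2, 4, 21, 8, 8
results = torch.randn(N, A * K, H, W)

# Same steps as SSDScoringHead.forward: split the channel dim into (A, K),
# move the spatial and anchor dims ahead of the column dim, then flatten.
results = results.view(N, -1, K, H, W)    # (N, A, K, H, W)
results = results.permute(0, 3, 4, 1, 2)  # (N, H, W, A, K)
results = results.reshape(N, -1, K)       # (N, H*W*A, K)
print(results.shape)                      # torch.Size([2, 256, 21])

Each row of the final (N, HWA, K) tensor then corresponds to one default box.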
@@ -80,8 +113,8 @@ def __init__(self, backbone: nn.Module, num_classes: int,
         self.backbone = backbone
 
         # Estimate num of anchors based on aspect ratios: 2 default boxes + 2 * ratios of feature map.
-        num_anchors = [2 + 2 * len(r) for r in aspect_ratios]
-        self.head = SSDHead(out_channels, num_anchors, num_classes)
+        self.num_anchors = [2 + 2 * len(r) for r in aspect_ratios]
+        self.head = SSDHead(out_channels, self.num_anchors, num_classes)
 
         self.anchor_generator = DBoxGenerator(size, feature_map_sizes, aspect_ratios)
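The 2 + 2 * len(r) estimate follows the SSD convention: every location gets two "square" default boxes plus one box per extra aspect ratio and its reciprocal. A quick sanity check of that arithmetic, using the SSD300 aspect-ratio lists from the original paper only as an example:

# SSD300 aspect ratios per feature map (from the original SSD paper), for illustration.
aspect_ratios = [[2], [2, 3], [2, 3], [2, 3], [2], [2]]
num_anchors = [2 + 2 * len(r) for r in aspect_ratios]
print(num_anchors)  # [4, 6, 6, 6, 4, 4]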
@@ -97,7 +130,8 @@ def __init__(self, backbone: nn.Module, num_classes: int,
             image_mean = [0.485, 0.456, 0.406]
         if image_std is None:
             image_std = [0.229, 0.224, 0.225]
-        self.transform = GeneralizedRCNNTransform(size, size, image_mean, image_std)
+        self.transform = GeneralizedRCNNTransform(size, size, image_mean, image_std,
+                                                  size_divisible=1)  # TODO: Discuss/refactor this workaround
 
         self.score_thresh = score_thresh
         self.nms_thresh = nms_thresh
@@ -107,6 +141,15 @@ def __init__(self, backbone: nn.Module, num_classes: int,
         # used only on torchscript mode
         self._has_warned = False
 
+    def _anchors_per_level(self, features, HWA):
+        # TODO: Discuss/refactor this workaround
+        num_anchors_per_level = [x.size(2) * x.size(3) * anchors for x, anchors in zip(features, self.num_anchors)]
+        HW = 0
+        for v in num_anchors_per_level:
+            HW += v
+        A = HWA // HW
+        return [hw * A for hw in num_anchors_per_level]
+
     def compute_loss(self, targets: List[Dict[str, Tensor]], head_outputs: Dict[str, Tensor],
                      anchors: List[Tensor]) -> Dict[str, Tensor]:
         pass
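The _anchors_per_level helper appears to exist so the inherited RetinaNet logic can split the HWA concatenated predictions back per feature level: the per-level count is H * W * anchors, and the A = HWA // HW scaling collapses to 1 whenever those counts already sum to HWA. A toy walk-through of that arithmetic, with feature shapes and anchor counts made up for illustration:

import torch

# Hypothetical feature maps for three levels and a made-up per-level anchor count.
features = [torch.randn(2, 256, 8, 8), torch.randn(2, 256, 4, 4), torch.randn(2, 256, 2, 2)]
num_anchors = [4, 6, 4]  # stands in for self.num_anchors

# Same arithmetic as _anchors_per_level: H * W * anchors cells per level.
num_anchors_per_level = [x.size(2) * x.size(3) * a for x, a in zip(features, num_anchors)]
print(num_anchors_per_level)             # [256, 96, 16]

HWA = sum(num_anchors_per_level)         # total boxes in the concatenated head output
A = HWA // sum(num_anchors_per_level)    # == 1, so the split is just the list above
print([hw * A for hw in num_anchors_per_level], HWA)  # [256, 96, 16] 368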
@@ -203,7 +246,7 @@ def ssd_vgg16(pretrained: bool = False, progress: bool = True, num_classes: int
         pretrained_backbone = False
 
     backbone = _vgg_backbone("vgg16", pretrained_backbone, trainable_layers=trainable_backbone_layers)
-    model = SSD(backbone, num_classes, **kwargs)
+    model = SSD(backbone, num_classes, **kwargs)  # TODO: fix initializations in all new layers
    if pretrained:
         pass  # TODO: load pre-trained COCO weights
     return model