@@ -171,162 +171,6 @@ def fast_rcnn_inference_single_image(
171
171
return result , filter_inds [:, 0 ]
172
172
173
173
174
class FastRCNNOutputs:
    """
    An internal implementation that stores information about outputs of a Fast R-CNN head,
    and provides methods that are used to decode the outputs of a Fast R-CNN head.

    The loss and prediction methods are marked as deprecated; they are kept so that
    existing callers keep working.
    """

    def __init__(
        self,
        box2box_transform,
        pred_class_logits,
        pred_proposal_deltas,
        proposals,
        smooth_l1_beta=0.0,
        box_reg_loss_type="smooth_l1",
    ):
        """
        Args:
            box2box_transform (Box2BoxTransform/Box2BoxTransformRotated):
                box2box transform instance for proposal-to-detection transformations.
            pred_class_logits (Tensor): A tensor of shape (R, K + 1) storing the predicted class
                logits for all R predicted object instances.
                Each row corresponds to a predicted object instance.
            pred_proposal_deltas (Tensor): A tensor of shape (R, K * B) or (R, B) for
                class-specific or class-agnostic regression. It stores the predicted deltas that
                transform proposals into final box detections.
                B is the box dimension (4 or 5).
                When B is 4, each row is [dx, dy, dw, dh (, ....)].
                When B is 5, each row is [dx, dy, dw, dh, da (, ....)].
            proposals (list[Instances]): A list of N Instances, where Instances i stores the
                proposals for image i, in the field "proposal_boxes".
                When training, each Instances must have ground-truth labels
                stored in the field "gt_classes" and "gt_boxes".
                The total number of all instances must be equal to R.
            smooth_l1_beta (float): The transition point between L1 and L2 loss in
                the smooth L1 loss function. When set to 0, the loss becomes L1. When
                set to +inf, the loss becomes constant 0.
            box_reg_loss_type (str): Box regression loss type. One of: "smooth_l1", "giou"
        """
        self.box2box_transform = box2box_transform
        self.num_preds_per_image = [len(p) for p in proposals]
        self.pred_class_logits = pred_class_logits
        self.pred_proposal_deltas = pred_proposal_deltas
        self.smooth_l1_beta = smooth_l1_beta
        self.box_reg_loss_type = box_reg_loss_type

        self.image_shapes = [x.image_size for x in proposals]

        if len(proposals):
            box_type = type(proposals[0].proposal_boxes)
            # cat(..., dim=0) concatenates over all images in the batch
            self.proposals = box_type.cat([p.proposal_boxes for p in proposals])
            assert (
                not self.proposals.tensor.requires_grad
            ), "Proposals should not require gradients!"

            # "gt_classes" exists if and only if training. But other gt fields may
            # not necessarily exist in training for images that have no groundtruth.
            if proposals[0].has("gt_classes"):
                self.gt_classes = cat([p.gt_classes for p in proposals], dim=0)

                # If "gt_boxes" does not exist, the proposals must be all negative and
                # should not be included in regression loss computation.
                # Here we just use proposal_boxes as an arbitrary placeholder because its
                # value won't be used in self.box_reg_loss().
                gt_boxes = [
                    p.gt_boxes if p.has("gt_boxes") else p.proposal_boxes for p in proposals
                ]
                self.gt_boxes = box_type.cat(gt_boxes)
        else:
            # Empty batch: no "gt_classes"/"gt_boxes" attributes are created here, so
            # every loss method must check self._no_instances before reading them.
            self.proposals = Boxes(torch.zeros(0, 4, device=self.pred_proposal_deltas.device))
        self._no_instances = len(self.proposals) == 0  # no instances found

    def softmax_cross_entropy_loss(self):
        """
        Deprecated

        Compute the mean-reduced classification loss between ``pred_class_logits``
        and ``gt_classes``.

        Returns:
            A scalar Tensor. When there are no instances, a zero that is still
            connected to ``pred_class_logits`` in the autograd graph.
        """
        if self._no_instances:
            # With an empty list of proposals "gt_classes" was never set, so reading it
            # below would raise AttributeError. Mirror the guard used by box_reg_loss().
            return 0.0 * self.pred_class_logits.sum()

        _log_classification_stats(self.pred_class_logits, self.gt_classes)
        return cross_entropy(self.pred_class_logits, self.gt_classes, reduction="mean")

    def box_reg_loss(self):
        """
        Deprecated

        Compute the box regression loss over foreground proposals, using either
        smooth L1 on the deltas or GIoU on the decoded boxes depending on
        ``box_reg_loss_type``.

        Returns:
            A scalar Tensor: the summed loss divided by the total number of
            proposals (``gt_classes.numel()``), or a graph-connected zero when
            there are no instances.

        Raises:
            ValueError: if ``box_reg_loss_type`` is neither "smooth_l1" nor "giou".
        """
        if self._no_instances:
            return 0.0 * self.pred_proposal_deltas.sum()

        box_dim = self.proposals.tensor.size(1)  # 4 or 5
        cls_agnostic_bbox_reg = self.pred_proposal_deltas.size(1) == box_dim
        device = self.pred_proposal_deltas.device

        bg_class_ind = self.pred_class_logits.shape[1] - 1
        # Box delta loss is only computed between the prediction for the gt class k
        # (if 0 <= k < bg_class_ind) and the target; there is no loss defined on predictions
        # for non-gt classes and background.
        # Empty fg_inds should produce a valid loss of zero because reduction=sum.
        fg_inds = nonzero_tuple((self.gt_classes >= 0) & (self.gt_classes < bg_class_ind))[0]

        if cls_agnostic_bbox_reg:
            # pred_proposal_deltas only corresponds to foreground class for agnostic
            gt_class_cols = torch.arange(box_dim, device=device)
        else:
            # pred_proposal_deltas for class k are located in columns [b * k : b * k + b],
            # where b is the dimension of box representation (4 or 5)
            # Note that compared to Detectron1,
            # we do not perform bounding box regression for background classes.
            gt_class_cols = box_dim * self.gt_classes[fg_inds, None] + torch.arange(
                box_dim, device=device
            )

        if self.box_reg_loss_type == "smooth_l1":
            gt_proposal_deltas = self.box2box_transform.get_deltas(
                self.proposals.tensor, self.gt_boxes.tensor
            )
            loss_box_reg = smooth_l1_loss(
                self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols],
                gt_proposal_deltas[fg_inds],
                self.smooth_l1_beta,
                reduction="sum",
            )
        elif self.box_reg_loss_type == "giou":
            fg_pred_boxes = self.box2box_transform.apply_deltas(
                self.pred_proposal_deltas[fg_inds[:, None], gt_class_cols],
                self.proposals.tensor[fg_inds],
            )
            loss_box_reg = giou_loss(
                fg_pred_boxes,
                self.gt_boxes.tensor[fg_inds],
                reduction="sum",
            )
        else:
            raise ValueError(f"Invalid bbox reg loss type '{self.box_reg_loss_type}'")

        # Normalize by the number of all proposals (foreground and background alike),
        # not by the number of foreground proposals that contributed to the sum.
        loss_box_reg = loss_box_reg / self.gt_classes.numel()
        return loss_box_reg

    def losses(self):
        """
        Deprecated

        Returns:
            dict[str, Tensor]: the classification loss under "loss_cls" and the
            box regression loss under "loss_box_reg".
        """
        return {"loss_cls": self.softmax_cross_entropy_loss(), "loss_box_reg": self.box_reg_loss()}

    def predict_boxes(self):
        """
        Deprecated

        Apply the predicted deltas to the proposals.

        Returns:
            tuple[Tensor]: the decoded boxes, split along dim 0 into one chunk per
            image according to ``num_preds_per_image``.
        """
        pred = self.box2box_transform.apply_deltas(self.pred_proposal_deltas, self.proposals.tensor)
        return pred.split(self.num_preds_per_image, dim=0)

    def predict_probs(self):
        """
        Deprecated

        Returns:
            tuple[Tensor]: softmax class probabilities over the last dim of
            ``pred_class_logits``, split along dim 0 into one chunk per image
            according to ``num_preds_per_image``.
        """
        probs = F.softmax(self.pred_class_logits, dim=-1)
        return probs.split(self.num_preds_per_image, dim=0)
328
-
329
-
330
174
class FastRCNNOutputLayers (nn .Module ):
331
175
"""
332
176
Two linear layers for predicting Fast R-CNN outputs:
0 commit comments