open-mmlab
diff --git a/‎configs/cascade_rpn/cascade-rpn_fast-rcnn_r50-caffe_fpn_1x_coco.py
Lines changed: 12 additions & 62 deletions b/‎configs/cascade_rpn/cascade-rpn_fast-rcnn_r50-caffe_fpn_1x_coco.py
Lines changed: 12 additions & 62 deletions
diff --git a/‎configs/centripetalnet/README.md
Lines changed: 1 addition & 1 deletion b/‎configs/centripetalnet/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎configs/centripetalnet/centripetalnet_hourglass104_16xb6-crop511-210e-mstest_coco.py
Lines changed: 46 additions & 3 deletions b/‎configs/centripetalnet/centripetalnet_hourglass104_16xb6-crop511-210e-mstest_coco.py
Lines changed: 46 additions & 3 deletions
diff --git a/‎configs/cornernet/README.md
Lines changed: 1 addition & 1 deletion b/‎configs/cornernet/README.md
Lines changed: 1 addition & 1 deletion
diff --git a/‎configs/cornernet/cornernet_hourglass104_8xb6-210e-mstest_coco.py
Lines changed: 46 additions & 3 deletions b/‎configs/cornernet/cornernet_hourglass104_8xb6-210e-mstest_coco.py
Lines changed: 46 additions & 3 deletions
diff --git a/‎configs/dcn/mask-rcnn_r50-dconv-c3-c5_fpn_amp-1x_coco.py
Lines changed: 4 additions & 1 deletion b/‎configs/dcn/mask-rcnn_r50-dconv-c3-c5_fpn_amp-1x_coco.py
Lines changed: 4 additions & 1 deletion
diff --git a/‎configs/dcnv2/mask-rcnn_r50-mdconv-c3-c5_fpn_amp-1x_coco.py
Lines changed: 4 additions & 1 deletion b/‎configs/dcnv2/mask-rcnn_r50-mdconv-c3-c5_fpn_amp-1x_coco.py
Lines changed: 4 additions & 1 deletion
diff --git a/‎configs/faster_rcnn/faster-rcnn_r101-caffe_fpn_ms-3x_coco.py
Lines changed: 0 additions & 38 deletions b/‎configs/faster_rcnn/faster-rcnn_r101-caffe_fpn_ms-3x_coco.py
Lines changed: 0 additions & 38 deletions
diff --git a/‎configs/faster_rcnn/faster-rcnn_r50-caffe-c4_ms-1x_coco.py
Lines changed: 8 additions & 32 deletions b/‎configs/faster_rcnn/faster-rcnn_r50-caffe-c4_ms-1x_coco.py
Lines changed: 8 additions & 32 deletions
diff --git a/‎configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_1x_coco.py
Lines changed: 0 additions & 32 deletions b/‎configs/faster_rcnn/faster-rcnn_r50-caffe-dc5_1x_coco.py
Lines changed: 0 additions & 32 deletions
@@ -1,17 +1,5 @@
-_base_ = '../fast_rcnn/fast-rcnn_r50_fpn_1x_coco.py'
+_base_ = '../fast_rcnn/fast-rcnn_r50-caffe_fpn_1x_coco.py'
 model = dict(
-    backbone=dict(
-        type='ResNet',
-        depth=50,
-        num_stages=4,
-        out_indices=(0, 1, 2, 3),
-        frozen_stages=1,
-        norm_cfg=dict(type='BN', requires_grad=False),
-        norm_eval=True,
-        style='caffe',
-        init_cfg=dict(
-            type='Pretrained',
-            checkpoint='open-mmlab://detectron2/resnet50_caffe')),
     roi_head=dict(
         bbox_head=dict(
             bbox_coder=dict(target_stds=[0.04, 0.04, 0.08, 0.08]),
@@ -25,53 +13,15 @@
                 pos_iou_thr=0.65, neg_iou_thr=0.65, min_pos_iou=0.65),
             sampler=dict(num=256))),
     test_cfg=dict(rcnn=dict(score_thr=1e-3)))
-dataset_type = 'CocoDataset'
-data_root = 'data/coco/'
-img_norm_cfg = dict(
-    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
-train_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadProposals', num_max_proposals=300),
-    dict(type='LoadAnnotations', with_bbox=True),
-    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
-    dict(type='RandomFlip', flip_ratio=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'proposals', 'gt_bboxes', 'gt_labels']),
-]
-test_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadProposals', num_max_proposals=300),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1333, 800),
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='ToTensor', keys=['proposals']),
-            dict(
-                type='ToDataContainer',
-                fields=[dict(key='proposals', stack=False)]),
-            dict(type='Collect', keys=['img', 'proposals']),
-        ])
-]
-# TODO support proposals input
-data = dict(
-    train=dict(
-        proposal_file=data_root +
-        'proposals/crpn_r50_caffe_fpn_1x_train2017.pkl',
-        pipeline=train_pipeline),
-    val=dict(
-        proposal_file=data_root +
-        'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl',
-        pipeline=test_pipeline),
-    test=dict(
-        proposal_file=data_root +
-        'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl',
-        pipeline=test_pipeline))
+
+# MMEngine supports the following two ways; users can choose
+# whichever is more convenient.
+# train_dataloader = dict(dataset=dict(proposal_file='proposals/crpn_r50_caffe_fpn_1x_train2017.pkl'))  # noqa
+_base_.train_dataloader.dataset.proposal_file = 'proposals/crpn_r50_caffe_fpn_1x_train2017.pkl'  # noqa
+
+# val_dataloader = dict(dataset=dict(proposal_file='proposals/crpn_r50_caffe_fpn_1x_val2017.pkl'))  # noqa
+# test_dataloader = val_dataloader
+_base_.val_dataloader.dataset.proposal_file = 'proposals/crpn_r50_caffe_fpn_1x_val2017.pkl'  # noqa
+test_dataloader = _base_.val_dataloader
+
 optim_wrapper = dict(clip_grad=dict(max_norm=35, norm_type=2))
@@ -20,7 +20,7 @@ Keypoint-based detectors have achieved pretty-well performance. However, incorre
 
 Note:
 
-- TTA setting is single-scale and `flip=True`.
+- TTA setting is single-scale and `flip=True`. If you want to reproduce the TTA performance, please add `--tta` to the test command.
 - The model we released is the best checkpoint rather than the latest checkpoint (box AP 44.8 vs 44.6 in our experiment).
 
 ## Citation
 
@@ -45,7 +45,7 @@
 
 # data settings
 train_pipeline = [
-    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
+    dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
     dict(type='LoadAnnotations', with_bbox=True),
     dict(
         type='PhotoMetricDistortion',
@@ -70,12 +70,11 @@
     dict(type='PackDetInputs'),
 ]
 
-# TODO: mstest is not currently implemented
 test_pipeline = [
     dict(
         type='LoadImageFromFile',
         to_float32=True,
-        backend_args={{_base_.backend_args}}),
+        backend_args=_base_.backend_args),
     # don't need Resize
     dict(
         type='RandomCenterCropPad',
@@ -136,3 +135,47 @@
 # USER SHOULD NOT CHANGE ITS VALUES.
 # base_batch_size = (16 GPUs) x (6 samples per GPU)
 auto_scale_lr = dict(base_batch_size=96)
+
+tta_model = dict(
+    type='DetTTAModel',
+    tta_cfg=dict(
+        nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'),
+        max_per_img=100))
+
+tta_pipeline = [
+    dict(
+        type='LoadImageFromFile',
+        to_float32=True,
+        backend_args=_base_.backend_args),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                # ``RandomFlip`` must be placed before ``RandomCenterCropPad``,
+                # otherwise bounding box coordinates after flipping cannot be
+                # recovered correctly.
+                dict(type='RandomFlip', prob=1.),
+                dict(type='RandomFlip', prob=0.)
+            ],
+            [
+                dict(
+                    type='RandomCenterCropPad',
+                    crop_size=None,
+                    ratios=None,
+                    border=None,
+                    test_mode=True,
+                    test_pad_mode=['logical_or', 127],
+                    mean=data_preprocessor['mean'],
+                    std=data_preprocessor['std'],
+                    # Image data is not converted to rgb.
+                    to_rgb=data_preprocessor['bgr_to_rgb'])
+            ],
+            [dict(type='LoadAnnotations', with_bbox=True)],
+            [
+                dict(
+                    type='PackDetInputs',
+                    meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                               'flip', 'flip_direction', 'border'))
+            ]
+        ])
+]
@@ -22,7 +22,7 @@ We propose CornerNet, a new approach to object detection where we detect an obje
 
 Note:
 
-- TTA setting is single-scale and `flip=True`.
+- TTA setting is single-scale and `flip=True`. If you want to reproduce the TTA performance, please add `--tta` to the test command.
 - Experiments with `images_per_gpu=6` are conducted on Tesla V100-SXM2-32GB, `images_per_gpu=3` are conducted on GeForce GTX 1080 Ti.
 - Here are the descriptions of each experiment setting:
   - 10 x 5: 10 GPUs with 5 images per gpu. This is the same setting as that reported in the original paper.
 
@@ -45,7 +45,7 @@
 
 # data settings
 train_pipeline = [
-    dict(type='LoadImageFromFile', backend_args={{_base_.backend_args}}),
+    dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
     dict(type='LoadAnnotations', with_bbox=True),
     dict(
         type='PhotoMetricDistortion',
@@ -71,12 +71,11 @@
     dict(type='PackDetInputs'),
 ]
 
-# TODO: mstest is not currently implemented
 test_pipeline = [
     dict(
         type='LoadImageFromFile',
         to_float32=True,
-        backend_args={{_base_.backend_args}},
+        backend_args=_base_.backend_args,
     ),
     # don't need Resize
     dict(
@@ -138,3 +137,47 @@
 # USER SHOULD NOT CHANGE ITS VALUES.
 # base_batch_size = (8 GPUs) x (6 samples per GPU)
 auto_scale_lr = dict(base_batch_size=48)
+
+tta_model = dict(
+    type='DetTTAModel',
+    tta_cfg=dict(
+        nms=dict(type='soft_nms', iou_threshold=0.5, method='gaussian'),
+        max_per_img=100))
+
+tta_pipeline = [
+    dict(
+        type='LoadImageFromFile',
+        to_float32=True,
+        backend_args=_base_.backend_args),
+    dict(
+        type='TestTimeAug',
+        transforms=[
+            [
+                # ``RandomFlip`` must be placed before ``RandomCenterCropPad``,
+                # otherwise bounding box coordinates after flipping cannot be
+                # recovered correctly.
+                dict(type='RandomFlip', prob=1.),
+                dict(type='RandomFlip', prob=0.)
+            ],
+            [
+                dict(
+                    type='RandomCenterCropPad',
+                    crop_size=None,
+                    ratios=None,
+                    border=None,
+                    test_mode=True,
+                    test_pad_mode=['logical_or', 127],
+                    mean=data_preprocessor['mean'],
+                    std=data_preprocessor['std'],
+                    # Image data is not converted to rgb.
+                    to_rgb=data_preprocessor['bgr_to_rgb'])
+            ],
+            [dict(type='LoadAnnotations', with_bbox=True)],
+            [
+                dict(
+                    type='PackDetInputs',
+                    meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
+                               'flip', 'flip_direction', 'border'))
+            ]
+        ])
+]
@@ -4,4 +4,7 @@
         dcn=dict(type='DCN', deform_groups=1, fallback_on_stride=False),
         stage_with_dcn=(False, True, True, True)))
 
-fp16 = dict(loss_scale=512.)
+# MMEngine supports the following two ways; users can choose
+# whichever is more convenient.
+# optim_wrapper = dict(type='AmpOptimWrapper')
+_base_.optim_wrapper.type = 'AmpOptimWrapper'
@@ -4,4 +4,7 @@
         dcn=dict(type='DCNv2', deform_groups=1, fallback_on_stride=False),
         stage_with_dcn=(False, True, True, True)))
 
-fp16 = dict(loss_scale=512.)
+# MMEngine supports the following two ways; users can choose
+# whichever is more convenient.
+# optim_wrapper = dict(type='AmpOptimWrapper')
+_base_.optim_wrapper.type = 'AmpOptimWrapper'
@@ -9,41 +9,3 @@
         init_cfg=dict(
             type='Pretrained',
             checkpoint='open-mmlab://detectron2/resnet101_caffe')))
-
-# use caffe img_norm
-img_norm_cfg = dict(
-    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
-train_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations', with_bbox=True),
-    dict(
-        type='Resize',
-        img_scale=[(1333, 640), (1333, 800)],
-        multiscale_mode='range',
-        keep_ratio=True),
-    dict(type='RandomFlip', flip_ratio=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
-]
-test_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1333, 800),
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
-]
-
-data = dict(
-    train=dict(dataset=dict(pipeline=train_pipeline)),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
@@ -1,38 +1,14 @@
 _base_ = './faster-rcnn_r50-caffe_c4-1x_coco.py'
-# use caffe img_norm
-img_norm_cfg = dict(
-    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
+
 train_pipeline = [
-    dict(type='LoadImageFromFile'),
+    dict(type='LoadImageFromFile', backend_args=_base_.backend_args),
     dict(type='LoadAnnotations', with_bbox=True),
     dict(
-        type='Resize',
-        img_scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736),
-                   (1333, 768), (1333, 800)],
-        multiscale_mode='value',
+        type='RandomChoiceResize',
+        scale=[(1333, 640), (1333, 672), (1333, 704), (1333, 736), (1333, 768),
+               (1333, 800)],
         keep_ratio=True),
-    dict(type='RandomFlip', flip_ratio=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
+    dict(type='RandomFlip', prob=0.5),
+    dict(type='PackDetInputs')
 ]
-test_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1333, 800),
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
-]
-data = dict(
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))
+_base_.train_dataloader.dataset.pipeline = train_pipeline
@@ -3,35 +3,3 @@
     '../_base_/datasets/coco_detection.py',
     '../_base_/schedules/schedule_1x.py', '../_base_/default_runtime.py'
 ]
-# use caffe img_norm
-img_norm_cfg = dict(
-    mean=[103.530, 116.280, 123.675], std=[1.0, 1.0, 1.0], to_rgb=False)
-train_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(type='LoadAnnotations', with_bbox=True),
-    dict(type='Resize', img_scale=(1333, 800), keep_ratio=True),
-    dict(type='RandomFlip', flip_ratio=0.5),
-    dict(type='Normalize', **img_norm_cfg),
-    dict(type='Pad', size_divisor=32),
-    dict(type='DefaultFormatBundle'),
-    dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels']),
-]
-test_pipeline = [
-    dict(type='LoadImageFromFile'),
-    dict(
-        type='MultiScaleFlipAug',
-        img_scale=(1333, 800),
-        flip=False,
-        transforms=[
-            dict(type='Resize', keep_ratio=True),
-            dict(type='RandomFlip'),
-            dict(type='Normalize', **img_norm_cfg),
-            dict(type='Pad', size_divisor=32),
-            dict(type='ImageToTensor', keys=['img']),
-            dict(type='Collect', keys=['img']),
-        ])
-]
-data = dict(
-    train=dict(pipeline=train_pipeline),
-    val=dict(pipeline=test_pipeline),
-    test=dict(pipeline=test_pipeline))