@@ -14,57 +14,44 @@
 import torch
 import torch.nn as nn
 from wtorch.utils import *
-from wtorch.nn import *
-from einops import rearrange
 
 
 BN_MOMENTUM = 0.1
 logger = logging.getLogger(__name__)
 
 
-def conv3x3(in_planes, out_planes, stride=1, bias=False):
+def conv3x3(in_planes, out_planes, stride=1):
     """3x3 convolution with padding"""
     return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
-                     padding=1, bias=bias)
+                     padding=1, bias=False)
 
-def get_norm(planes, momentum=None, type="layer_norm"):
-    if type == "bn":
-        return nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
-    elif type == "layer_norm":
-        return LayerNorm(planes)
-
-def get_activation_fn(*args, **kwargs):
-    return nn.GELU()
 
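Note: dropping the bias argument here is the standard conv + BatchNorm pairing; BN's per-channel affine shift makes a preceding conv bias redundant, so bias=False only removes dead parameters. A minimal sketch with hypothetical sizes, assuming the imports and definitions above:

    conv_bn = nn.Sequential(conv3x3(16, 32),
                            nn.BatchNorm2d(32, momentum=BN_MOMENTUM))
    y = conv_bn(torch.randn(1, 16, 8, 8))  # -> (1, 32, 8, 8)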
 class BasicBlock(nn.Module):
     expansion = 1
 
     def __init__(self, inplanes, planes, stride=1, downsample=None):
         super(BasicBlock, self).__init__()
         self.conv1 = conv3x3(inplanes, planes, stride)
-        self.bn1 = get_norm(planes, momentum=BN_MOMENTUM)
-        self.relu = get_activation_fn(inplace=True)
-        self.conv2 = conv3x3(planes, planes, bias=True)
+        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
+        self.relu = nn.ReLU(inplace=True)
+        self.conv2 = conv3x3(planes, planes)
+        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
         self.downsample = downsample
         self.stride = stride
-        if self.stride == 1:
-            self.drop_path = nn.Dropout2d(p=0.08)
-        else:
-            self.drop_path = None
 
     def forward(self, x):
         residual = x
 
         out = self.conv1(x)
         out = self.bn1(out)
+        out = self.relu(out)
 
         out = self.conv2(out)
+        out = self.bn2(out)
 
         if self.downsample is not None:
             residual = self.downsample(x)
-
-        if self.drop_path is not None:
-            out = self.drop_path(out)
+
         out += residual
         out = self.relu(out)
 
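With bn2 restored and the drop_path branch removed, BasicBlock is again the stock two-conv residual unit. A quick shape check with toy, hypothetical sizes:

    block = BasicBlock(64, 64)               # stride=1, so no projection is needed
    out = block(torch.randn(2, 64, 32, 32))  # the residual add requires matching shapes
    assert out.shape == (2, 64, 32, 32)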
@@ -77,30 +64,37 @@ class Bottleneck(nn.Module):
     def __init__(self, inplanes, planes, stride=1, downsample=None):
         super(Bottleneck, self).__init__()
         self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
+        self.bn1 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
         self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                                padding=1, bias=False)
-        self.bn2 = get_norm(planes, momentum=BN_MOMENTUM)
+        self.bn2 = nn.BatchNorm2d(planes, momentum=BN_MOMENTUM)
         self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1,
-                               bias=True)
-        self.relu = get_activation_fn(inplace=True)
+                               bias=False)
+        self.bn3 = nn.BatchNorm2d(planes * self.expansion,
+                                  momentum=BN_MOMENTUM)
+        self.relu = nn.ReLU(inplace=True)
         self.downsample = downsample
         self.stride = stride
 
     def forward(self, x):
         residual = x
 
         out = self.conv1(x)
+        out = self.bn1(out)
         out = self.relu(out)
 
         out = self.conv2(out)
         out = self.bn2(out)
+        out = self.relu(out)
 
         out = self.conv3(out)
+        out = self.bn3(out)
 
         if self.downsample is not None:
             residual = self.downsample(x)
 
         out += residual
+        out = self.relu(out)
 
         return out
 
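Bottleneck.expansion is 4 in the stock class (its definition sits above this hunk), so conv3 widens planes fourfold and the skip needs a 1x1 projection whenever channel counts differ. A hedged sketch with assumed widths:

    down = nn.Sequential(
        nn.Conv2d(64, 256, kernel_size=1, bias=False),
        nn.BatchNorm2d(256, momentum=BN_MOMENTUM),
    )
    block = Bottleneck(64, 64, downsample=down)  # 64 in, 64 * 4 out
    assert block(torch.randn(1, 64, 16, 16)).shape == (1, 256, 16, 16)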
@@ -121,7 +115,7 @@ def __init__(self, num_branches, blocks, num_blocks, num_inchannels,
         self.branches = self._make_branches(
             num_branches, blocks, num_blocks, num_channels)
         self.fuse_layers = self._make_fuse_layers()
-        self.relu = get_activation_fn(True)
+        self.relu = nn.ReLU(True)
 
     def _check_branches(self, num_branches, blocks, num_blocks,
                         num_inchannels, num_channels):
@@ -154,7 +148,7 @@ def _make_one_branch(self, branch_index, block, num_blocks, num_channels,
                     num_channels[branch_index] * block.expansion,
                     kernel_size=1, stride=stride, bias=False
                 ),
-                get_norm(
+                nn.BatchNorm2d(
                     num_channels[branch_index] * block.expansion,
                     momentum=BN_MOMENTUM
                 ),
@@ -209,7 +203,7 @@ def _make_fuse_layers(self):
                                 num_inchannels[i],
                                 1, 1, 0, bias=False
                             ),
-                            get_norm(num_inchannels[i]),
+                            nn.BatchNorm2d(num_inchannels[i]),
                             nn.Upsample(scale_factor=2**(j-i), mode='nearest')
                         )
                     )
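For j > i the fuse path first aligns channels with a 1x1 conv + BN, then restores resolution by nearest-neighbour upsampling with factor 2**(j-i). A sketch of the j - i = 1 case, with hypothetical branch widths:

    fuse_up = nn.Sequential(
        nn.Conv2d(64, 32, 1, 1, 0, bias=False),
        nn.BatchNorm2d(32),
        nn.Upsample(scale_factor=2, mode='nearest'),
    )
    assert fuse_up(torch.randn(1, 64, 16, 12)).shape == (1, 32, 32, 24)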
@@ -227,7 +221,7 @@ def _make_fuse_layers(self):
                                         num_outchannels_conv3x3,
                                         3, 2, 1, bias=False
                                     ),
-                                    get_norm(num_outchannels_conv3x3)
+                                    nn.BatchNorm2d(num_outchannels_conv3x3)
                                 )
                             )
                         else:
@@ -239,8 +233,8 @@ def _make_fuse_layers(self):
                                         num_outchannels_conv3x3,
                                         3, 2, 1, bias=False
                                     ),
-                                    get_norm(num_outchannels_conv3x3),
-                                    get_activation_fn(True)
+                                    nn.BatchNorm2d(num_outchannels_conv3x3),
+                                    nn.ReLU(True)
                                 )
                             )
                 fuse_layer.append(nn.Sequential(*conv3x3s))
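For j < i the path stacks i - j stride-2 3x3 convs; every step but the last ends in a ReLU, and the final step stays linear because the fused sum is activated afterwards. A sketch of the i - j = 2 case, widths assumed:

    fuse_down = nn.Sequential(
        nn.Sequential(nn.Conv2d(32, 32, 3, 2, 1, bias=False),
                      nn.BatchNorm2d(32), nn.ReLU(True)),
        nn.Sequential(nn.Conv2d(32, 128, 3, 2, 1, bias=False),
                      nn.BatchNorm2d(128)),  # last step: no ReLU
    )
    assert fuse_down(torch.randn(1, 32, 64, 48)).shape == (1, 128, 16, 12)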
@@ -277,31 +271,6 @@ def forward(self, x):
     'BOTTLENECK': Bottleneck
 }
 
-class FCBlock(nn.Module):
-    def __init__(self, channels, width, height):
-        super().__init__()
-        channels1 = width * height
-        self.fc0 = nn.Linear(channels1, channels1, bias=False)
-        self.norm0 = nn.LayerNorm(channels1)
-        self.fc1 = nn.Linear(channels, channels, bias=False)
-        self.norm1 = nn.LayerNorm(channels)
-        self.relu = get_activation_fn()
-
-    def forward(self, x):
-        residual = x
-        shape = x.shape
-        x = rearrange(x, 'b c h w -> b c (h w)')
-        x = self.fc0(x)
-        x = self.norm0(x)
-        x = rearrange(x, 'b c s -> b s c')
-        x = self.fc1(x)
-        x = self.norm1(x)
-        x = rearrange(x, 'b s c -> b c s')
-        x = torch.reshape(x, shape)
-        x = x + residual
-        x = self.relu(x)
-        return x
-
 
 
 class PoseHighResolutionNet(nn.Module):
@@ -311,10 +280,13 @@ def __init__(self, cfg, **kwargs):
         super(PoseHighResolutionNet, self).__init__()
 
         # stem net
-        self.conv1 = nn.Conv2d(3, 64, kernel_size=4, stride=4,
+        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=2, padding=1,
                                bias=False)
-        self.bn1 = get_norm(64, momentum=BN_MOMENTUM)
-        self.relu = get_activation_fn(inplace=True)
+        self.bn1 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
+        self.conv2 = nn.Conv2d(64, 64, kernel_size=3, stride=2, padding=1,
+                               bias=False)
+        self.bn2 = nn.BatchNorm2d(64, momentum=BN_MOMENTUM)
+        self.relu = nn.ReLU(inplace=True)
         self.layer1 = self._make_layer(Bottleneck, 64, 4)
 
         self.stage2_cfg = extra['STAGE2']
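Both stems reduce resolution by 4x, but the restored one does it with two 3x3 stride-2 convs, each followed by BN and ReLU, instead of one 4x4 stride-4 patchify conv. A quick check of the spatial arithmetic, input size assumed:

    stem = nn.Sequential(
        nn.Conv2d(3, 64, 3, 2, 1, bias=False), nn.BatchNorm2d(64), nn.ReLU(True),
        nn.Conv2d(64, 64, 3, 2, 1, bias=False), nn.BatchNorm2d(64), nn.ReLU(True),
    )
    assert stem(torch.randn(1, 3, 256, 192)).shape == (1, 64, 64, 48)  # 256/4 x 192/4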
@@ -349,8 +321,6 @@ def __init__(self, cfg, **kwargs):
         self.stage4, pre_stage_channels = self._make_stage(
             self.stage4_cfg, num_channels, multi_scale_output=False)
 
-        self.fc_block0 = FCBlock(pre_stage_channels[0], 48, 64)
-        self.fc_block1 = FCBlock(pre_stage_channels[0], 48, 64)
         self.final_layer = nn.Conv2d(
             in_channels=pre_stage_channels[0],
             out_channels=cfg['MODEL']['NUM_JOINTS'],
@@ -378,8 +348,8 @@ def _make_transition_layer(
                             num_channels_cur_layer[i],
                             3, 1, 1, bias=False
                         ),
-                        get_norm(num_channels_cur_layer[i]),
-                        get_activation_fn(inplace=True)
+                        nn.BatchNorm2d(num_channels_cur_layer[i]),
+                        nn.ReLU(inplace=True)
                     )
                 )
             else:
@@ -395,8 +365,8 @@ def _make_transition_layer(
                         nn.Conv2d(
                             inchannels, outchannels, 3, 2, 1, bias=False
                         ),
-                        get_norm(outchannels),
-                        get_activation_fn(inplace=True)
+                        nn.BatchNorm2d(outchannels),
+                        nn.ReLU(inplace=True)
                     )
                 )
             transition_layers.append(nn.Sequential(*conv3x3s))
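A transition either re-maps an existing branch (3x3, stride 1) or, as here, spawns a new lower-resolution branch from the previous stage's last one (3x3, stride 2), always as conv + BN + ReLU. Sketch of a new 128-channel branch, widths assumed:

    new_branch = nn.Sequential(
        nn.Conv2d(64, 128, 3, 2, 1, bias=False),
        nn.BatchNorm2d(128),
        nn.ReLU(inplace=True),
    )
    assert new_branch(torch.randn(1, 64, 32, 24)).shape == (1, 128, 16, 12)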
@@ -411,7 +381,7 @@ def _make_layer(self, block, planes, blocks, stride=1):
                     self.inplanes, planes * block.expansion,
                     kernel_size=1, stride=stride, bias=False
                 ),
-                get_norm(planes * block.expansion, momentum=BN_MOMENTUM),
+                nn.BatchNorm2d(planes * block.expansion, momentum=BN_MOMENTUM),
             )
 
         layers = []
@@ -468,6 +438,9 @@ def forward(self, x):
         x = self.conv1(x)
         x = self.bn1(x)
         x = self.relu(x)
+        x = self.conv2(x)
+        x = self.bn2(x)
+        x = self.relu(x)
         x = self.layer1(x)
 
         x_list = []
@@ -493,9 +466,8 @@ def forward(self, x):
             else:
                 x_list.append(y_list[i])
         y_list = self.stage4(x_list)
-        x = self.fc_block0(y_list[0])
-        x = self.fc_block1(x)
-        x = self.final_layer(x)
+
+        x = self.final_layer(y_list[0])
 
         return x
 
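With the FC blocks gone, the head consumes the highest-resolution branch directly and emits one heatmap per joint. A hedged sketch of the head alone; the kernel size and widths are assumptions, since the nn.Conv2d call above is truncated in this diff:

    final = nn.Conv2d(in_channels=32, out_channels=17, kernel_size=1)
    heatmaps = final(torch.randn(1, 32, 64, 48))  # (1, 17, 64, 48): one map per joint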
@@ -508,7 +480,7 @@ def init_weights(self, pretrained=''):
                 for name, _ in m.named_parameters():
                     if name in ['bias']:
                         nn.init.constant_(m.bias, 0)
-            elif isinstance(m, LayerNorm):
+            elif isinstance(m, nn.BatchNorm2d):
                 nn.init.constant_(m.weight, 1)
                 nn.init.constant_(m.bias, 0)
             elif isinstance(m, nn.ConvTranspose2d):