# data_utils.py
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Based on:
# --------------------------------------------------------
# Detectron
# Copyright (c) 2017-present, Facebook, Inc.
# Licensed under the Apache License, Version 2.0;
# Written by Ross Girshick
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import cv2
import numpy as np
from config import cfg
import os
from PIL import Image
class DatasetPath(object):
    """Locate the image and annotation directories of one dataset split.

    ``mode`` selects the split ('train' or 'val') and ``dataset_name`` is
    used as the root directory that the split sub-directories are joined to.
    """

    # (mode, sub-directory) pairs holding the images of each split.
    _IMAGE_SUBDIRS = (
        ('train', 'ch4_training_images'),
        ('val', 'ch4_test_images'), )
    # (mode, sub-directory) pairs holding the annotations of each split.
    _LABEL_SUBDIRS = (
        ('train', 'ch4_training_localization_transcription_gt'),
        ('val', 'ch4_test_localization_transcription_gt'), )

    def __init__(self, mode, dataset_name):
        self.mode = mode
        self.data_dir = dataset_name

    def _subdir_path(self, table):
        """Join the root with the sub-directory registered for ``self.mode``.

        Returns None when the mode has no entry in ``table``.
        """
        for split, subdir in table:
            if self.mode == split:
                return os.path.join(self.data_dir, subdir)
        return None

    def get_data_dir(self):
        """Return the image directory of the current split (None if unknown)."""
        return self._subdir_path(self._IMAGE_SUBDIRS)

    def get_file_list(self):
        """Return the annotation directory of the current split (None if unknown)."""
        return self._subdir_path(self._LABEL_SUBDIRS)
def get_image_blob(roidb, mode):
    """Load the image of one roidb entry and build a normalized CHW blob.

    Args:
        roidb: Mapping with the keys ``'image'`` (path of the image file),
            ``'boxes'`` and ``'gt_classes'``.
        mode: ``'train'`` resizes with target 800 / max 1333, scales the first
            four box columns by the resize factor and applies a random
            rotation; ``'val'`` resizes with target 1000 / max 1778 and
            passes boxes and labels through untouched.

    Returns:
        ``(img, im_scale, gt_boxes, gt_label)`` where ``img`` is a float
        array transposed to (channel, height, width), or ``None`` when
        ``mode`` is neither ``'train'`` nor ``'val'``.

    Raises:
        ValueError: If the file content cannot be decoded as an image.
    """
    if mode not in ('train', 'val'):
        # Other modes are not handled by this function.
        return None

    with open(roidb['image'], 'rb') as f:
        raw = np.frombuffer(f.read(), dtype='uint8')
    # Flag 1 requests a 3-channel color image.
    img = cv2.imdecode(raw, 1)
    if img is None:
        # imdecode signals undecodable data by returning None; fail here with
        # a clear message instead of an opaque error inside cvtColor.
        raise ValueError('Failed to decode image: {}'.format(roidb['image']))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    gt_boxes = roidb['boxes']
    gt_label = roidb['gt_classes']
    if mode == 'train':
        img, im_scale = _resize(img, target_size=800, max_size=1333)
        # Work on a copy so the roidb entry keeps its original boxes.
        need_gt_boxes = gt_boxes.copy()
        need_gt_boxes[:, :4] *= im_scale
        img, need_gt_boxes, need_gt_label = _rotation(
            img, need_gt_boxes, gt_label, prob=1.0, gt_margin=1.4)
    else:
        img, im_scale = _resize(img, target_size=1000, max_size=1778)
        need_gt_boxes = gt_boxes
        need_gt_label = gt_label

    # Scale to [0, 1], then normalize per channel with the configured
    # statistics (assumes cfg.pixel_means / cfg.pixel_std are 1-D
    # per-channel sequences -- TODO confirm against config).
    img = img.astype(np.float32, copy=False)
    img = img / 255.0
    mean = np.array(cfg.pixel_means)[np.newaxis, np.newaxis, :]
    std = np.array(cfg.pixel_std)[np.newaxis, np.newaxis, :]
    img -= mean
    img /= std
    # HWC -> CHW
    img = img.transpose((2, 0, 1))
    return img, im_scale, need_gt_boxes, need_gt_label
def _get_size_scale(w, h, min_size, max_size=None):
size = min_size
scale = 1.0
if max_size is not None:
min_original_size = float(min((w, h)))
max_original_size = float(max((w, h)))
if max_original_size / min_original_size * size > max_size:
size = int(round(max_size * min_original_size / max_original_size))
if (w <= h and w == size) or (h <= w and h == size):
return (h, w), scale
if w < h:
ow = size
oh = int(size * h / w)
scale = size / w
else:
oh = size
ow = int(size * w / h)
scale = size / h
scale = ow / w
return (oh, ow), scale
def _resize(im, target_size=800, max_size=1333):
if not isinstance(im, np.ndarray):
raise TypeError("{}: image type is not numpy.")
if len(im.shape) != 3:
raise ImageError('{}: image is not 3-dimensional.')
im_shape = im.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
selected_size = target_size
if float(im_size_min) == 0:
raise ZeroDivisionError('min size of image is 0')
if max_size != 0:
im_scale = float(selected_size) / float(im_size_min)
# Prevent the biggest axis from being more than max_size
if np.round(im_scale * im_size_max) > max_size:
im_scale = float(max_size) / float(im_size_max)
im_scale_x = im_scale
im_scale_y = im_scale
resize_w = np.round(im_scale_x * float(im_shape[1]))
resize_h = np.round(im_scale_y * float(im_shape[0]))
im_info = [resize_h, resize_w, im_scale]
else:
im_scale_x = float(selected_size) / float(im_shape[1])
im_scale_y = float(selected_size) / float(im_shape[0])
resize_w = selected_size
resize_h = selected_size
im = Image.fromarray(im)
im = im.resize((int(resize_w), int(resize_h)), 2)
im = np.array(im)
return im, im_scale_x
def _rotation(image,
              gt_boxes,
              gt_label,
              prob,
              fixed_angle=-1,
              r_range=(360, 0),
              gt_margin=1.4):
    """Rotate an image about its center and transform its boxes to match.

    Args:
        image: Image convertible with ``np.array``; its first two axes are
            read as (height, width).
        gt_boxes: Array whose five columns are read as (center x, center y,
            width, height, angle in degrees). The input is not modified.
        gt_label: Array with one entry per box along its first axis.
        prob: A random angle is drawn when ``np.random.rand() <= prob``;
            otherwise ``max(0, fixed_angle)`` is used.
        fixed_angle: Angle in degrees used when no random angle is drawn.
        r_range: ``(rotate_range, shift)``; the random angle is
            ``rand * rotate_range - shift`` truncated to whole degrees.
        gt_margin: Factor applied to the width and height of every box.

    Returns:
        ``(image, boxes, labels)``: the rotated image (same size as the
        input), a float32 array of the transformed boxes whose centers still
        lie inside the image, and the labels of those boxes.
    """
    rotate_range, shift = r_range[0], r_range[1]

    # Keep the angle a plain Python number. A shape-(1,) array would have to
    # be converted implicitly wherever a scalar is expected, which NumPy has
    # deprecated for arrays with ndim > 0.
    angle = max(0, fixed_angle)
    if np.random.rand() <= prob:
        drawn = np.array(
            np.random.rand(1) * rotate_range - shift, dtype=np.int16)
        angle = int(drawn[0])

    # Rotate the image; OpenCV treats a positive angle as anti-clockwise.
    image = np.array(image)
    im_height, im_width = image.shape[:2]
    center = (im_width / 2, im_height / 2)
    rotation = cv2.getRotationMatrix2D(center, angle, 1.0)
    image = cv2.warpAffine(image, rotation, (im_width, im_height))

    # Rotate the box centers about the image center:
    #   x' =  cos * x + sin * y
    #   y' = -sin * x + cos * y
    boxes = gt_boxes.copy()
    theta = np.pi / 180 * angle
    cos_t, sin_t = np.cos(theta), np.sin(theta)
    center_xy = np.array(center)
    offsets = boxes[:, 0:2] - center_xy
    rotated_offsets = np.stack(
        [
            cos_t * offsets[:, 0] + sin_t * offsets[:, 1],
            -sin_t * offsets[:, 0] + cos_t * offsets[:, 1],
        ],
        axis=1)
    # The rotated offsets are truncated to whole pixels (int16).
    rotated_offsets = np.array(rotated_offsets, dtype=np.int16)
    boxes[:, 0:2] = rotated_offsets + center_xy

    # Step 1: make the width the longer side, adding 90 degrees when the
    # sides are swapped.
    widths, heights, angles = boxes[:, 2], boxes[:, 3], boxes[:, 4]
    tall = widths < heights
    long_sides = np.where(tall, heights, widths)
    short_sides = np.where(tall, widths, heights)
    base_angles = np.where(tall, angles + 90, angles)
    # Step 2: apply the image rotation.
    rotated_angles = base_angles + angle
    # Step 3: fold angles above 135 degrees back in steps of 180.
    over = rotated_angles > 135
    while over.any():
        rotated_angles[over] -= 180
        over = rotated_angles > 135

    rotated_gt_boxes = np.empty((len(boxes), 5), dtype=np.float32)
    rotated_gt_boxes[:, 0:2] = boxes[:, 0:2]
    rotated_gt_boxes[:, 2] = long_sides * gt_margin
    rotated_gt_boxes[:, 3] = short_sides * gt_margin
    rotated_gt_boxes[:, 4] = rotated_angles

    # Keep only the boxes whose center is still inside the image.
    x_inbound = np.logical_and(rotated_gt_boxes[:, 0] >= 0,
                               rotated_gt_boxes[:, 0] < im_width)
    y_inbound = np.logical_and(rotated_gt_boxes[:, 1] >= 0,
                               rotated_gt_boxes[:, 1] < im_height)
    inbound = np.logical_and(x_inbound, y_inbound)
    need_gt_boxes = rotated_gt_boxes[inbound]
    need_gt_label = gt_label.copy()[inbound]
    return image, need_gt_boxes, need_gt_label
def prep_im_for_blob(im, pixel_means, target_size, max_size):
    """Prepare an image for use as a network input blob.

    The image is converted to float32, the per-channel ``pixel_means`` are
    subtracted, the result is rescaled so that its short side equals
    ``target_size`` (with the long side capped at ``max_size``) and the axes
    are reordered from (height, width, channel) to (channel, height, width).

    Args:
        im: Image array; the first two axes are read as (height, width).
            The caller's array is left unmodified.
        pixel_means: Values subtracted from ``im`` (broadcast against it).
        target_size: Desired length of the short side.
        max_size: Upper bound for the long side after scaling.

    Returns:
        ``(im, im_scale)``: the transformed image and the scale factor that
        was applied to both axes.
    """
    # Always take a private float32 copy: with copy=False a float32 input
    # would be returned as-is and the in-place subtraction below would
    # silently modify the caller's array.
    im = im.astype(np.float32, copy=True)
    im -= pixel_means

    im_size_min = np.min(im.shape[0:2])
    im_size_max = np.max(im.shape[0:2])
    im_scale = float(target_size) / float(im_size_min)
    # Prevent the biggest axis from being more than max_size
    if np.round(im_scale * im_size_max) > max_size:
        im_scale = float(max_size) / float(im_size_max)
    im = cv2.resize(
        im,
        None,
        None,
        fx=im_scale,
        fy=im_scale,
        interpolation=cv2.INTER_LINEAR)
    # (height, width, channel) -> (channel, height, width)
    im = im.transpose((2, 0, 1))
    return im, im_scale