PaddleCV/rrpn/roidbs.py

# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Based on:
# --------------------------------------------------------
# Detectron
# Copyright (c) 2017-present, Facebook, Inc.
# Licensed under the Apache License, Version 2.0;
# Written by Ross Girshick
# --------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import copy
import logging
import numpy as np
import os
import scipy.sparse
import random
import time
import matplotlib
import cv2
#import segm_utils
from config import cfg
from data_utils import DatasetPath
logger = logging.getLogger(__name__)


class ICDAR2015Dataset(object):
    """A class representing a ICDAR2015 dataset.

    Images are listed from ``self.image_dir``; the annotation of
    ``<name>.jpg`` is read from ``gt_<name>.txt`` inside ``self.gt_dir``.
    Every annotation line has the form ``x1,y1,x2,y2,x3,y3,x4,y4,<text>``.
    A ``###`` in the text field marks the box as hard (difficult).
    """

    def __init__(self, mode):
        """Resolve the image and annotation directories for ``mode``.

        Args:
            mode: dataset split name, e.g. 'train' or 'val'. It is forwarded
                to ``DatasetPath`` and also selects the box format produced
                by ``get_roidb``.
        """
        print('Creating: {}'.format(cfg.dataset))
        self.name = cfg.data_dir
        self.mode = mode
        data_path = DatasetPath(mode, self.name)
        self.image_dir = data_path.get_data_dir()
        self.gt_dir = data_path.get_file_list()

    @staticmethod
    def _quad_to_rbox(pts):
        """Convert quadrilateral corners to [x_ctr, y_ctr, width, height, angle].

        ``width`` is the longer of the edges pt1-pt2 and pt2-pt3 and
        ``height`` the shorter one. ``angle`` is the angle of the longer
        edge in degrees (negated arctan of its slope, 90 for a vertical
        edge); angles below -45 are shifted by 180. The centre is the
        midpoint of pt1 and pt3.
        """
        pt1, pt2, pt3 = pts[0], pts[1], pts[2]
        edge1 = np.sqrt((pt1[0] - pt2[0]) * (pt1[0] - pt2[0]) + (
            pt1[1] - pt2[1]) * (pt1[1] - pt2[1]))
        edge2 = np.sqrt((pt2[0] - pt3[0]) * (pt2[0] - pt3[0]) + (
            pt2[1] - pt3[1]) * (pt2[1] - pt3[1]))
        if edge1 > edge2:
            width, height = edge1, edge2
            start, end = pt1, pt2
        else:
            width, height = edge2, edge1
            start, end = pt2, pt3
        dx = start[0] - end[0]
        dy = start[1] - end[1]
        if dx != 0:
            angle = -np.arctan(float(dy) / float(dx)) / np.pi * 180
        else:
            # Vertical long edge: the slope is undefined.
            angle = 90.0
        if angle < -45.0:
            angle = angle + 180
        x_ctr = float(pt1[0] + pt3[0]) / 2
        y_ctr = float(pt1[1] + pt3[1]) / 2
        return [x_ctr, y_ctr, width, height, angle]

    def _load_boxes(self, gt_name):
        """Parse one annotation file into (easy_boxes, hard_boxes).

        In 'val' mode each box is the flat list of the 8 corner coordinates,
        otherwise it is the 5-element rotated box from ``_quad_to_rbox``.
        Lines with fewer than 9 comma separated fields (blank or truncated
        lines) are skipped.
        """
        as_quad = self.mode == 'val'
        easy_boxes = []
        hard_boxes = []
        # UTF-8-sig drops the BOM that may precede the first coordinate.
        with open(gt_name, 'r', encoding='UTF-8-sig') as gt_obj:
            gt_lines = gt_obj.read().split('\n')
        for gt_line in gt_lines:
            gt_ind = gt_line.split(',')
            # Field 8 (the text) is inspected below, so 9 fields are needed.
            if len(gt_ind) < 9:
                continue
            pts = [(int(gt_ind[2 * i]), int(gt_ind[2 * i + 1]))
                   for i in range(4)]
            if as_quad:
                box = list(np.array(pts).reshape(8))
            else:
                box = self._quad_to_rbox(pts)
            # '###' means the text information can't be obtained.
            if '###' in gt_ind[8]:
                hard_boxes.append(box)
            else:
                easy_boxes.append(box)
        return easy_boxes, hard_boxes

    def get_roidb(self):
        """Return an roidb corresponding to the txt dataset.

        Only ``.jpg`` files are considered, in sorted order. Images that end
        up without any ground truth box are skipped (and are not decoded).

        Returns:
            A list of dicts with keys 'im_id', 'gt_classes', 'image',
            'boxes', 'height', 'width', 'is_crowd' and 'is_difficult'.
            'boxes' has 8 columns (corner coordinates) in 'val' mode and
            5 columns (x_ctr, y_ctr, width, height, angle) otherwise.

        Raises:
            IOError: if an image that has ground truth boxes cannot be read.
        """
        is_train = self.mode == 'train'
        im_infos = []
        for image in sorted(os.listdir(self.image_dir)):
            if image.split('.')[-1] != 'jpg':
                continue
            img_name = os.path.join(self.image_dir, image)
            gt_name = os.path.join(self.gt_dir, 'gt_' + image[:-4] + '.txt')
            easy_boxes, hard_boxes = self._load_boxes(gt_name)
            if is_train:
                # hard box only get 1/3 for train
                hard_boxes = hard_boxes[:len(hard_boxes) // 3]
            boxes = easy_boxes + hard_boxes
            len_of_bboxes = len(boxes)
            if len_of_bboxes <= 0:
                continue

            img = cv2.imread(img_name)
            if img is None:
                raise IOError('Failed to read image: {}'.format(img_name))

            is_difficult = np.array([0] * len(easy_boxes) + [1] * len(
                hard_boxes)).reshape(1, len_of_bboxes).astype(np.int32)
            # NOTE(review): box values are truncated to integers (int32)
            # before being stored as float64. This matches the historical
            # output of this loader; confirm whether the precision loss on
            # centre/size/angle is intended.
            gt_boxes = np.array(
                boxes, dtype=np.float64).astype(np.int32).astype(np.float64)
            im_infos.append({
                'im_id': len(im_infos),
                'gt_classes': np.ones((len_of_bboxes), dtype=np.int32),
                'image': img_name,
                'boxes': gt_boxes,
                'height': img.shape[0],
                'width': img.shape[1],
                'is_crowd': np.zeros((len_of_bboxes), dtype=np.int32),
                'is_difficult': is_difficult
            })

        return im_infos


class ICDAR2017Dataset(object):
    """A class representing a ICDAR2017 dataset.

    Images are listed from ``self.image_dir``; the annotation of an image
    ``<name>.<ext>`` is read from ``gt_<name>.txt`` inside ``self.gt_dir``.
    Every annotation line starts with the 8 corner coordinates
    ``x1,y1,x2,y2,x3,y3,x4,y4`` and its last field is used as class label.
    """

    def __init__(self, mode):
        """Resolve the image and annotation directories for ``mode``.

        Args:
            mode: dataset split name, e.g. 'train' or 'val'. It is forwarded
                to ``DatasetPath`` and also selects the box format and label
                handling used by ``get_roidb``.
        """
        print('Creating: {}'.format(cfg.dataset))
        self.name = cfg.data_dir
        self.mode = mode
        data_path = DatasetPath(mode, self.name)
        self.image_dir = data_path.get_data_dir()
        self.gt_dir = data_path.get_file_list()

    @staticmethod
    def _quad_to_rbox(pts):
        """Convert quadrilateral corners to [x_ctr, y_ctr, width, height, angle].

        ``width`` is the longer of the edges pt1-pt2 and pt2-pt3 and
        ``height`` the shorter one. ``angle`` is the angle of the longer
        edge in degrees (negated arctan of its slope, 90 for a vertical
        edge); angles below -45 are shifted by 180. The centre is the
        midpoint of pt1 and pt3.
        """
        pt1, pt2, pt3 = pts[0], pts[1], pts[2]
        edge1 = np.sqrt((pt1[0] - pt2[0]) * (pt1[0] - pt2[0]) + (
            pt1[1] - pt2[1]) * (pt1[1] - pt2[1]))
        edge2 = np.sqrt((pt2[0] - pt3[0]) * (pt2[0] - pt3[0]) + (
            pt2[1] - pt3[1]) * (pt2[1] - pt3[1]))
        if edge1 > edge2:
            width, height = edge1, edge2
            start, end = pt1, pt2
        else:
            width, height = edge2, edge1
            start, end = pt2, pt3
        dx = start[0] - end[0]
        dy = start[1] - end[1]
        if dx != 0:
            angle = -np.arctan(float(dy) / float(dx)) / np.pi * 180
        else:
            # Vertical long edge: the slope is undefined.
            angle = 90.0
        if angle < -45.0:
            angle = angle + 180
        x_ctr = float(pt1[0] + pt3[0]) / 2
        y_ctr = float(pt1[1] + pt3[1]) / 2
        return [x_ctr, y_ctr, width, height, angle]

    def _load_annotations(self, gt_name, label_to_id):
        """Parse one annotation file into (boxes, class_ids).

        In 'val' mode labels are looked up in ``label_to_id`` (an unknown
        label raises KeyError) and boxes are the flat 8 corner coordinates.
        Otherwise unseen labels are added to ``label_to_id`` with the next
        free 1-based id and boxes are 5-element rotated boxes. Lines with
        fewer than 8 comma separated fields are skipped.
        """
        is_val = self.mode == 'val'
        boxes = []
        class_ids = []
        # UTF-8-sig drops the BOM that may precede the first coordinate.
        with open(gt_name, 'r', encoding='UTF-8-sig') as gt_obj:
            gt_lines = gt_obj.read().split('\n')
        for gt_line in gt_lines:
            gt_ind = gt_line.split(',')
            if len(gt_ind) < 8:
                continue
            # Stripped so that the label matches what get_labels_maps()
            # reads back from the label_list file.
            label = gt_ind[-1].strip()
            if is_val:
                class_ids.append(label_to_id[label])
            else:
                class_ids.append(
                    label_to_id.setdefault(label, len(label_to_id) + 1))
            pts = [(int(gt_ind[2 * i]), int(gt_ind[2 * i + 1]))
                   for i in range(4)]
            if is_val:
                boxes.append(list(np.array(pts).reshape(8)))
            else:
                boxes.append(self._quad_to_rbox(pts))
        return boxes, class_ids

    def get_roidb(self):
        """Return an roidb corresponding to the txt dataset.

        Files with extension jpg, bmp or png are considered, in sorted
        order. Images without any ground truth box are skipped (and are not
        decoded). In 'train' mode the discovered class names are written,
        one per line in class id order, to ``<cfg.data_dir>/label_list``;
        in 'val' mode class ids are taken from ``get_labels_maps()``.

        Returns:
            A list of dicts with keys 'im_id', 'gt_classes', 'image',
            'boxes', 'height', 'width', 'is_crowd' and 'is_difficult'.
            'boxes' has 8 columns (corner coordinates) in 'val' mode and
            5 columns (x_ctr, y_ctr, width, height, angle) otherwise.

        Raises:
            IOError: if an image that has ground truth boxes cannot be read.
        """
        post_fix = ['jpg', 'bmp', 'png']
        if self.mode == 'val':
            label_to_id = get_labels_maps()
        else:
            label_to_id = {}
        im_infos = []
        for image in sorted(os.listdir(self.image_dir)):
            if image.split('.')[-1] not in post_fix:
                continue
            img_name = os.path.join(self.image_dir, image)
            gt_name = os.path.join(self.gt_dir, 'gt_' + image[:-4] + '.txt')
            boxes, class_ids = self._load_annotations(gt_name, label_to_id)
            len_of_bboxes = len(boxes)
            if len_of_bboxes <= 0:
                continue

            img = cv2.imread(img_name)
            if img is None:
                raise IOError('Failed to read image: {}'.format(img_name))

            # NOTE(review): box values are truncated to integers (int32)
            # before being stored as float64. This matches the historical
            # output of this loader; confirm whether the precision loss on
            # centre/size/angle is intended.
            gt_boxes = np.array(
                boxes, dtype=np.float64).astype(np.int32).astype(np.float64)
            im_infos.append({
                'im_id': len(im_infos),
                'gt_classes': np.array(class_ids).reshape(len_of_bboxes, 1),
                'image': img_name,
                'boxes': gt_boxes,
                'height': img.shape[0],
                'width': img.shape[1],
                'is_crowd': np.zeros((len_of_bboxes), dtype=np.int32),
                'is_difficult': np.zeros(
                    (len_of_bboxes, 1), dtype=np.int32)
            })

        # Written once, after all images were scanned. Names are emitted in
        # class id order because get_labels_maps() derives ids from the
        # line numbers of this file.
        if self.mode == 'train' and im_infos:
            with open(os.path.join(cfg.data_dir, 'label_list'), 'w') as g:
                for k in sorted(label_to_id, key=label_to_id.get):
                    g.write(k + "\n")
        return im_infos


def get_labels_maps():
    """Load the class name to class id mapping from the label list file.

    Reads ``<cfg.data_dir>/label_list`` and maps every line, with
    surrounding whitespace stripped, to its 1-based line number. If a name
    occurs more than once the last occurrence wins.

    Returns:
        dict mapping label name to integer class id.
    """
    label_file = os.path.join(cfg.data_dir, 'label_list')
    with open(label_file) as label_fh:
        return {
            name.strip(): line_no
            for line_no, name in enumerate(label_fh.readlines(), 1)
        }