# PaddleCV/tracking/pytracking/features/augmentation.py

import numpy as np
import math

from paddle.fluid import layers

import cv2 as cv

from pytracking.features.preprocessing import numpy_to_paddle, paddle_to_numpy
from pytracking.libs.Fconv2d import Fconv2d
from pytracking.libs.paddle_utils import PTensor, _padding, n2p


class Transform:
    """Common base for the augmentation transforms in this module.

    Stores the requested output size and a (vertical, horizontal) shift, and
    provides ``crop_to_output`` which subclasses call after applying their
    own image operation.
    """

    def __init__(self, output_sz=None, shift=None):
        """Remember the target size and shift.

        Args:
            output_sz: Indexable ``(height, width)`` of the result, or None
                to leave the size unchanged.
            shift: Indexable ``(vertical, horizontal)`` offset; defaults to
                ``(0, 0)`` when None.
        """
        self.output_sz = output_sz
        if shift is None:
            shift = (0, 0)
        self.shift = shift

    def __call__(self, image):
        """Apply the transform; must be provided by subclasses."""
        raise NotImplementedError

    def crop_to_output(self, image, shift=None):
        """Pad ``image`` to ``self.output_sz`` with the shift applied.

        For a PTensor the spatial size is read from ``shape[2:]``; for any
        other input (numpy arrays in this module) from ``shape[:2]``.

        Args:
            image: PTensor or array-like with a ``shape`` attribute.
            shift: Optional ``(vertical, horizontal)`` override of
                ``self.shift``.

        Returns:
            Whatever ``_padding`` returns for the computed border widths in
            'replicate' mode. Border widths may be negative when the image
            is larger than the output size; presumably ``_padding`` crops in
            that case -- its implementation is not visible here.
        """
        is_tensor = isinstance(image, PTensor)
        spatial = image.shape[2:] if is_tensor else image.shape[:2]

        if self.output_sz is not None:
            half_h = (self.output_sz[0] - spatial[0]) / 2
            half_w = (self.output_sz[1] - spatial[1]) / 2
        else:
            half_h = 0
            half_w = 0

        if shift is None:
            shift = self.shift

        # floor on one side and ceil on the other, so that an odd size
        # difference still sums to the full difference.
        left = math.floor(half_w) + shift[1]
        right = math.ceil(half_w) - shift[1]
        top = math.floor(half_h) + shift[0]
        bottom = math.ceil(half_h) - shift[0]

        borders = (left, right, top, bottom)
        if not is_tensor:
            # Non-tensor inputs get two extra leading zero entries.
            borders = (0, 0) + borders
        return _padding(image, borders, mode='replicate')


class Identity(Transform):
    """Leave the pixels untouched; only crop/pad to the output size."""

    def __call__(self, image):
        """Return ``image`` after the inherited ``crop_to_output`` step."""
        # No pixel operation of its own: size/shift handling is all that runs.
        result = self.crop_to_output(image)
        return result


class FlipHorizontal(Transform):
    """Mirror the image left-to-right, then crop/pad to the output size."""

    def __call__(self, image):
        """Flip ``image`` and pass it through ``crop_to_output``.

        A PTensor is reversed along axis 3; any other input is flipped with
        ``np.fliplr``.
        """
        if not isinstance(image, PTensor):
            mirrored = np.fliplr(image)
        else:
            mirrored = layers.reverse(image, 3)
        return self.crop_to_output(mirrored)


class FlipVertical(Transform):
    """Mirror the image top-to-bottom, then crop/pad to the output size."""

    def __call__(self, image):
        """Flip ``image`` and pass it through ``crop_to_output``.

        A PTensor is reversed along axis 2; any other input is flipped with
        ``np.flipud``. The parameter is intentionally not annotated as
        PTensor because non-tensor inputs are handled as well.
        """
        if isinstance(image, PTensor):
            flipped = layers.reverse(image, 2)
        else:
            flipped = np.flipud(image)
        return self.crop_to_output(flipped)


class Translation(Transform):
    """Shift the image by a fixed offset during the crop/pad step."""

    def __init__(self, translation, output_sz=None, shift=None):
        """Fold ``translation`` into the stored shift.

        Args:
            translation: Indexable pair added element-wise to the base shift.
            output_sz: Forwarded to ``Transform.__init__``.
            shift: Forwarded to ``Transform.__init__``.
        """
        super().__init__(output_sz, shift)
        base_first, base_second = self.shift[0], self.shift[1]
        self.shift = (base_first + translation[0],
                      base_second + translation[1])

    def __call__(self, image):
        """Return ``image`` cropped/padded with the combined shift."""
        # The translation itself is realised entirely by crop_to_output.
        shifted = self.crop_to_output(image)
        return shifted


class Scale(Transform):
    """Resize a square image by ``1 / scale_factor``, then crop/pad."""

    def __init__(self, scale_factor, output_sz=None, shift=None):
        """Store the scale factor.

        Args:
            scale_factor: The original side length is divided by this value
                to obtain the resized side length.
            output_sz: Forwarded to ``Transform.__init__``.
            shift: Forwarded to ``Transform.__init__``.
        """
        super().__init__(output_sz, shift)
        self.scale_factor = scale_factor

    def __call__(self, image):
        """Resize ``image`` bilinearly and pass it through ``crop_to_output``.

        Raises:
            NotImplementedError: If the image is not square.
        """
        is_tensor = isinstance(image, PTensor)

        # Read the spatial size with the same convention crop_to_output uses:
        # trailing dims for a PTensor, leading two dims for a numpy array.
        # Reading shape[2:] unconditionally made the numpy path fail on
        # unpacking before cv.resize could be reached.
        if is_tensor:
            h_orig, w_orig = image.shape[2:]
        else:
            h_orig, w_orig = image.shape[:2]

        if h_orig != w_orig:
            raise NotImplementedError

        # Calculate new size. Keep the difference to the original size even
        # (same parity as the original) so that crop/pad becomes easier.
        h_new = round(h_orig / self.scale_factor)
        h_new += (h_new - h_orig) % 2
        w_new = round(w_orig / self.scale_factor)
        w_new += (w_new - w_orig) % 2

        if is_tensor:
            image_resized = layers.resize_bilinear(
                image, [h_new, w_new], align_corners=False)
        else:
            # cv.resize takes the target size as (width, height).
            image_resized = cv.resize(
                image, (w_new, h_new), interpolation=cv.INTER_LINEAR)
        return self.crop_to_output(image_resized)


class Affine(Transform):
    """Warp the image with a fixed affine matrix, then crop/pad."""

    def __init__(self, transform_matrix, output_sz=None, shift=None):
        """Store the matrix that is later handed to ``cv.warpAffine``.

        Args:
            transform_matrix: Matrix passed unchanged to ``cv.warpAffine``.
            output_sz: Forwarded to ``Transform.__init__``.
            shift: Forwarded to ``Transform.__init__``.
        """
        super().__init__(output_sz, shift)
        self.transform_matrix = transform_matrix

    def __call__(self, image, crop=True):
        """Warp ``image`` and optionally crop/pad the result.

        A PTensor is converted to numpy, warped without cropping, converted
        back and then always passed through ``crop_to_output`` (``crop`` is
        not consulted on that path).

        Args:
            image: PTensor or numpy image.
            crop: For numpy input, whether to apply ``crop_to_output``.
        """
        if isinstance(image, PTensor):
            as_array = paddle_to_numpy(image)
            warped_array = self(as_array, crop=False)
            return self.crop_to_output(numpy_to_paddle(warped_array))

        # Destination size is the input size, given as (width, height).
        dsize = image.shape[1::-1]
        warped = cv.warpAffine(
            image,
            self.transform_matrix,
            dsize,
            borderMode=cv.BORDER_REPLICATE)
        return self.crop_to_output(warped) if crop else warped


class Rotate(Transform):
    """Rotate with given angle (in degrees) about the image centre."""

    def __init__(self, angle, output_sz=None, shift=None):
        """Store the angle, converted from degrees to radians.

        Args:
            angle: Rotation angle in degrees.
            output_sz: Forwarded to ``Transform.__init__``.
            shift: Forwarded to ``Transform.__init__``.
        """
        super().__init__(output_sz, shift)
        self.angle = math.pi * angle / 180

    def __call__(self, image, crop=True):
        """Rotate ``image`` and optionally crop/pad the result.

        A PTensor is converted to numpy, rotated without cropping, converted
        back and then always passed through ``crop_to_output`` (``crop`` is
        not consulted on that path).

        Args:
            image: PTensor or numpy image.
            crop: For numpy input, whether to apply ``crop_to_output``.
        """
        if isinstance(image, PTensor):
            return self.crop_to_output(
                numpy_to_paddle(self(
                    paddle_to_numpy(image), crop=False)))
        else:
            # Centre as a column vector in OpenCV's (x, y) order, i.e.
            # ((w - 1) / 2, (h - 1) / 2). shape[:2] would give (h, w) and
            # place the centre wrongly for non-square images; for square
            # images both orderings coincide.
            c = (np.expand_dims(np.array(image.shape[1::-1]), 1) - 1) / 2
            R = np.array([[math.cos(self.angle), math.sin(self.angle)],
                          [-math.sin(self.angle), math.cos(self.angle)]])
            # 2x3 matrix [R | c - R c]: maps p to R (p - c) + c, so c stays
            # fixed under the warp.
            H = np.concatenate([R, c - R @ c], 1)
            warp = cv.warpAffine(
                image, H, image.shape[1::-1], borderMode=cv.BORDER_REPLICATE)
            if crop:
                return self.crop_to_output(warp)
            else:
                return warp


class Blur(Transform):
    """Separable Gaussian blur; sigma may differ per axis."""

    def __init__(self, sigma, output_sz=None, shift=None):
        """Precompute the normalised 1-D Gaussian kernels.

        Args:
            sigma: A single number used for both axes, or a pair of per-axis
                values.
            output_sz: Forwarded to ``Transform.__init__``.
            shift: Forwarded to ``Transform.__init__``.
        """
        super().__init__(output_sz, shift)
        if isinstance(sigma, (float, int)):
            sigma = (sigma, sigma)
        self.sigma = sigma
        # Kernel half-width per axis: ceil(2 * sigma).
        self.filter_size = [math.ceil(2 * s) for s in self.sigma]

        kernels = []
        for half_width, s in zip(self.filter_size, self.sigma):
            taps = np.arange(-half_width, half_width + 1, 1, dtype='float32')
            kernels.append(np.exp(0 - (taps * taps) / (2 * s**2)))
        self.filter_np = kernels

        # Normalise each kernel to unit sum and lay it out as a 4-D filter:
        # the first varies along dim 2, the second along dim 3.
        first, second = self.filter_np[0], self.filter_np[1]
        self.filter_np[0] = np.reshape(first, [1, 1, -1, 1]) / np.sum(first)
        self.filter_np[1] = np.reshape(second, [1, 1, 1, -1]) / np.sum(second)

    def __call__(self, image):
        """Blur ``image`` and pass it through ``crop_to_output``.

        Non-tensor input is converted with ``numpy_to_paddle``, processed by
        the tensor path, and converted back with ``paddle_to_numpy``.
        """
        if not isinstance(image, PTensor):
            return paddle_to_numpy(self(numpy_to_paddle(image)))

        sz = image.shape[2:]
        height, width = sz[0], sz[1]
        kernels = [n2p(k) for k in self.filter_np]

        # Collapse leading dims so each plane is filtered on its own
        # with the single-channel kernels.
        planes = layers.reshape(image, [-1, 1, height, width])
        pass_one = Fconv2d(
            planes, kernels[0], padding=(self.filter_size[0], 0))
        pass_two = Fconv2d(
            pass_one, kernels[1], padding=(0, self.filter_size[1]))
        restored = layers.reshape(pass_two, [1, -1, height, width])
        return self.crop_to_output(restored)