# =====================================================================
# Copyright (C) 2023 Stefan Schubert, stefan.schubert@etit.tu-chemnitz.de
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# =====================================================================
#
import numpy as np
from typing import List
from .feature_extractor import FeatureExtractor


class AlexNetConv3Extractor(FeatureExtractor):
    def __init__(self, nDims: int = 4096):
        import torch
        from torchvision import transforms

        self.nDims = nDims

        # load pretrained AlexNet and keep only the layers up to conv3
        self.model = torch.hub.load('pytorch/vision:v0.10.0', 'alexnet', pretrained=True)
        self.model = self.model.features[:7]
        self.model.eval()  # inference mode

        # preprocessing: resize to 224x224 and normalize with ImageNet statistics
        self.preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Resize([224, 224]),
            # transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

        # select the fastest available device
        if torch.cuda.is_available():
            print('Using GPU')
            self.device = torch.device("cuda")
        elif torch.backends.mps.is_available() and torch.backends.mps.is_built():
            print('Using MPS')
            self.device = torch.device("mps")
        else:
            print('Using CPU')
            self.device = torch.device("cpu")

        self.model.to(self.device)

    def compute_features(self, imgs: List[np.ndarray]) -> np.ndarray:
        import torch

        # preprocess and batch the images
        imgs_torch = [self.preprocess(img) for img in imgs]
        imgs_torch = torch.stack(imgs_torch, dim=0)
        imgs_torch = imgs_torch.to(self.device)

        # forward pass up to conv3
        with torch.no_grad():
            output = self.model(imgs_torch)

        # flatten the feature maps to one descriptor per image
        output = output.to('cpu').numpy()
        Ds = output.reshape([len(imgs), -1])

        # reduce dimensionality to nDims with a seeded Gaussian random
        # projection whose rows are scaled to unit length
        rng = np.random.default_rng(seed=0)
        Proj = rng.standard_normal([Ds.shape[1], self.nDims], 'float32')
        Proj = Proj / np.linalg.norm(Proj, axis=1, keepdims=True)
        Ds = Ds @ Proj

        return Ds
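
# Usage sketch (an illustration, not part of the original file; it assumes torch and
# torchvision are installed and that `imgs` is a list of HxWx3 uint8 RGB arrays):
#
#   extractor = AlexNetConv3Extractor(nDims=4096)
#   Ds = extractor.compute_features(imgs)  # shape: (len(imgs), 4096)
#   # descriptor sets can then be compared, e.g., via inner products: S = Ds_db @ Ds_q.T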


class HDCDELF(FeatureExtractor):
    def __init__(self):
        from .feature_extractor_local import DELF

        self.DELF = DELF()  # local DELF descriptor

    def compute_features(self, imgs: List[np.ndarray]) -> np.ndarray:
        from feature_aggregation.hdc import HDC

        # aggregate the local DELF descriptors of each image into a single
        # holistic descriptor via hyperdimensional computing (HDC)
        D_local = self.DELF.compute_features(imgs)
        D_holistic = HDC(D_local).compute_holistic()

        return D_holistic
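
# Usage sketch (an illustration, not part of the original file; it assumes the
# repository's local DELF extractor and the feature_aggregation.hdc module are
# available and importable):
#
#   extractor = HDCDELF()
#   D_holistic = extractor.compute_features(imgs)  # one holistic descriptor per image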


# sum of absolute differences (SAD) [Milford and Wyeth (2012). "SeqSLAM: Visual
# Route-Based Navigation for Sunny Summer Days and Stormy Winter Nights". ICRA.]
class SAD(FeatureExtractor):
    def __init__(self, nPixels: int = 2048, patchLength: int = 8):
        self.nPixels = nPixels  # number of pixels in the downsampled image
        self.patchLength = patchLength  # side length of patches for patch normalization
        self.imshapeDownsampled = None

    def compute_features(self, imgs: List[np.ndarray]) -> np.ndarray:
        # determine a new image shape that yields roughly self.nPixels pixels
        # and image dimensions that are multiples of self.patchLength
        if self.imshapeDownsampled is None:
            [h, w, _] = np.array(imgs[0].shape)
            k = np.sqrt(self.nPixels / (h * w))

            h = np.ceil(k * h)
            h -= np.mod(h, self.patchLength)
            w = np.ceil(k * w)
            w -= np.mod(w, self.patchLength)

            # flooring to multiples of patchLength may undershoot the target;
            # check whether growing both dimensions by one patch length gets
            # closer to self.nPixels
            if np.abs(self.nPixels - h*w) > np.abs(self.nPixels - (h+self.patchLength)*(w+self.patchLength)):
                h += self.patchLength
                w += self.patchLength

            self.imshapeDownsampled = [int(h), int(w)]

        # grayscale conversion and downsampling
        from torchvision import transforms
        preprocess = transforms.Compose([
            transforms.ToPILImage(),
            transforms.Grayscale(),
            transforms.Resize(self.imshapeDownsampled),
        ])
        imgs = [np.array(preprocess(img)) for img in imgs]

        # patch normalization; one flattened descriptor per image
        Ds = [self.__patch_normalize(img).flatten() for img in imgs]
        Ds = np.array(Ds).astype('float32')

        return Ds

    def __patch_normalize(self, img: np.ndarray) -> np.ndarray:
        # stretch each non-overlapping patchLength x patchLength patch to the
        # full [0, 255] range; division by zero for constant patches is ignored
        img = img.astype('float32')
        [h, w] = img.shape

        with np.errstate(divide='ignore', invalid='ignore'):
            for i_h in range(h // self.patchLength):
                for i_w in range(w // self.patchLength):
                    patch = img[i_h*self.patchLength:(i_h+1)*self.patchLength,
                                i_w*self.patchLength:(i_w+1)*self.patchLength]
                    patch_normalized = 255 * ((patch - patch.min()) / (patch.max() - patch.min()))
                    img[i_h*self.patchLength:(i_h+1)*self.patchLength,
                        i_w*self.patchLength:(i_w+1)*self.patchLength] = patch_normalized.round()

        return img
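

# Minimal usage sketch (an illustration, not part of the original file). It matches
# two small sets of random "images" by the negative sum of absolute differences of
# their SAD descriptors, as in SeqSLAM; the image sizes and counts are arbitrary.
# Because of the relative import at the top, run it as part of the package
# (e.g., `python -m <package>.feature_extractor_holistic`).
if __name__ == '__main__':
    rng = np.random.default_rng(42)
    imgs_db = [rng.integers(0, 256, (120, 160, 3), dtype=np.uint8) for _ in range(3)]
    imgs_q = [rng.integers(0, 256, (120, 160, 3), dtype=np.uint8) for _ in range(2)]

    sad = SAD(nPixels=2048, patchLength=8)
    D_db = sad.compute_features(imgs_db)  # shape: (3, h*w)
    D_q = sad.compute_features(imgs_q)  # shape: (2, h*w)

    # pairwise similarity: higher (less negative) means more similar
    S = -np.abs(D_db[:, np.newaxis, :] - D_q[np.newaxis, :, :]).sum(axis=2)
    print('similarity matrix shape:', S.shape)  # (3, 2)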