Skip to content

Commit 42b4320

Browse files
committed
refactor(hsv): refactor hsv image search
1 parent be3c81a commit 42b4320

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

48 files changed

+449
-7557
lines changed
File renamed without changes.

hsv_image_search/README.md

+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
# 基于HSV颜色特征的图像搜索引擎
2+
3+
### DP:
4+
* 编写时间: 2018-11-15
5+
* 编写人: Mousse
6+
* 联系邮箱: zibuyu1995@gmail.com
7+
8+
### 系统环境:
9+
* python3.6
10+
* opencv 3.4.2
11+
12+
### 文件结构:
13+
```
14+
├── README.md
15+
├── app.py # flask 运行代码
16+
├── config
17+
│   ├── __init__.py
18+
│   └── config.py # 配置文件
19+
├── generate_index.py # 索引文件生成
20+
├── mlibs
21+
│   ├── __init__.py
22+
│   ├── hsv_features.py # HSV 特征提取
23+
│   └── image_match.py # HSV 特征匹配
24+
├── static
25+
│   ├── dataset # 数据集
26+
│   └── dataset.db # 序列化后数据集(需要执行generate_index.py后生成)
27+
├── templates
28+
│   ├── index.html
29+
│   └── search.html
30+
├── test
31+
│   └── 100900.png # 测试图片
32+
└── requirements.txt # 依赖
33+
```
34+
35+
### 项目运行:
36+
> 创建虚拟环境后请依据场景修改config/config.py 配置
37+
38+
* 安装依赖
39+
```bash
40+
pip install -r requirements.txt -i https://pypi.douban.com/simple/
41+
```
42+
43+
* 生成索引文件
44+
```bash
45+
python generate_index.py
46+
```
47+
48+
* 运行项目
49+
```bash
50+
# 常规运行
51+
python app.py
52+
# gunicorn 运行
53+
gunicorn --workers=2 --bind=0.0.0.0:5555 app:app
54+
```
55+
56+
### 测试:
57+
> GET http://127.0.0.1:5555/ (服务监听在 0.0.0.0:5555)
58+
59+
* 环境: macOS
60+
![image](https://user-images.githubusercontent.com/17525759/48668695-1b5efb80-eb2f-11e8-895b-4c9c4c1a6105.png)
61+
62+
### 常见错误处理:
63+
* may have been in progress in another thread when fork() was called.
64+
```bash
65+
export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
66+
```
67+
68+
### 备注:
69+
* HSV 算法不适合于颜色不鲜明的图像匹配
70+
* image_bins: (8, 3, 3) 分别为 H、S、V 三个通道的直方图分箱数, 是推荐的颜色特征取值
71+
* 有什么问题请联系我: zibuyu1995@gmail.com
72+
73+
### reference
74+
* 数据集下载: https://pan.baidu.com/s/1bnhR65SyfONoKTK90T57tQ 密码:r0d2
75+
* [flask](http://flask.pocoo.org/docs/0.12/)
76+

hsv_image_search/app.py

+83
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
#!/usr/bin/env python
2+
# coding=utf-8
3+
4+
import time
5+
from operator import itemgetter
6+
7+
from flask import Flask, render_template, request, abort
8+
from tinydb import TinyDB
9+
from werkzeug.contrib.cache import SimpleCache
10+
11+
from config.config import (
12+
image_bins, site_name, upload_image_path
13+
)
14+
from mlibs.hsv_features import get_image_feature
15+
from mlibs.image_match import chi_square
16+
17+
18+
# Flask application object; served as ``app:app`` when run under gunicorn.
app = Flask(__name__)
# Cache that keeps the rendering context of the latest search between requests.
cache = SimpleCache()
20+
21+
22+
@app.route('/')
def index():
    """Render the landing page, passing the configured site title."""
    page_context = {'site_name': site_name}
    return render_template('index.html', **page_context)
25+
26+
27+
@app.route("/search")
def view_search_result():
    """Render the search page from the cached context of the latest search.

    The context is read from the cache key ``"response_dict"``; when nothing
    (or an empty value) is cached, the template is rendered with no variables.
    """
    cached_context = cache.get("response_dict") or {}
    return render_template('search.html', **cached_context)
33+
34+
35+
@app.route("/search", methods=["POST"])
def new_search():
    """Store an uploaded query image and render its closest matches.

    The file is read from the ``image`` form field, saved under
    ``upload_image_path`` and passed to ``image_search``. The five best
    matches are rendered with ``search.html``, and the rendering context is
    cached under ``"response_dict"`` so a later ``GET /search`` can show the
    same result.

    Aborts with HTTP 404 when no file was uploaded or when its content type
    is not one of the accepted JPEG/PNG types.
    """
    # Imported locally so this handler does not depend on the module-level
    # import block being edited.
    import os
    import uuid

    upload_image = request.files.get("image")
    if not upload_image:
        # abort() raises the HTTP exception itself; no ``raise`` is needed.
        abort(404)
    image_types = ['image/jpeg', 'image/jpg', 'image/png']
    if upload_image.content_type not in image_types:
        abort(404)
    upload_image_type = upload_image.content_type.split('/')[-1]

    # A timestamp with one-second resolution would let two uploads arriving
    # in the same second overwrite each other, so append a random suffix.
    file_name = '{}_{}.{}'.format(
        int(time.time()), uuid.uuid4().hex, upload_image_type
    )
    # Make sure the upload directory exists before saving into it.
    os.makedirs(upload_image_path, exist_ok=True)
    file_path = os.path.join(upload_image_path, file_name)
    upload_image.save(file_path)

    # Keep only the five closest matches.
    search_results = image_search(file_path)[:5]
    response_dict = {
        'site_name': site_name,
        'upload_image': file_name,
        'search_results': search_results
    }
    cache.set("response_dict", response_dict)
    return render_template('search.html', **response_dict)
55+
56+
57+
def image_search(image_path=None):
    """Rank every indexed image by chi-square distance to the query image.

    :param image_path: path of the query image
    :return: list of ``(image_uid, distance)`` tuples sorted by ascending
        distance, one entry per item in the module-level ``cache_features``
    """
    _, query_feature = get_image_feature(image_path, image_bins)
    ranked = [
        (image_uid, chi_square(feature, query_feature))
        for image_uid, feature in cache_features.items()
    ]
    ranked.sort(key=itemgetter(1))
    return ranked
69+
70+
71+
# Load the feature index once at import time. This happens both when the
# module is imported by a WSGI server (e.g. gunicorn) and when it is executed
# directly as a script.
db = TinyDB('./static/dataset.db')
table = db.table('feature')
cache_features = table.all()[0]

if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5555)
83+
File renamed without changes.

hsv_image_search/config/config.py

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# coding: utf-8
2+
3+
import os
4+
from multiprocessing import cpu_count
5+
6+
site_name = '基于HSV图像搜索引擎' # Site title
# Project root: the parent directory of the directory containing this file.
project_path = os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir))
# Rebinds the imported ``cpu_count`` function name to its result.
cpu_count = cpu_count()

upload_image_path = os.path.join(project_path, 'static/uploads/') # Directory where uploaded images are saved
dataset_db_path = os.path.join(project_path, 'static/dataset.db') # Location of the serialized dataset index
dataset_path = os.path.join(project_path, 'static/dataset/') # Directory holding the dataset images
image_bins = (8, 3, 3) # Histogram bin counts passed to the HSV feature extractor (H, S, V)

hsv_image_search/generate_index.py

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#!/usr/bin/env python
2+
# coding=utf-8
3+
4+
"""
5+
Time : 2018-11-18
6+
Author : Mousse
7+
email: zibuyu1995@gmail.com
8+
"""
9+
10+
import asyncio
11+
import glob
12+
import ujson
13+
from concurrent.futures import ProcessPoolExecutor
14+
15+
import uvloop
16+
from tinydb import TinyDB
17+
18+
from config.config import dataset_path, cpu_count, image_bins
19+
from mlibs.hsv_features import get_image_feature
20+
21+
22+
# Install uvloop's event loop policy for this module.
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
23+
24+
25+
def feature_persistence(task_results: list) -> int:
    """Persist the extracted image features as a single TinyDB document.

    :param task_results: completed futures whose ``result()`` is an
        ``(image_uid, feature)`` pair
    :return: number of images written to the ``feature`` table
    """
    feature_dict = {}
    for task_result in task_results:
        image_uid, feature = task_result.result()
        # TinyDB's default JSON storage cannot serialise numpy arrays, so
        # convert anything exposing ``tolist()`` to plain Python lists first.
        to_list = getattr(feature, 'tolist', None)
        feature_dict[image_uid] = to_list() if callable(to_list) else feature

    db = TinyDB('./static/dataset.db')
    try:
        db.table('feature').insert(feature_dict)
    finally:
        # Release the database file handle even if the insert fails.
        db.close()
    return len(feature_dict)
33+
34+
35+
async def generate_image_index(eve_loop, processes_executor):
    """Extract features for every PNG in the dataset and persist the index.

    One ``get_image_feature`` job per ``*.png`` file under ``dataset_path`` is
    submitted to the executor. Once all jobs have finished, their results are
    handed to ``feature_persistence`` and the number of indexed images is
    printed.

    :param eve_loop: event loop used to schedule the executor jobs
    :param processes_executor: executor the extraction jobs are submitted to
    """
    image_feature_tasks = [
        eve_loop.run_in_executor(
            processes_executor, get_image_feature, image_path, image_bins
        )
        for image_path in glob.glob(dataset_path + "/*.png")
    ]

    feature_count = 0
    # asyncio.wait() raises ValueError on an empty collection, so only wait
    # (and persist) when at least one image was found.
    if image_feature_tasks:
        task_results, _ = await asyncio.wait(image_feature_tasks)
        feature_count = feature_persistence(task_results)
    print(f"{feature_count}幅图像完成索引")
48+
49+
50+
if __name__ == '__main__':
    asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
    event_loop = asyncio.get_event_loop()
    try:
        # The context manager shuts the worker processes down once indexing
        # has finished or failed.
        with ProcessPoolExecutor(max_workers=cpu_count) as executor:
            event_loop.run_until_complete(
                generate_image_index(event_loop, executor)
            )
    finally:
        event_loop.close()
File renamed without changes.
+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
# coding=utf-8
2+
3+
4+
import numpy as np
5+
import cv2
6+
7+
8+
def get_image_feature(image_path, bins: tuple) -> tuple:
    """
    Compute the HSV colour-histogram feature vector of an image.

    The image is divided into five regions: the four quadrants with a centred
    ellipse removed, plus the ellipse itself. One normalised HSV histogram is
    computed per region and the five histograms are concatenated.

    :param image_path: path of the image file to read
    :param bins: histogram bin counts passed to ``calculate_histogram``
    :return: ``(image_uid, features)`` where ``image_uid`` is the first six
        characters of the file name and ``features`` is a float array
    :raises ValueError: if the image cannot be read from ``image_path``
    """
    # Imported locally so this function does not depend on the module-level
    # import block being edited.
    import os

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise ValueError("cannot read image: {!r}".format(image_path))
    # basename() handles the platform's path separators, unlike rfind("/").
    image_uid = os.path.basename(image_path)[:6]
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

    # Locate the image centre and describe the four quadrants as
    # (start_x, end_x, start_y, end_y): top-left, top-right, bottom-right,
    # bottom-left.
    (h, w) = image.shape[:2]
    (c_x, c_y) = (int(w * 0.5), int(h * 0.5))
    segments = [
        (0, c_x, 0, c_y), (c_x, w, 0, c_y),
        (c_x, w, c_y, h), (0, c_x, c_y, h)
    ]

    # Draw a filled ellipse centred in the image; its axes are 75% of the
    # image's half-width and half-height.
    (axes_x, axes_y) = (int(w * 0.75 / 2), int(h * 0.75 / 2))
    ellipse_mask = np.zeros(image.shape[:2], dtype="uint8")
    cv2.ellipse(
        ellipse_mask, (c_x, c_y), (axes_x, axes_y),
        0.0, 0.0, 360.0, (255, 255, 255), -1
    )

    histograms = []  # one histogram per region
    for (start_x, end_x, start_y, end_y) in segments:
        corner_mask = np.zeros(image.shape[:2], dtype="uint8")
        cv2.rectangle(
            corner_mask, (start_x, start_y),
            (end_x, end_y), 255, -1
        )
        # Remove the ellipse area so that only the corner part remains.
        corner_mask = cv2.subtract(corner_mask, ellipse_mask)
        histograms.append(calculate_histogram(image, corner_mask, bins))
    # Histogram of the central ellipse goes last.
    histograms.append(calculate_histogram(image, ellipse_mask, bins))

    convert_features = np.concatenate(histograms).astype(float)
    return image_uid, convert_features
50+
51+
52+
def calculate_histogram(image, mask, bins: tuple) -> list:
    """
    Compute a normalised, flattened 3-channel histogram of ``image``.

    :param image: input image (the caller passes an HSV image)
    :param mask: mask selecting the region to take into account
    :param bins: bin counts for the three channels
    :return: the flattened result of ``cv2.normalize`` on the histogram
    """
    channels = [0, 1, 2]
    # Value range per channel: [0, 180), [0, 256) and [0, 256).
    value_ranges = [0, 180, 0, 256, 0, 256]
    raw_hist = cv2.calcHist([image], channels, mask, bins, value_ranges)
    # Normalise, passing the histogram itself as the destination array.
    normalized = cv2.normalize(raw_hist, raw_hist)
    return normalized.flatten()

hsv_image_search/mlibs/image_match.py

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
#!/usr/bin/env python
2+
# coding: utf-8
3+
4+
import numpy as np
5+
6+
7+
def chi_square(query_feature, match_feature) -> float:
    """
    Chi-square distance between two feature vectors.

    Computes ``0.5 * sum((a - b) ** 2 / (a + b + 1e-10))`` over the paired
    elements of the two vectors. When the lengths differ, only the leading
    elements both vectors share are compared.

    :param query_feature: feature vector of the query image
    :param match_feature: feature vector of the candidate image
    :return: chi-square distance (smaller means more similar)
    """
    query = np.asarray(query_feature, dtype=float)
    match = np.asarray(match_feature, dtype=float)
    # Compare only the common prefix, as pairing the elements with zip() would.
    common = min(len(query), len(match))
    query, match = query[:common], match[:common]
    # The small epsilon avoids division by zero when both bins are empty.
    distance = 0.5 * np.sum((match - query) ** 2 / (match + query + 1e-10))
    return float(distance)
20+
21+

hsv_image_search/requirements.txt

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
Flask==1.0.2
2+
gunicorn==19.9.0
3+
Werkzeug==0.14.1
4+
async-timeout==3.0.1
5+
httptools==0.0.11
6+
MarkupSafe==1.0
7+
numpy==1.15.0
8+
opencv-python==3.4.2.17
9+
tinydb==3.11.1
10+
uvloop==0.11.1

hsv_image_search/static/.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+

hsv_image_search/static/dataset.db

+1
Large diffs are not rendered by default.
File renamed without changes.
+50
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>{{ site_name }}</title>
    {# Styles belong inside <head>; they were previously placed between </head> and <body>. #}
    <style>
        .footer {
            left: 0; bottom: 0;
            width: 100%; color: white; text-align: center;
        }
        .mousse {
            background-image: url("/static/css/background.jpg");
            -webkit-background-size: cover;
            -moz-background-size: cover;
            -o-background-size: cover;
            background-size: cover;
            color: #000000;
        }
    </style>
</head>

<body class="mousse">
<h1 align="center">{{ site_name }}</h1>
<header class="intro" align="center">
    <div class="intro-body">
        <div class="container">
            <h2 style="color: #c1d0eaa8">原图</h2>
            {# The page can be rendered without any variables, so skip the image when none was uploaded. #}
            {% if upload_image %}
            <img src="/static/uploads/{{ upload_image }}" style="height: 250px;">
            {% endif %}
            <h2 style="color: #c1d0eaa8">搜索结果</h2>
            <table width="100%" border="0" cellspacing="0" cellpadding="0">
                {# Each result is an (image uid, chi-square distance) pair. #}
                {% for search_result in search_results %}
                <tr>
                    <td align="center">卡方距离(越小越好):{{ search_result[1] }} </td>
                </tr>
                <tr>
                    <td align="center"><img src="/static/dataset/{{ search_result[0] }}.png" width="300" height="300" /> </td>
                </tr>
                {% endfor %}
            </table>
        </div>
    </div>
</header>

<footer class="footer">
    <p>Mousse by 2018-11-3 © IG NB</p>
</footer>
</body>
</html>
File renamed without changes.

0 commit comments

Comments
 (0)