Skip to content

Commit d3c44af

Browse files
committed
[update] valid proxy
1 parent 0e079b4 commit d3c44af

File tree

2 files changed

+24
-19
lines changed

2 files changed

+24
-19
lines changed

Schedule/ProxyCheck.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
__author__ = 'J_hao'
1414

1515
import sys
16-
import threading
1716
from threading import Thread
1817

1918
sys.path.append('../')
@@ -22,14 +21,14 @@
2221
from Manager.ProxyManager import ProxyManager
2322
from Util.LogHandler import LogHandler
2423

25-
FAIL_COUNT = 2 # 校验失败次数, 超过次数删除代理
24+
FAIL_COUNT = 1 # 校验失败次数, 超过次数删除代理
2625

2726

2827
class ProxyCheck(ProxyManager, Thread):
2928
def __init__(self, queue, item_dict):
3029
ProxyManager.__init__(self)
3130
Thread.__init__(self)
32-
self.log = LogHandler('proxy_check')
31+
self.log = LogHandler('proxy_check', file=False) # 多线程同时写一个日志文件会有问题
3332
self.queue = queue
3433
self.item_dict = item_dict
3534

@@ -44,11 +43,11 @@ def run(self):
4443
self.db.put(proxy, num=int(count) - 1)
4544
else:
4645
pass
47-
print('ProxyCheck: {} validation pass'.format(proxy))
46+
self.log.info('ProxyCheck: {} validation pass'.format(proxy))
4847
else:
49-
print('ProxyCheck: {} validation fail'.format(proxy))
50-
if count and int(count) > FAIL_COUNT:
51-
print('ProxyCheck: {} fail too many, delete!'.format(proxy))
48+
self.log.info('ProxyCheck: {} validation fail'.format(proxy))
49+
if count and int(count) + 1 >= FAIL_COUNT:
50+
self.log.info('ProxyCheck: {} fail too many, delete!'.format(proxy))
5251
self.db.delete(proxy)
5352
else:
5453
self.db.put(proxy, num=int(count) + 1)

Schedule/ProxyValidSchedule.py

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,19 +13,24 @@
1313
__author__ = 'JHao'
1414

1515
import sys
16+
import time
17+
18+
try:
19+
from Queue import Queue # py2
20+
except:
21+
from queue import Queue # py3
1622

1723
sys.path.append('../')
1824

1925
from Schedule.ProxyCheck import ProxyCheck
2026
from Manager.ProxyManager import ProxyManager
21-
from queue import Queue
22-
import time
2327

2428

2529
class ProxyValidSchedule(ProxyManager, object):
2630
def __init__(self):
2731
ProxyManager.__init__(self)
2832
self.queue = Queue()
33+
self.proxy_item = dict()
2934

3035
def __validProxy(self, threads=10):
3136
"""
@@ -35,7 +40,7 @@ def __validProxy(self, threads=10):
3540
"""
3641
thread_list = list()
3742
for index in range(threads):
38-
thread_list.append(ProxyCheck(self.queue, self.item_dict))
43+
thread_list.append(ProxyCheck(self.queue, self.proxy_item))
3944

4045
for thread in thread_list:
4146
thread.daemon = True
@@ -45,19 +50,20 @@ def __validProxy(self, threads=10):
4550
thread.join()
4651

4752
def main(self):
48-
self.put_queue()
53+
self.putQueue()
4954
while True:
50-
if self.queue.qsize():
55+
if not self.queue.empty():
56+
self.log.info("Start valid useful proxy")
5157
self.__validProxy()
5258
else:
53-
print('Time sleep 5 minutes.')
54-
time.sleep(60 * 1)
55-
self.put_queue()
59+
self.log.info('Valid Complete! sleep 5 minutes.')
60+
time.sleep(60 * 5)
61+
self.putQueue()
5662

57-
def put_queue(self):
63+
def putQueue(self):
5864
self.db.changeTable(self.useful_proxy_queue)
59-
self.item_dict = self.db.getAll()
60-
for item in self.item_dict:
65+
self.proxy_item = self.db.getAll()
66+
for item in self.proxy_item:
6167
self.queue.put(item)
6268

6369

@@ -68,4 +74,4 @@ def run():
6874

6975
if __name__ == '__main__':
7076
p = ProxyValidSchedule()
71-
p.main()
77+
p.main()

0 commit comments

Comments
 (0)