diff --git a/fun/pipelines.py b/fun/pipelines.py index 1effc8b..32f687a 100644 --- a/fun/pipelines.py +++ b/fun/pipelines.py @@ -15,9 +15,24 @@ def process_item(self, item, spider): images = [] dir_path = '%s/%s' % (settings.IMAGES_STORE, spider.name) + request_data = {'allow_redirects': False, + 'auth': None, + 'cert': None, + 'data': {}, + 'files': {}, + 'headers': {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36'}, + 'method': 'get', + 'params': {}, + 'proxies': {}, + 'stream': True, + 'timeout': 30, + 'url': '', + 'verify': True} + if not os.path.exists(dir_path): os.makedirs(dir_path) for image_url in item['image_urls']: + request_data['url'] = image_url us = image_url.split('/')[3:] image_file_name = '_'.join(us) file_path = '%s/%s' % (dir_path, image_file_name) @@ -26,7 +41,7 @@ def process_item(self, item, spider): continue with open(file_path, 'wb') as handle: - response = requests.get(image_url, stream=True) + response = requests.request(**request_data) for block in response.iter_content(1024): if not block: break diff --git a/fun/settings.py b/fun/settings.py index 6e6e904..7ad26d2 100644 --- a/fun/settings.py +++ b/fun/settings.py @@ -18,4 +18,6 @@ IMAGES_STORE = '/tmp/images' -DOWNLOAD_DELAY = 0.25 # 250 ms of delay \ No newline at end of file +DOWNLOAD_DELAY = 0.25 # 250 ms of delay + +USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.98 Safari/537.36'