diff --git a/NewsSpider/NewsSpider.py b/NewsSpider/NewsSpider.py index 38ba4dac..30c8bcac 100644 --- a/NewsSpider/NewsSpider.py +++ b/NewsSpider/NewsSpider.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import os -import sys -import urllib2 +#import sys +#import urllib2 import requests import re from lxml import etree @@ -36,7 +36,7 @@ def New_Page_Info(new_page): def Spider(url): i = 0 - print "downloading ", url + print("downloading ", url) myPage = requests.get(url).content.decode("gbk") # myPage = urllib2.urlopen(url).read().decode("gbk") myPageResults = Page_Info(myPage) @@ -45,7 +45,7 @@ def Spider(url): StringListSave(save_path, filename, myPageResults) i += 1 for item, url in myPageResults: - print "downloading ", url + print("downloading ", url) new_page = requests.get(url).content.decode("gbk") # new_page = urllib2.urlopen(url).read().decode("gbk") newPageResults = New_Page_Info(new_page) @@ -55,7 +55,7 @@ def Spider(url): if __name__ == '__main__': - print "start" + print("start") start_url = "http://news.163.com/rank/" Spider(start_url) - print "end" \ No newline at end of file + print("end") \ No newline at end of file