File tree 1 file changed +40
-0
lines changed
1 file changed +40
-0
lines changed Original file line number Diff line number Diff line change
1
+ # Scrape the Project Euler archive page for links to its problems
2
+
3
+ from urllib import request
4
+
5
+ # to handle HTTP requests
6
+
7
+ from bs4 import BeautifulSoup
8
+
9
+ # to parse the html data
10
+
11
# Archive page whose problem table is scraped.
ARCHIVE_URL = "https://projecteuler.net/archives"
# Prefix used to turn the hrefs found in the table into absolute URLs.
BASE_URL = "https://projecteuler.net/"

# Kept under its original name for backward compatibility.
link = ARCHIVE_URL


def build_problem_url(href):
    """Return the absolute Project Euler URL for an href from the archive table.

    Args:
        href: Value of an ``<a href=...>`` attribute. Presumably a relative
            reference; an already-absolute URL is returned unchanged.

    Returns:
        The href resolved against ``BASE_URL``.
    """
    # Local import so this block does not depend on a new file-level import.
    from urllib.parse import urljoin

    return urljoin(BASE_URL, href)


def fetch_problem_links(url=ARCHIVE_URL):
    """Download the archive page and return the problem links it lists.

    Args:
        url: Page to fetch; defaults to the Project Euler archive page.

    Returns:
        A list of absolute URLs, one per ``<a>`` tag that carries an ``href``
        inside the table with id ``problems_table``.

    Raises:
        RuntimeError: If the page contains no table with id ``problems_table``.
    """
    # The context manager closes the HTTP response even if reading fails.
    with request.urlopen(url) as response:
        html = response.read()

    # Parsed document; print(soup.prettify()) shows the indented HTML.
    soup = BeautifulSoup(html, "html.parser")

    # find() rather than find_all(): per the original author's inspection of
    # the page, only one table carries this id.
    table = soup.find("table", {"id": "problems_table"})
    if table is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise RuntimeError(
            'no <table id="problems_table"> found at ' + str(url)
        )

    # href=True skips anchors without an href, which would otherwise yield
    # None and break URL construction.
    return [
        build_problem_url(anchor["href"])
        for anchor in table.find_all("a", href=True)
    ]


def main():
    """Print every problem link found on the archive page, one per line."""
    for problem_url in fetch_problem_links(link):
        print(problem_url)


if __name__ == "__main__":
    main()
You can’t perform that action at this time.
0 commit comments