
Commit 8b5ecae

Code written as an exercise for the YouTube clip https://www.youtube.com/watch?v=F1kZ39SvuGE
1 parent 567c75a commit 8b5ecae

File tree: 1 file changed (+45, -0)


web_scrap_test.py

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
import requests
import re
from bs4 import BeautifulSoup

# First method of scraping, simple and dirty (with requests and regex)
# # get the data
# data = requests.get('http://www.mg-cc.org/club-information/club-contacts')
#
# # extract the phone numbers, emails and so on
#
# phones = re.findall(r'(\(?[0-9]{3}\)?(?:\-|\s|\.)?[0-9]{3}(?:\-|\.)[0-9]{4})', data.text)
# emails = re.findall(r'([\d\w\.]+@[\d\w\.\-]+\.\w+)', data.text)
#
# print(phones, emails)
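# A minimal sketch of the same regex idea, run against an inline HTML snippet so it
# stays self-contained; the snippet, phone number and email address are invented
# for illustration only.
sample_html = '<p>Call (555) 123-4567 or email info@example.com</p>'
sample_phones = re.findall(r'(\(?[0-9]{3}\)?(?:\-|\s|\.)?[0-9]{3}(?:\-|\.)[0-9]{4})', sample_html)
sample_emails = re.findall(r'([\d\w\.]+@[\d\w\.\-]+\.\w+)', sample_html)
print(sample_phones, sample_emails)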

# Scraping with Beautiful Soup

data = requests.get('https://raw.githubusercontent.com/engineer-man/youtube/master/042/scrape.html')
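# Optional sanity check, not part of the original exercise: raise_for_status() makes a
# failed download raise an HTTPError here instead of silently parsing an error page.
data.raise_for_status()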

# load data into bs4
soup = BeautifulSoup(data.text, 'html.parser')

# get data simply by looking at each tr
scraped_data = []
for tr in soup.find_all('tr'):
    values = [td.text for td in tr.find_all('td')]
    scraped_data.append(values)

print(scraped_data)
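# A small follow-on sketch, not in the original commit: persist the scraped rows with
# the standard-library csv module. The output filename is arbitrary.
import csv
with open('scraped_rows.csv', 'w', newline='') as f:
    csv.writer(f).writerows(scraped_data)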

# get only the rows marked with class "special"
scraped_data_special = []
for tr in soup.find_all('tr', {'class': 'special'}):
    values_special = [td.text for td in tr.find_all('td')]
    scraped_data_special.append(values_special)

print(scraped_data_special)
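# Alternative, not in the original exercise: the same class filter expressed as a CSS
# selector via soup.select(), which recent BeautifulSoup versions support via soupsieve.
scraped_data_special_css = [[td.text for td in tr.find_all('td')]
                            for tr in soup.select('tr.special')]
print(scraped_data_special_css)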

# get data within a specific element
specific_element = []
div = soup.find('div', {'class': 'special_table'})
for tr in div.find_all('tr'):
    values_special_class = [td.text for td in tr.find_all('td')]
    specific_element.append(values_special_class)

print(specific_element)
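# Defensive variant, added here as a sketch rather than part of the original code:
# soup.find() returns None when no matching div exists, so guard before iterating
# to avoid an AttributeError.
rows_in_special_table = []
if div is not None:
    rows_in_special_table = [[td.text for td in tr.find_all('td')] for tr in div.find_all('tr')]
print(rows_in_special_table)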
