Skip to content

Commit 6ac55a1

Browse files
the actual code stuff
0 parents  commit 6ac55a1

File tree

1 file changed

+104
-0
lines changed

1 file changed

+104
-0
lines changed

scraper.py

Lines changed: 104 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,104 @@
1+
import requests
2+
import shutil
3+
import os
4+
5+
# Translation table that deletes newline, tab and space characters.
# Built once so every call is a single C-level pass over the string.
_WHITESPACE_TABLE = str.maketrans('', '', '\n\t ')


def removeWhitespace(string):
    """Return *string* with every newline, tab and space character removed.

    Only those three characters are stripped; any other whitespace
    (for example a carriage return) is left untouched.

    Args:
        string: The text to strip.

    Returns:
        A new string without '\\n', '\\t' or ' ' characters.
    """
    return string.translate(_WHITESPACE_TABLE)
10+
11+
def safeMkdir(directory):
    """Create *directory* if it does not already exist.

    Missing parent directories are created as well. Calling this on a
    directory that already exists is a no-op.

    Args:
        directory: Path of the directory to create.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of isdir() + mkdir().
    os.makedirs(directory, exist_ok=True)
14+
15+
# Seconds to wait on any single HTTP request before giving up, so that a
# stalled connection cannot hang the scraper forever.
REQUEST_TIMEOUT = 30


def _fetchText(targetURL):
    """Return the response body of *targetURL* as text.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            an error page is never parsed as if it were real content.
    """
    response = requests.get(targetURL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.text


# Creates the skins directory if it doesn't exist
skinsDir = "skins"
safeMkdir(skinsDir)

# The URL to the website
url = "https://www.minecraftskins.net"
# Get the HTML from the website with whitespace stripped; every marker
# searched for below is therefore written without spaces.
result = removeWhitespace(_fetchText(url))

# Get the navbar
navStart = result.find("<navclass=\"main\">")
if navStart == -1:
    raise RuntimeError("Could not find the main navbar on " + url)
# Look for the closing tag only after the opening one, so an earlier
# </nav> elsewhere in the document cannot truncate the slice.
navbar = result[navStart:result.find("</nav>", navStart)]

# Get the navbar's sections, dropping the unnecessary chunks at the start
# (text before the first <li>) and at the end (the last <li> entry)
sections = navbar.split("<li>")[1:-1]
# Remove the HTML surrounding the hrefs.
# NOTE(review): assumes every entry starts with `<ahref="` -- confirm
# against the live markup.
hrefPrefixLen = len("<ahref=\"")
sections = [section[hrefPrefixLen:] for section in sections]
sections = [section[:section.find('"')] for section in sections]

# Loop over each section
for section in sections:
    # Create the section directories if they don't exist.
    # The first 9 characters of the href are dropped; presumably a
    # "/category"-style prefix -- TODO confirm.
    sectionDir = skinsDir + section[9:]
    safeMkdir(sectionDir)

    # Get the URL to the section
    sectionURL = url + section
    # Get the HTML from the section, without whitespace
    sectionResult = removeWhitespace(_fetchText(sectionURL))

    # Get the counter
    sectionCounter = sectionResult[sectionResult.find("<spanclass=\"count\">"):]

    # Get the number of pages: the digits between "of" and the next tag
    numPages = sectionCounter[sectionCounter.find("of") + 2:]
    numPages = int(numPages[:numPages.find('<')])

    # Loop over each page
    for i in range(1, numPages + 1):
        # Get the URL to the page
        pageURL = f"{sectionURL}/{i}"
        # Get the HTML from the page, without whitespace
        pageResult = removeWhitespace(_fetchText(pageURL))

        # Get the skin section
        pageSection = pageResult[pageResult.find("<divclass=\"rowgrid\">"):]

        # Get the skins; the first chunk is the text before the first link
        skins = pageSection.split("<aclass=\"panel-link\"href=\"")[1:]
        skins = [skin[:skin.find('"')] for skin in skins]

        # Loop over each skin
        for skin in skins:
            # Create the skin directory if it doesn't exist
            skinDir = sectionDir + skin
            safeMkdir(skinDir)

            # Get the URL to the skin. The page is NOT whitespace-stripped,
            # so the markers below keep their spaces.
            skinURL = url + skin
            skinResult = _fetchText(skinURL)

            # Get the name of the skin
            nameMarker = "<h2 class=\"hero-title\">"
            skinName = skinResult[skinResult.find(nameMarker) + len(nameMarker):]
            skinName = skinName[:skinName.find('<')]

            # Get the description for the skin
            descMarker = "<p class=\"card-description\">"
            skinDescription = skinResult[skinResult.find(descMarker) + len(descMarker):]
            skinDescription = skinDescription[:skinDescription.find('<')]

            # Create a text file containing the skin's name and description.
            # Explicit UTF-8 so non-ASCII text does not depend on the
            # platform's default encoding.
            with open(os.path.join(skinDir, "meta.txt"), 'w', encoding="utf-8") as f:
                f.write(f"Name: {skinName}\nDescription: {skinDescription}")

            # Get the URL to the skin img
            skinImgURL = skinURL + "/download"
            # The context manager releases the streamed connection even if
            # the copy fails part-way.
            with requests.get(skinImgURL, stream=True, timeout=REQUEST_TIMEOUT) as skinImgResponse:
                # Do not save an HTTP error page as skin.png
                skinImgResponse.raise_for_status()
                skinImgResult = skinImgResponse.raw
                # Let urllib3 undo gzip/deflate transfer encoding
                skinImgResult.decode_content = True
                # Save the skin img
                with open(os.path.join(skinDir, "skin.png"), "wb") as f:
                    shutil.copyfileobj(skinImgResult, f)

0 commit comments

Comments
 (0)