- import requests
- import shutil
+ from bs4 import BeautifulSoup
+ import aiohttp
+ import asyncio

import os

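+ # NOTE: requires the third-party packages aiohttp, beautifulsoup4, and lxml
+ # (lxml backs the parser name passed to BeautifulSoup below).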
- def removeWhitespace(string):
-     string = string.replace('\n', '');
-     string = string.replace('\t', '');
-     string = string.replace(' ', '');
-     return string
-
- def safeMkdir(directory):
+ def safe_mkdir(directory):
    if not os.path.isdir(directory):
        os.mkdir(directory)

- # Creates the skins directory if it doesn't exist
- skinsDir = "skins"
- safeMkdir(skinsDir)
-
- print("Getting the navbar!")
- # The URL to the website
- url = "https://www.minecraftskins.net"
- # Get the HTML from the website
- result = requests.get(url).text
- # Remove whitespace and unnecessary characters
- result = removeWhitespace(result)
-
- # Get the navbar
- navbar = result[result.find("<navclass=\"main\">"):result.find("</nav>")]
- print("Got the navbar!")
-
- print("Getting the sections!")
- # Get the navbar's sections
- sections = navbar.split("<li>")
- # Remove the unnecessary sections at the start and end
- sections.pop(0)
- sections.pop()
- # Remove the HTML surrounding the hrefs
- sections = [section[8:] for section in sections]
- sections = [section[:section.find('"')] for section in sections]
- print("Got the sections!")
-
- # Loop over each section
- for section in sections:
-     print(f"Getting section {section[9:]}")
-     # Create the section directories if they don't exist
-     sectionDir = skinsDir + section[9:]
-     safeMkdir(sectionDir)
-
-     # Get the URL to the section
-     sectionURL = url + section
-     # Get the HTML from the section
-     sectionResult = requests.get(sectionURL).text
-     # Remove whitespace and unnecessary characters
-     sectionResult = removeWhitespace(sectionResult)
-
-     # Get the counter
-     sectionCounter = sectionResult[sectionResult.find("<spanclass=\"count\">"):]
-
-     # Get the number of pages
-     numPages = sectionCounter[sectionCounter.find("of") + 2:]
-     numPages = int(numPages[:numPages.find('<')])
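+ # Fetch a page with the shared aiohttp session and return its HTML as text.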
+ async def fetch(session, url):
+     print(f"Fetching URL: {url}")
+     async with session.get(url) as response:
+         return await response.text()
+
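+ # Stream the image to disk in 1 KiB chunks so large files are never held
+ # entirely in memory; non-200 responses are reported and skipped.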
+ async def download_image(session, url, path_to_file):
+     try:
+         print(f"Downloading image from URL: {url}")
+         async with session.get(url) as response:
+             if response.status == 200:
+                 with open(path_to_file, 'wb') as f:
+                     while True:
+                         chunk = await response.content.read(1024)
+                         if not chunk:
+                             break
+                         f.write(chunk)
+                 print(f"Image saved to: {path_to_file}")
+             else:
+                 print(f"Failed to download {url}. Status code: {response.status}")
+     except Exception as e:
+         print(f"An error occurred while downloading {url}: {e}")
+
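+ # Parse the site's main navbar and scrape every section concurrently:
+ # asyncio.gather schedules one parse_section coroutine per navbar link.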
+ async def parse_navbar(session, url, skins_dir):
+     print(f"Parsing navbar: {url}")
+     html = await fetch(session, url)
+     soup = BeautifulSoup(html, 'lxml')
+
+     navbar = soup.find('nav', class_='main')
+     sections = navbar.find_all('li')[1:-1]  # Skip the first and last item
+
+     tasks = [parse_section(session, url, li.a['href'], skins_dir) for li in sections]
+     await asyncio.gather(*tasks)
+
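+ # Scrape one section page: a directory per skin, a meta.txt holding its name
+ # and description, and the skin image itself.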
+ async def parse_section(session, base_url, section, skins_dir):
+     section_dir = os.path.join(skins_dir, os.path.basename(section))
+     safe_mkdir(section_dir)
+
+     section_url = base_url + section
+     html = await fetch(session, section_url)
+     soup = BeautifulSoup(html, 'lxml')

-     # Loop over each page
-     for i in range(1, numPages + 1):
-         print(f"\tGetting page {i}")
-         # Get the URL to the page
-         pageURL = f"{sectionURL}/{i}"
-         # Get the HTML from the section
-         pageResult = requests.get(pageURL).text
-         # Remove whitespace and unnecessary characters
-         pageResult = removeWhitespace(pageResult)
-
-         # Get the skin section
-         pageSection = pageResult[pageResult.find("<divclass=\"rowgrid\">"):]
-
-         # Get the skins
-         skins = pageSection.split("<aclass=\"panel-link\"href=\"")
-         skins = [skin[:skin.find('"')] for skin in skins]
-         # Remove the unnecessary sections at the start
-         skins.pop(0)
-
-         # Loop over each skin
-         for skin in skins:
-             print(f"\t\tGetting skin {skin}")
-             # Create the section directories if they don't exist
-             skinDir = sectionDir + skin
-             safeMkdir(skinDir)
-
-             # Get the URL to the skin
-             skinURL = url + skin
-             skinResult = requests.get(skinURL).text
-
-             # Get the name of the skin
-             skinName = skinResult[skinResult.find("<h2 class=\"hero-title\">") + 23:]
-             skinName = skinName[:skinName.find('<')]
-
-             # Get the description for the skin
-             skinDescription = skinResult[skinResult.find("<p class=\"card-description\">") + 28:]
-             skinDescription = skinDescription[:skinDescription.find('<')]
-
-             # Create a text file containing the skin's name and description
-             with open(skinDir + "/meta.txt", 'w') as f:
-                 f.write(f"Name: {skinName}\nDescription: {skinDescription}")
-
-             # Get the URL to the skin img
-             skinImgURL = skinURL + "/download"
-             skinImgResult = requests.get(skinImgURL, stream=True).raw
-             skinImgResult.decode_content = True
-             # Save the skin img
-             with open(skinDir + "/skin.png", "wb") as f:
-                 shutil.copyfileobj(skinImgResult, f)
-             print(f"\t\tGot skin {skin}")
-         print(f"\tGot page {i}")
-     print(f"Got section {section[9:]}")
+     # Find all the skin blocks in the section.
+     skin_blocks = soup.find_all('div', class_='card')
+
+     # Loop over each skin block
+     for block in skin_blocks:
+         # Extract the relative URL of the skin
+         skin = block.find('a')['href']
+
+         # Create the skin's directory if it doesn't exist
+         skin_dir = os.path.join(section_dir, os.path.basename(skin))
+         safe_mkdir(skin_dir)
+
+         # Fetch the skin's page and parse its name and description out of
+         # the HTML instead of slicing by magic offsets
+         skin_url = base_url + skin
+         skin_html = await fetch(session, skin_url)
+         skin_soup = BeautifulSoup(skin_html, 'lxml')
+         skin_name = skin_soup.find('h2', class_='card-title').get_text(strip=True)
+         skin_description = skin_soup.find('p', class_='card-description').get_text(strip=True)
+
+         # Create a text file containing the skin's name and description
+         with open(os.path.join(skin_dir, "meta.txt"), 'w') as f:
+             f.write(f"Name: {skin_name}\nDescription: {skin_description}")
+
+         # Download the skin image
+         skin_img_url = skin_url + "/download"
+         path_to_file = os.path.join(skin_dir, "skin.png")
+         await download_image(session, skin_img_url, path_to_file)
+
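+     # Note: following 'Next' links recursively means a section's page count
+     # sets the recursion depth here.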
+     # Pagination: continue to the next page if a 'Next' link is present.
+     next_button = soup.find('a', string='Next')
+     if next_button and next_button.has_attr('href'):
+         # Pass the relative href: parse_section joins it with base_url itself.
+         await parse_section(session, base_url, next_button['href'], skins_dir)
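+
+ # Entry point: one ClientSession is shared across every request so aiohttp
+ # can pool connections for the whole crawl.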
+ async def main():
+     print("Starting the script.")
+     skins_dir = "skins"
+     safe_mkdir(skins_dir)
+
+     url = "https://www.minecraftskins.net"
+
+     async with aiohttp.ClientSession() as session:
+         await parse_navbar(session, url, skins_dir)
+
+ # Run the main coroutine
+ try:
+     asyncio.run(main())
+ except Exception as e:
+     print(f"An error occurred: {e}")