
Commit 2eaf689

Author lucasgit13 committed: first commit
1 parent c4a14b2 commit 2eaf689

File tree

2 files changed: +288 −0 lines changed


Get-Dir-Github-Repo/.gitignore

Lines changed: 2 additions & 0 deletions
@@ -0,0 +1,2 @@
__pycache__/
test/

Get-Dir-Github-Repo/get.py

Lines changed: 286 additions & 0 deletions
@@ -0,0 +1,286 @@
import argparse
import concurrent.futures
import fnmatch
import json
import os
import sys

import requests

__version__ = "1.1"

### Command-line arguments ###
parser = argparse.ArgumentParser(
    description="Single GitHub repository directory downloader.",
    usage="%(prog)s [<optional arguments>] <url> [<destination>]",
)
parser.add_argument(
    "url",
    nargs=1,
    help="GitHub repository URL, example: https://github.com/[<owner>]/[<repo>]",
)
parser.add_argument(
    "-V", "--version", action="version", version=f"%(prog)s {__version__}"
)
parser.add_argument(
    "-v",
    "--verbose",
    action="store_true",
    help="Print each file of the repository while cloning",
)
# parser.add_argument('-s', '--case-sensitive', action="store_true", help='Perform a case-sensitive filter. (Default is case insensitive.)')
parser.add_argument(
    "-I",
    "--include-only",
    dest="include",
    nargs=1,
    help="Include only the files that match the given glob pattern.",
)
parser.add_argument(
    "-E", "--exclude", nargs=1, help="Exclude files that match the given glob pattern."
)
parser.add_argument(
    "output",
    nargs="?",
    default=None,
    help="Name of the directory to clone into. (Default is the repository name.)",
)

if len(sys.argv) == 1:
    parser.print_help()
    sys.exit(0)  # bug fix: exit cleanly instead of falling through to a parse error

args = parser.parse_args()

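# A minimal usage sketch, with hypothetical <owner>/<repo>/<branch> names:
#
#   python get.py -I "*.py" https://github.com/<owner>/<repo>/tree/<branch>/<dir> out/
#
# would clone only the .py files under <dir> into out/.
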
### Functions ###
def check_url(url):
    """
    Check that the given URL is valid and points at a real repository.
    """
    if url[-1] == "/":
        url = url[:-1]
    try:
        r = requests.head(url, timeout=30)
    except requests.ConnectionError as e:
        print(
            "OOPS!! Connection error. Make sure you are connected to the Internet. Technical details below.\n"
        )
        sys.exit(str(e))
    except requests.Timeout as e:
        print("OOPS!! Timeout error")
        sys.exit(str(e))
    except requests.RequestException as e:
        print("OOPS!! General error")
        sys.exit(str(e))
    except KeyboardInterrupt:
        sys.exit("Someone closed the program")
    else:
        if r.status_code == 404:
            sys.exit("404: Verify your Internet connection or check that the URL is correct")

    if "https://github.com/" not in url:
        sys.exit("Not a GitHub repo")

    user = url.split("/")[3]
    repo = url.split("/")[4]
    repo_api = f"https://api.github.com/repos/{user}/{repo}/contents"

    try:
        r2 = requests.get(repo_api, timeout=30)
        j = r2.json()

        if r2.status_code != 200:
            if r2.headers["content-type"] == "application/json; charset=utf-8":
                # bug fix: read the message from r2 (the API response), not r;
                # the API "message" field is a string, so exit unconditionally
                message = r2.json()["message"]
                sys.exit(f"server: {message}")

        count = 0
        for token in range(0, len(j)):
            t = j[token]["type"]
            if t != "dir":
                count += 1
        if count == 0:
            sys.exit(f"No files found in {url}")
        else:
            return 0
    except requests.exceptions.RequestException:
        sys.exit(
            "Make sure you provided a valid link and that you are connected to the Internet."
        )

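# For reference, the contents API used above returns a JSON list; each entry
# looks roughly like this (abridged; field names per the GitHub REST API):
#
#   {"name": "get.py", "type": "file",
#    "download_url": "https://raw.githubusercontent.com/<owner>/<repo>/<branch>/get.py"}
#
# fetch() below relies on the "name" and "download_url" fields.
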

def Get(url):
    """
    Turn the repository URL into its GitHub API contents URL.
    """
    path = ""

    if url[-1] == "/":
        url = url[:-1]

    try:
        sp = url.split("/")
        # bug fix: the original popped 7 tokens whenever len(sp) > 5, which
        # raises IndexError for URLs with only 6 or 7 components
        if len(sp) > 7:
            for _ in range(0, 7):
                sp.pop(0)
            path = "/".join(sp)

        user = url.split("/")[3]
        repo = url.split("/")[4]
        if path:
            api_url = f"https://api.github.com/repos/{user}/{repo}/contents/{path}"
        else:
            api_url = f"https://api.github.com/repos/{user}/{repo}/contents"
        try:
            r = requests.get(api_url, timeout=30)
            status = r.status_code
            if status != 200:
                if r.headers["content-type"] == "application/json; charset=utf-8":
                    if isinstance(r.json(), dict):
                        message = r.json()["message"]
                        sys.exit(f"server: {message}")
                sys.exit(f"{status}: invalid url: {url}.")
        except requests.exceptions.RequestException:
            sys.exit(f"error: invalid url: {url}.")
    except IndexError:  # bug fix: was a bare except, which also swallowed SystemExit
        sys.exit(f"error: invalid url: {url}.")

    return {"api_url": api_url, "repo": repo, "path": path}

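# Worked example of the split above, with hypothetical names: the URL
#   https://github.com/<owner>/<repo>/tree/<branch>/src/utils
# splits on "/" into
#   ['https:', '', 'github.com', '<owner>', '<repo>', 'tree', '<branch>', 'src', 'utils']
# so popping the first seven tokens leaves the in-repo path "src/utils" and
# the API URL becomes
#   https://api.github.com/repos/<owner>/<repo>/contents/src/utils
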

def search_pattern(obj, pattern_list):
    """Count how many entries in obj have a name matching any given pattern."""
    matches = 0
    for token in range(0, len(obj)):
        f = obj[token]["name"]
        for p in pattern_list:
            if fnmatch.fnmatch(f, p):
                matches += 1

    return matches


def include(obj, pattern_list):
    """
    Take a list of file dictionaries and a list of glob patterns; return a list
    of the files that match any pattern, plus the number of matches.
    """
    include_list = []
    matches = 0

    for index in range(0, len(obj)):
        f = obj[index]["name"]
        t = obj[index]["type"]
        if t != "dir":
            for p in pattern_list:
                if fnmatch.fnmatch(f, p):
                    include_list.append(obj[index])
                    matches += 1

    return (include_list, matches)

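# Sketch of include() on a hypothetical listing:
#
#   obj = [{"name": "get.py", "type": "file"},
#          {"name": "README.md", "type": "file"},
#          {"name": "test", "type": "dir"}]
#   include(obj, ["*.py"])  # -> ([{"name": "get.py", ...}], 1)
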

def exclude(obj, pattern_list, matches):
    """
    Return obj with every entry whose name matches one of the glob patterns
    removed. (`matches` is kept for call-site compatibility but is no longer
    needed.)
    """
    # bug fix: the original pop-and-wrap while loop could spin forever when a
    # file matched more than one pattern; a single filtering pass avoids that.
    return [
        item
        for item in obj
        if not any(fnmatch.fnmatch(item["name"], p) for p in pattern_list)
    ]


def fetch(obj):
    # Relies on the module-level `directory` and `verbose` set below.
    file = obj["name"]
    url = obj["download_url"]

    content = requests.get(url, timeout=30).content
    filename = os.path.join(directory, file)
    with open(filename, "bw") as f:
        f.write(content)

    if verbose:
        print(file)

### End of functions ###

url = args.url[0]
check_url(url)

verbose = args.verbose
output = args.output
info = Get(url)  # call once instead of three times; each call hit the API again
api_url = info["api_url"]
repo = info["repo"]
path = info["path"]
include_list = args.include
exclude_list = args.exclude
directory = ""

if output:
    directory = output
else:
    directory = repo

if path:
    directory = os.path.join(directory, path)

if os.path.isdir(directory):  # Does the directory already exist?
    if any(os.scandir(directory)):  # Is it non-empty?
        sys.exit(f"'{directory}' already exists and is not empty.")
else:
    try:
        os.makedirs(directory)
    except OSError:  # bug fix: was a bare except
        sys.exit(f"Could not create '{directory}'.")

r = ""

try:
    r = requests.get(api_url, timeout=30).content.decode("utf-8")
except requests.exceptions.RequestException:
    sys.exit("error: Connection error. Aborted.")

try:
    obj = json.loads(r)
    obj_len = len(obj)
except (json.JSONDecodeError, TypeError):  # bug fix: was a bare except
    sys.exit(f"error: Could not load files on {url}")


if include_list:
    print("Searching for matches...")
    (new_obj, matches) = include(obj, include_list)

    if matches != 0:
        obj = new_obj
        del new_obj
        print(f"{matches} matches found to include")
    else:
        sys.exit(f"no matches for {include_list}")

if exclude_list:
    m = search_pattern(obj, exclude_list)
    if m:
        obj = exclude(obj, exclude_list, m)
        # bug fix: this message was in the wrong branch, so it only printed
        # when there was nothing to ignore
        print(f"{m} matches found to ignore")

print(f"\nCloning into {directory}...")

with concurrent.futures.ThreadPoolExecutor() as executor:
    executor.map(fetch, obj)
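
# End-to-end sketch, with a hypothetical URL: running
#   python get.py -E "*.md" https://github.com/<owner>/<repo>
# fetches the repository's top-level listing from the API, drops Markdown
# files, and downloads the remaining files concurrently into ./<repo>.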
