8
8
import progressbar
9
9
from dateutil .parser import parse
10
10
11
- sys .path .insert (0 , os .path .abspath (os .path .join (
12
- os .path .dirname (__file__ ), ".." )))
11
+ sys .path .insert (0 , os .path .abspath (os .path .join (os .path .dirname (__file__ ), '..' )))
13
12
14
13
import data .db_session as db_session
15
14
from data .package import Package
@@ -35,14 +34,14 @@ def main():
35
34
def do_summary ():
36
35
session = db_session .create_session ()
37
36
38
- print (" Final numbers:" )
39
- print (" Users: {:,}" .format (session .query (User ).count ()))
40
- print (" Packages: {:,}" .format (session .query (Package ).count ()))
41
- print (" Releases: {:,}" .format (session .query (Release ).count ()))
37
+ print (' Final numbers:' )
38
+ print (' Users: {:,}' .format (session .query (User ).count ()))
39
+ print (' Packages: {:,}' .format (session .query (Package ).count ()))
40
+ print (' Releases: {:,}' .format (session .query (Release ).count ()))
42
41
43
42
44
43
def do_user_import (user_lookup : Dict [str , str ]) -> Dict [str , User ]:
45
- print (" Importing users ... " , flush = True )
44
+ print (' Importing users ... ' , flush = True )
46
45
with progressbar .ProgressBar (max_value = len (user_lookup )) as bar :
47
46
for idx , (email , name ) in enumerate (user_lookup .items ()):
48
47
session = db_session .create_session ()
@@ -66,29 +65,29 @@ def do_user_import(user_lookup: Dict[str, str]) -> Dict[str, User]:
66
65
67
66
def do_import_packages (file_data : List [dict ], user_lookup : Dict [str , User ]):
68
67
errored_packages = []
69
- print (" Importing packages and releases ... " , flush = True )
68
+ print (' Importing packages and releases ... ' , flush = True )
70
69
with progressbar .ProgressBar (max_value = len (file_data )) as bar :
71
70
for idx , p in enumerate (file_data ):
72
71
try :
73
72
load_package (p , user_lookup )
74
73
bar .update (idx )
75
74
except Exception as x :
76
- errored_packages .append ((p , " *** Errored out for package {}, {}" .format (p .get ('package_name' ), x )))
75
+ errored_packages .append ((p , ' *** Errored out for package {}, {}' .format (p .get ('package_name' ), x )))
77
76
raise
78
77
sys .stderr .flush ()
79
78
sys .stdout .flush ()
80
79
print ()
81
- print (" Completed packages with {} errors." .format (len (errored_packages )))
82
- for ( p , txt ) in errored_packages :
80
+ print (' Completed packages with {} errors.' .format (len (errored_packages )))
81
+ for p , txt in errored_packages :
83
82
print (txt )
84
83
85
84
86
85
def do_load_files () -> List [dict ]:
87
86
data_path = os .path .abspath (os .path .join (os .path .dirname (__file__ ), '../../../data/pypi-top-100' ))
88
- print (" Loading files from {}" .format (data_path ))
87
+ print (' Loading files from {}' .format (data_path ))
89
88
files = get_file_names (data_path )
90
- print (" Found {:,} files, loading ..." .format (len (files )), flush = True )
91
- time .sleep (.1 )
89
+ print (' Found {:,} files, loading ...' .format (len (files )), flush = True )
90
+ time .sleep (0 .1 )
92
91
93
92
file_data = []
94
93
with progressbar .ProgressBar (max_value = len (files )) as bar :
@@ -103,7 +102,7 @@ def do_load_files() -> List[dict]:
103
102
104
103
105
104
def find_users (data : List [dict ]) -> dict :
106
- print (" Discovering users..." , flush = True )
105
+ print (' Discovering users...' , flush = True )
107
106
found_users = {}
108
107
109
108
with progressbar .ProgressBar (max_value = len (data )) as bar :
@@ -116,7 +115,7 @@ def find_users(data: List[dict]) -> dict:
116
115
sys .stderr .flush ()
117
116
sys .stdout .flush ()
118
117
print ()
119
- print (" Discovered {:,} users" .format (len (found_users )))
118
+ print (' Discovered {:,} users' .format (len (found_users )))
120
119
print ()
121
120
122
121
return found_users
@@ -147,7 +146,7 @@ def load_file_data(filename: str) -> dict:
147
146
with open (filename , 'r' , encoding = 'utf-8' ) as fin :
148
147
data = json .load (fin )
149
148
except Exception as x :
150
- print (" ERROR in file: {}, details: {}" .format (filename , x ), flush = True )
149
+ print (' ERROR in file: {}, details: {}' .format (filename , x ), flush = True )
151
150
raise
152
151
153
152
return data
@@ -165,7 +164,7 @@ def load_package(data: dict, user_lookup: Dict[str, User]):
165
164
p .author = info .get ('author' )
166
165
p .author_email = info .get ('author_email' )
167
166
168
- releases = build_releases (p .id , data .get (" releases" , {}))
167
+ releases = build_releases (p .id , data .get (' releases' , {}))
169
168
170
169
if releases :
171
170
p .created_date = releases [0 ].created_date
@@ -206,18 +205,13 @@ def detect_license(license_text: str) -> Optional[str]:
206
205
license_text = license_text .strip ()
207
206
208
207
if len (license_text ) > 100 or '\n ' in license_text :
209
- return " CUSTOM"
208
+ return ' CUSTOM'
210
209
211
- license_text = license_text \
212
- .replace ('Software License' , '' ) \
213
- .replace ('License' , '' )
210
+ license_text = license_text .replace ('Software License' , '' ).replace ('License' , '' )
214
211
215
212
if '::' in license_text :
216
213
# E.g. 'License :: OSI Approved :: Apache Software License'
217
- return license_text \
218
- .split (':' )[- 1 ] \
219
- .replace (' ' , ' ' ) \
220
- .strip ()
214
+ return license_text .split (':' )[- 1 ].replace (' ' , ' ' ).strip ()
221
215
222
216
return license_text .strip ()
223
217
@@ -280,9 +274,7 @@ def get_file_names(data_path: str) -> List[str]:
280
274
files = []
281
275
for f in os .listdir (data_path ):
282
276
if f .endswith ('.json' ):
283
- files .append (
284
- os .path .abspath (os .path .join (data_path , f ))
285
- )
277
+ files .append (os .path .abspath (os .path .join (data_path , f )))
286
278
287
279
files .sort ()
288
280
return files
0 commit comments