Skip to content

Commit a9ec5b8

Browse files
committed
Reduce memory usage
1 parent 0c68262 commit a9ec5b8

36 files changed

+855
-88
lines changed

ocr_utils.py

Lines changed: 39 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,17 @@
2222
2323
@author: richard
2424
'''
25+
from django.template.defaultfilters import title
26+
27+
######################################################
28+
show_plot = False #set True to show plot on screen, set False to save to file
29+
#####################################################
30+
31+
##############################################################################
32+
default_zip_file = "fonts.zip" #small data set
33+
#default_zip_file = 'fonts_all.zip' #for the big data set
34+
##############################################################################
35+
2536
import numpy as np
2637
import pandas as pd
2738
import math
@@ -32,11 +43,6 @@
3243
import os
3344

3445

35-
######################################################
36-
show_plot = False #set True to show plot on screen, set False to save to file
37-
#####################################################
38-
39-
4046
def report(blocknr, blocksize, size):
4147
current = blocknr*blocksize
4248
print("{0:.2f}%".format(100.0*current/size),end='\r')
@@ -77,8 +83,8 @@ def read_file(pathName, input_filters_dict, random_state=None):
7783
'''
7884

7985
if os.path.exists(pathName)==False:
80-
print('fonts.zip does not exist! Downloading it from the web', flush=True)
81-
downloadFile('http://lyman.house/download/fonts.zip')
86+
print('{} does not exist! Downloading it from the web'.format(default_zip_file), flush=True)
87+
downloadFile('http://lyman.house/download/{}'.format(default_zip_file))
8288
#downloadFile('http://lyman.house/download/fonts_chinese.zip')
8389

8490
try :
@@ -88,21 +94,35 @@ def read_file(pathName, input_filters_dict, random_state=None):
8894
except:
8995
rd_font = ()
9096

97+
# with ZipFile(pathName, 'r') as myzip:
98+
# if len(rd_font) == 0:
99+
# names = myzip.namelist()
100+
# print ('\nreading all files...please wait')
101+
# df = pd.concat(apply_column_filters(pd.read_csv(myzip.open(fname,'r')), input_filters_dict) for fname in names)
102+
# else:
103+
# try:
104+
# df = pd.concat(apply_column_filters(pd.read_csv(myzip.open(font+".csv",'r')), input_filters_dict) for font in rd_font)
105+
# except:
106+
# raise ValueError('Could not find font file {} in the zip file'.format(rd_font))
107+
# myzip.close()
108+
# assert df.size >0
109+
91110
with ZipFile(pathName, 'r') as myzip:
92111
if len(rd_font) == 0:
93112
names = myzip.namelist()
94113
print ('\nreading all files...please wait')
95-
df = pd.concat(pd.read_csv(myzip.open(fname,'r')) for fname in names)
114+
df = pd.concat(apply_column_filters(pd.read_csv(myzip.open(fname,'r')), input_filters_dict) for fname in names)
96115
else:
97116
try:
98-
df = pd.concat(pd.read_csv(myzip.open(font+".csv",'r'))for font in rd_font)
117+
df = pd.concat(apply_column_filters(pd.read_csv(myzip.open(font+".csv",'r')), input_filters_dict) for font in rd_font)
99118
except:
100119
raise ValueError('Could not find font file {} in the zip file'.format(rd_font))
101120
myzip.close()
102121
assert df.size >0
122+
103123
return df.sample(frac=1, random_state=random_state)
104124

105-
def get_list(pathName="fonts.zip",input_filters_dict={}):
125+
def get_list(pathName=default_zip_file,input_filters_dict={}):
106126
'''
107127
Read the entire database of fonts to find out what unique entries are
108128
available.
@@ -137,8 +157,7 @@ def get_list(pathName="fonts.zip",input_filters_dict={}):
137157
pass
138158

139159
df = read_file(pathName,input_filters_dict)
140-
df = df.loc[:,:'r0c0']
141-
df = apply_column_filters(df, input_filters_dict )
160+
df = df.loc[:,:'r0c0']
142161
keys=list(input_filters_dict.keys())
143162
df = df[keys]
144163
df= df.drop_duplicates()
@@ -273,15 +292,9 @@ def apply_column_filters(df, input_filters_dict ):
273292
if len(value) > 0:
274293
criterion = df[key].map(lambda x: x in value)
275294
df = df[criterion]
276-
print (key,value)
277-
try:
278-
delivered = sorted(df.loc[:,key].unique())
279-
except:
280-
raise ValueError('Could not find column {} in dataframe columns {}'.format(key, df.columns))
281-
print("\t{}(s) : {}".format(key, delivered))
282295
return df
283296

284-
def read_data(fileName="fonts.zip",
297+
def read_data(fileName=default_zip_file,
285298
input_filters_dict={},
286299
output_feature_list=[],
287300
test_size=0.0,
@@ -408,7 +421,7 @@ class DataSets(object):
408421
5) construct training and test set TruthedCharacters classes and return them
409422
'''
410423
engine_type = engine_type.lower()
411-
print (program_name())
424+
412425
print('\nparameter: input_filters_dict\n\t{}'.format(input_filters_dict))
413426
print('parameter: output_feature_list\n\t{}'.format(output_feature_list))
414427

@@ -422,7 +435,7 @@ class DataSets(object):
422435

423436
print('input filters available: \n\t{}:'.format(available_columns))
424437

425-
df = apply_column_filters(df, input_filters_dict )
438+
426439

427440
h=int((df.iloc[0])['h']) # get height and width of the image
428441
w=int((df.iloc[0])['w']) # assumes that h and w are the same for all rows
@@ -721,7 +734,11 @@ def show_figures(plt, title="untitled"):
721734
os.mkdir(plot_dir)
722735
except:
723736
pass
724-
save_file_name= '{}/{}_{}_{}.png'.format(plot_dir, program_name(), num_fig, title)
737+
738+
# characters that are invalid in (Windows) file names: \/:*?"<>|  -- only '/' is replaced below
739+
title_file = title.replace('/','_')
740+
save_file_name= '{}/{}_{}_{}.png'.format(plot_dir, program_name(), num_fig, title_file )
741+
print ('plotting {}'.format(save_file_name))
725742
plt.savefig(save_file_name, dpi=300)
726743
plt.clf() # savefig does not clear the figure like show does
727744
plt.cla()

p110_scaling_features.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,29 @@
1313
1414
Created on Jun 23, 2016
1515
16-
from Python Machine Learning by Sebastian Raschka
16+
from Python Machine Learning by Sebastian Raschka under the following license
17+
18+
The MIT License (MIT)
19+
20+
Copyright (c) 2015, 2016 SEBASTIAN RASCHKA (mail@sebastianraschka.com)
21+
22+
Permission is hereby granted, free of charge, to any person obtaining a copy
23+
of this software and associated documentation files (the "Software"), to deal
24+
in the Software without restriction, including without limitation the rights
25+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
26+
copies of the Software, and to permit persons to whom the Software is
27+
furnished to do so, subject to the following conditions:
28+
29+
The above copyright notice and this permission notice shall be included in all
30+
copies or substantial portions of the Software.
31+
32+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
35+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
37+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
38+
SOFTWARE.
1739
1840
@author: richard lyman
1941
'''

p115_l1_l2_regularization.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,29 @@
2222
2323
Created on Jun 23, 2016
2424
25-
from Python Machine Learning by Sebastian Raschka
25+
from Python Machine Learning by Sebastian Raschka under the following license
26+
27+
The MIT License (MIT)
28+
29+
Copyright (c) 2015, 2016 SEBASTIAN RASCHKA (mail@sebastianraschka.com)
30+
31+
Permission is hereby granted, free of charge, to any person obtaining a copy
32+
of this software and associated documentation files (the "Software"), to deal
33+
in the Software without restriction, including without limitation the rights
34+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
35+
copies of the Software, and to permit persons to whom the Software is
36+
furnished to do so, subject to the following conditions:
37+
38+
The above copyright notice and this permission notice shall be included in all
39+
copies or substantial portions of the Software.
40+
41+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
42+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
43+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
44+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
45+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
46+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
47+
SOFTWARE.
2648
2749
@author: richard lyman
2850
'''

p119_squential_backward_selection.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,29 @@
1515
1616
Created on Jun 23, 2016
1717
18-
from Python Machine Learning by Sebastian Raschka
18+
from Python Machine Learning by Sebastian Raschka under the following license
19+
20+
The MIT License (MIT)
21+
22+
Copyright (c) 2015, 2016 SEBASTIAN RASCHKA (mail@sebastianraschka.com)
23+
24+
Permission is hereby granted, free of charge, to any person obtaining a copy
25+
of this software and associated documentation files (the "Software"), to deal
26+
in the Software without restriction, including without limitation the rights
27+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
28+
copies of the Software, and to permit persons to whom the Software is
29+
furnished to do so, subject to the following conditions:
30+
31+
The above copyright notice and this permission notice shall be included in all
32+
copies or substantial portions of the Software.
33+
34+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
35+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
36+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
37+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
38+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
39+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
40+
SOFTWARE.
1941
2042
@author: richard lyman
2143
'''

p124_random_forest.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,29 @@
1414
1515
Created on Jun 23, 2016
1616
17-
from Python Machine Learning by Sebastian Raschka
17+
from Python Machine Learning by Sebastian Raschka under the following license
18+
19+
The MIT License (MIT)
20+
21+
Copyright (c) 2015, 2016 SEBASTIAN RASCHKA (mail@sebastianraschka.com)
22+
23+
Permission is hereby granted, free of charge, to any person obtaining a copy
24+
of this software and associated documentation files (the "Software"), to deal
25+
in the Software without restriction, including without limitation the rights
26+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
27+
copies of the Software, and to permit persons to whom the Software is
28+
furnished to do so, subject to the following conditions:
29+
30+
The above copyright notice and this permission notice shall be included in all
31+
copies or substantial portions of the Software.
32+
33+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
34+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
35+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
36+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
37+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
38+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
39+
SOFTWARE.
1840
1941
@author: richard lyman
2042
'''

p124_random_forest_feature_importance.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,29 @@
88
99
Created on Jun 23, 2016
1010
11-
from Python Machine Learning by Sebastian Raschka
11+
from Python Machine Learning by Sebastian Raschka under the following license
12+
13+
The MIT License (MIT)
14+
15+
Copyright (c) 2015, 2016 SEBASTIAN RASCHKA (mail@sebastianraschka.com)
16+
17+
Permission is hereby granted, free of charge, to any person obtaining a copy
18+
of this software and associated documentation files (the "Software"), to deal
19+
in the Software without restriction, including without limitation the rights
20+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
21+
copies of the Software, and to permit persons to whom the Software is
22+
furnished to do so, subject to the following conditions:
23+
24+
The above copyright notice and this permission notice shall be included in all
25+
copies or substantial portions of the Software.
26+
27+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
28+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
29+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
30+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
31+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
32+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33+
SOFTWARE.
1234
1335
@author: richard lyman
1436
'''

p131_principal_component_analysis.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,29 @@
1010
1111
Created on Jul 2, 2016
1212
13-
from Python Machine Learning by Sebastian Raschka
13+
from Python Machine Learning by Sebastian Raschka under the following license
14+
15+
The MIT License (MIT)
16+
17+
Copyright (c) 2015, 2016 SEBASTIAN RASCHKA (mail@sebastianraschka.com)
18+
19+
Permission is hereby granted, free of charge, to any person obtaining a copy
20+
of this software and associated documentation files (the "Software"), to deal
21+
in the Software without restriction, including without limitation the rights
22+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
23+
copies of the Software, and to permit persons to whom the Software is
24+
furnished to do so, subject to the following conditions:
25+
26+
The above copyright notice and this permission notice shall be included in all
27+
copies or substantial portions of the Software.
28+
29+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
30+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
31+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
32+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
33+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
34+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
35+
SOFTWARE.
1436
1537
@author: richard lyman
1638
'''

p141_linear_descriminant_analsys.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,29 @@
2828
2929
Created on Jul 2, 2016
3030
31-
from Python Machine Learning by Sebastian Raschka
31+
from Python Machine Learning by Sebastian Raschka under the following license
32+
33+
The MIT License (MIT)
34+
35+
Copyright (c) 2015, 2016 SEBASTIAN RASCHKA (mail@sebastianraschka.com)
36+
37+
Permission is hereby granted, free of charge, to any person obtaining a copy
38+
of this software and associated documentation files (the "Software"), to deal
39+
in the Software without restriction, including without limitation the rights
40+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
41+
copies of the Software, and to permit persons to whom the Software is
42+
furnished to do so, subject to the following conditions:
43+
44+
The above copyright notice and this permission notice shall be included in all
45+
copies or substantial portions of the Software.
46+
47+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
48+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
49+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
50+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
51+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
52+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
53+
SOFTWARE.
3254
3355
@author: richard lyman
3456
'''

p154_pca_nonlinear_mapings.py

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,29 @@
99
1010
Created on Jul 2, 2016
1111
12-
from Python Machine Learning by Sebastian Raschka
12+
from Python Machine Learning by Sebastian Raschka under the following license
13+
14+
The MIT License (MIT)
15+
16+
Copyright (c) 2015, 2016 SEBASTIAN RASCHKA (mail@sebastianraschka.com)
17+
18+
Permission is hereby granted, free of charge, to any person obtaining a copy
19+
of this software and associated documentation files (the "Software"), to deal
20+
in the Software without restriction, including without limitation the rights
21+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
22+
copies of the Software, and to permit persons to whom the Software is
23+
furnished to do so, subject to the following conditions:
24+
25+
The above copyright notice and this permission notice shall be included in all
26+
copies or substantial portions of the Software.
27+
28+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
29+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
30+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
31+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
32+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
33+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34+
SOFTWARE.
1335
1436
@author: richard lyman
1537
'''

0 commit comments

Comments
 (0)