Skip to content

Commit 6e291fe

Browse files
committed
change get_list output to dataframe because order of entries was
indeterminate
1 parent 3d59deb commit 6e291fe

File tree

3 files changed

+14
-14
lines changed

3 files changed

+14
-14
lines changed

ocr_utils.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -116,8 +116,7 @@ def get_list(pathName="fonts.zip",input_filters_dict={}):
116116
117117
Returns
118118
--------------
119-
a list of all the all the unique values available in the
120-
dataset for the given columns
119+
a dataframe of all the unique lines in the dataset
121120
122121
Example:
123122
--------------
@@ -141,8 +140,7 @@ def get_list(pathName="fonts.zip",input_filters_dict={}):
141140
keys=list(input_filters_dict.keys())
142141
df = df[keys]
143142
df= df.drop_duplicates()
144-
y = np.array(df).tolist()
145-
return y
143+
return df
146144

147145

148146
class TruthedCharacters(object):

q1_database_statistics.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
'''
88
import ocr_utils
99
import numpy as np
10+
import pandas as pd
1011

1112
# # read and show the character images for each font variant
1213
# # output only the character label and the image
@@ -18,12 +19,12 @@
1819
# title = '{}: {}'.format('AGENCY','AGENCY Is')
1920
# ocr_utils.montage(X2D, title=title)
2021

21-
lst = ocr_utils.get_list(input_filters_dict = {'font':()})
22+
df1 = ocr_utils.get_list(input_filters_dict = {'font':()})
2223

2324
print('\n\nAvailable fonts:')
2425
import pprint
2526
pp = pprint.PrettyPrinter()
26-
pp.pprint(lst)
27+
pp.pprint(df1)
2728
#
2829
# for font in lst:
2930
# input_filters_dict = {'font':font, 'm_label': range(100)}
@@ -40,9 +41,9 @@
4041
# read and show the character images for each font variant
4142
# output only the character label and the image
4243
fl = ['m_label','image']
43-
for font in lst:
44-
lst2 = ocr_utils.get_list(input_filters_dict={'font':font, 'fontVariant':()})
45-
for f,fontVariant in lst2:
44+
for font in df1:
45+
df2 = ocr_utils.get_list(input_filters_dict={'font':font, 'fontVariant':()})
46+
for font,fontVariant in zip(df2['font'],df2['fontVariant']):
4647
fd = {'font': font, 'fontVariant': fontVariant}
4748
ds = ocr_utils.read_data(input_filters_dict=fd, output_feature_list=fl, dtype=np.int32)
4849
y,X = ds.train.features

q2_tensorflow_mnist.py

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@
3030
import datetime
3131
from collections import namedtuple
3232
import numpy as np
33+
import pandas as pd
3334

3435

3536
def train_a_font(input_filters_dict,output_feature_list, nEpochs=5000):
@@ -323,7 +324,7 @@ def computeSize(s,tens):
323324
sess.close()
324325

325326

326-
if True:
327+
if False:
327328
# single font train
328329

329330
# examples
@@ -360,18 +361,18 @@ def computeSize(s,tens):
360361
# loop through all the fonts and train individually
361362

362363
# pick up the entire list of fonts and font variants. Train each one.
363-
lst = ocr_utils.get_list(input_filters_dict={'font': ()})
364+
df1 = ocr_utils.get_list(input_filters_dict={'font': ()})
364365

365366
import pprint as pprint
366367
pp = pprint.PrettyPrinter(indent=4)
367-
pp.pprint(lst)
368+
pp.pprint(df1)
368369

369370
output_feature_list = ['m_label_one_hot','image','italic','aspect_ratio','upper_case']
370371

371372
# Change nEpochs to 5000 for better results
372-
for l in lst:
373+
for l in df1:
373374
input_filters_dict= {'font': (l[0],)}
374-
train_a_font(input_filters_dict,output_feature_list, nEpochs = 1000)
375+
train_a_font(input_filters_dict,output_feature_list, nEpochs = 500)
375376

376377

377378
print ('\n########################### No Errors ####################################')

0 commit comments

Comments
 (0)