import tensorflow as tf
import numpy as np
from collections import namedtuple
import datetime
import ocr_utils
from n0_network import base_network as b_network

class network(b_network):
    ''' definition of the network
    '''
    def __init__(self, truthed_features, dtype=np.float32):
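        # truthed_features is the dataset description object; as used below it must
        # expose feature_names, feature_width, num_rows, num_columns and num_features,
        # where feature_width[i] is the flattened width of feature i.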

        self._sess = tf.InteractiveSession()

        lst = []
        extra_features_width = 0  # width of extra features

        """# ==============================================================================

        Placeholders

        Compute the size of various layers

        Create a tensorflow Placeholder for each feature of data returned from the
        dataset

        """  # ==============================================================================

        for i, nm in enumerate(truthed_features.feature_names):

            # features[0] is always the target; for instance it may be m_label_one_hot.
            # The second feature, features[1], is the 'image' that is passed to the
            # convolution layers. Any additional features bypass the convolution layers
            # and go directly into the fully connected layer.

            # The width of the extra features is calculated in order to allocate
            # the correct widths of weights and inputs.
            # Names are assigned to make the graph look pretty on TensorBoard.

            if i == 0:
                nm = 'y_' + nm
            else:
                nm = 'x_' + nm
            if i > 1:
                extra_features_width += truthed_features.feature_width[i]
            lst.append(tf.placeholder(dtype, shape=[None, truthed_features.feature_width[i]], name=nm))

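        # For example (illustrative values only, not taken from the dataset): feature
        # names ('m_label_one_hot', 'image', 'upper_case') with widths (47, 400, 1)
        # would yield placeholders 'y_m_label_one_hot' [None, 47], 'x_image' [None, 400]
        # and 'x_upper_case' [None, 1], with extra_features_width == 1.
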
        # ph is a named tuple with key names like 'image' and 'm_label', and values
        # that are tensors. The display names on the TensorBoard graph are
        # 'y_m_label', 'x_image', 'x_upper_case', etc.

        Place_Holders = namedtuple('Place_Holders', truthed_features.feature_names)
        self._ph = Place_Holders(*lst)  # unpack placeholders into named tuple
        self._keep_prob = tf.placeholder(dtype, name='keep_prob')
        self._nRows = truthed_features.num_rows     # image height
        self._nCols = truthed_features.num_columns  # image width
        nFc = 1024     # size of fully connected layer
        nConv1 = 32    # size of first convolution layer
        nConv2 = 64    # size of second convolution layer
        nTarget = truthed_features.feature_width[0]  # the number of one_hot features in the target, 'm_label'
        n_h_pool2_outputs = int(self._nRows / 4) * int(self._nCols / 4) * nConv2  # output width of the second pooling layer
        n_h_pool2_outputsx = n_h_pool2_outputs + extra_features_width             # input width of the fully connected layer
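        # Each 2x2 max-pool layer halves the image height and width, so after two
        # pooling layers the spatial size is (nRows/4) x (nCols/4) with nConv2 channels.
        # As a worked example (assuming, purely for illustration, a 20x20 input image):
        # 5 * 5 * 64 = 1600 values per sample flow into the fully connected layer,
        # plus extra_features_width for the bypassed features.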

        """# ==============================================================================

        Build a Multilayer Convolutional Network

        Weight Initialization

        """  # ==============================================================================

        def weight_variable(shape, dtype):
            initial = tf.truncated_normal(shape, stddev=0.1, dtype=dtype)
            return tf.Variable(initial)

        def bias_variable(shape, dtype):
            initial = tf.constant(0.1, shape=shape, dtype=dtype)
            return tf.Variable(initial)
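        # Weights start as small truncated-normal noise to break symmetry between
        # units; biases start slightly positive so the ReLU units are active at first.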

        """# ==============================================================================

        Convolution and Pooling

        To keep our code cleaner, let's also abstract these operations into functions.

        """  # ==============================================================================

        def conv2d(x, W):
            return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

        def max_pool_2x2(x):
            return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                                  strides=[1, 2, 2, 1], padding='SAME')
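        # conv2d uses stride 1 and 'SAME' padding, so it preserves the spatial size of
        # its input; max_pool_2x2 pools over 2x2 blocks with stride 2, halving height
        # and width (rounding up for odd sizes because of 'SAME' padding).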

        """# ==============================================================================

        First Convolutional Layer

        """  # ==============================================================================
        with tf.name_scope("w_conv1") as scope:
            W_conv1 = weight_variable([5, 5, 1, nConv1], dtype)
            b_conv1 = bias_variable([nConv1], dtype)

        with tf.name_scope("reshape_x_image") as scope:
            self._x_image = tf.reshape(self._ph.image, [-1, self._nCols, self._nRows, 1])

        image_summ = tf.image_summary("x_image", self._x_image)
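        # W_conv1 has shape [5, 5, 1, nConv1]: 5x5 patches over 1 input channel,
        # producing nConv1 feature maps. The flat 'image' placeholder is reshaped into
        # the 4-D single-channel tensor that conv2d expects, and a few input images
        # are logged for TensorBoard via the image summary.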

        """# ==============================================================================

        We then convolve x_image with the weight tensor, add the bias, apply the ReLU
        function, and finally max pool.

        """  # ==============================================================================

        with tf.name_scope("convolve_1") as scope:
            h_conv1 = tf.nn.relu(conv2d(self._x_image, W_conv1) + b_conv1)

        with tf.name_scope("pool_1") as scope:
            h_pool1 = max_pool_2x2(h_conv1)
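        # h_pool1 holds nConv1 feature maps at half the input's spatial resolution
        # (one 2x2 pooling step so far).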

        """# ==============================================================================

        Second Convolutional Layer

        In order to build a deep network, we stack several layers of this type. The
        second layer will have nConv2 (64) features for each 5x5 patch.

        """  # ==============================================================================

        with tf.name_scope("convolve_2") as scope:
            W_conv2 = weight_variable([5, 5, nConv1, nConv2], dtype)
            b_conv2 = bias_variable([nConv2], dtype)
            h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)

        with tf.name_scope("pool_2") as scope:
            h_pool2 = max_pool_2x2(h_conv2)

        """# ==============================================================================

        Densely Connected Layer

        Now that the image size has been reduced by the two pooling layers, we add a
        fully connected layer with nFc (1024) neurons to allow processing on the entire
        image. We reshape the tensor from the pooling layer into a batch of vectors,
        multiply by a weight matrix, add a bias, and apply a ReLU.

        """  # ==============================================================================

        with tf.name_scope("W_fc1_b") as scope:
            W_fc1 = weight_variable([n_h_pool2_outputsx, nFc], dtype)
            b_fc1 = bias_variable([nFc], dtype)

        h_pool2_flat = tf.reshape(h_pool2, [-1, n_h_pool2_outputs])

        # append the features, from the 2nd on, that go directly to the fully connected layer
        for i in range(2, truthed_features.num_features):
            h_pool2_flat = tf.concat(1, [h_pool2_flat, self._ph[i]])
        h_fc1 = tf.nn.relu(tf.matmul(h_pool2_flat, W_fc1) + b_fc1)
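        # Note: tf.concat(1, values) is the pre-1.0 TensorFlow signature (axis first),
        # matching the other 0.x-era APIs used in this file. The extra features are
        # appended along dimension 1, which is why W_fc1's input width is
        # n_h_pool2_outputsx rather than n_h_pool2_outputs.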

        """# ==============================================================================

        Dropout

        """  # ==============================================================================

        with tf.name_scope("drop") as scope:
            h_fc1_drop = tf.nn.dropout(h_fc1, self._keep_prob)
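        # keep_prob is a placeholder so dropout can be turned on for training
        # (keep_prob < 1.0) and off for evaluation (keep_prob = 1.0) without
        # rebuilding the graph.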

        """# ==============================================================================

        Readout Layer

        """  # ==============================================================================
        with tf.name_scope("softmax") as scope:
            W_fc2 = weight_variable([nFc, nTarget], dtype)
            b_fc2 = bias_variable([nTarget], dtype)
            y_conv = tf.nn.softmax(tf.matmul(h_fc1_drop, W_fc2) + b_fc2)

        with tf.name_scope("xent") as scope:

            # 1e-8 added to eliminate the crash of training when taking log of 0
            cross_entropy = -tf.reduce_sum(self._ph[0] * tf.log(y_conv + 1e-8))
            ce_summ = tf.scalar_summary("cross entropy", cross_entropy)

        with tf.name_scope("train") as scope:
            self._train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

        with tf.name_scope("test") as scope:
            self._correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(self._ph[0], 1))
            self._prediction = tf.argmax(y_conv, 1)

        self._accuracy = tf.reduce_mean(tf.cast(self._correct_prediction, dtype))
        accuracy_summary = tf.scalar_summary("accuracy", self._accuracy)
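        # cross_entropy is the standard multi-class loss -sum(y_ * log(y_conv)) against
        # the one-hot target self._ph[0]; Adam minimizes it with a 1e-4 learning rate,
        # and accuracy is the fraction of samples whose argmax prediction matches the
        # target's argmax.
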
        """# ==============================================================================

        Start TensorFlow Interactive Session

        """  # ==============================================================================

        self._sess.run(tf.initialize_all_variables())
        self._merged = tf.merge_all_summaries()
        tm = ""
        tp = datetime.datetime.now().timetuple()
        for i in range(4):
            tm += str(tp[i]) + '-'
        tm += str(tp[4])
        self._writer = tf.train.SummaryWriter("/tmp/ds_logs/" + tm, self._sess.graph)
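        # Summaries are written to a per-run directory named by the current
        # year-month-day-hour-minute, so each training run shows up separately in
        # TensorBoard (e.g. `tensorboard --logdir=/tmp/ds_logs`).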

        def computeSize(s, tens):
            sumC = 1
            tShape = tens.get_shape()
            nDims = len(tShape)
            for i in range(nDims):
                sumC *= tShape[i].value
            print('\t{}\t{}'.format(s, sumC), flush=True)
            return sumC
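        # computeSize prints and returns the number of elements in a variable; summing
        # it over all weight and bias tensors gives the network's parameter count.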

        print('network size:', flush=True)
        total = computeSize("W_fc1", W_fc1) + \
            computeSize("b_fc1", b_fc1) + \
            computeSize("W_conv1", W_conv1) + \
            computeSize("b_conv1", b_conv1) + \
            computeSize("W_conv2", W_conv2) + \
            computeSize("b_conv2", b_conv2) + \
            computeSize("W_fc2", W_fc2) + \
            computeSize("b_fc2", b_fc2)
        print('\ttotal\t{}'.format(total), flush=True)

    def reset_graph(self):
        tf.reset_default_graph()  # only necessary when iterating through fonts
        self._sess.close()
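
# A minimal usage sketch (hypothetical: the loader call and batch interface are
# assumptions, not part of this file; the dataset object must provide the attributes
# listed at the top of __init__):
#
#   truthed_features = ocr_utils.read_data(...)   # hypothetical loader call
#   net = network(truthed_features)
#   # then run net._train_step / net._accuracy in net._sess with a feed_dict that
#   # maps each placeholder in net._ph plus net._keep_prob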