
Commit fd01ad4

Add files via upload
Change name of Theano program
1 parent d87ff18 commit fd01ad4

File tree

1 file changed: +325 -0 lines changed

q4_Theano_mlp.py

Lines changed: 325 additions & 0 deletions
@@ -0,0 +1,325 @@
"""
Example employing Lasagne for digit recognition, adapted from the Lasagne
MNIST example to read handprinted digit images through ocr_utils.
This example is deliberately structured as a long flat file, focusing on how
to use Lasagne, instead of focusing on writing maximally modular and reusable
code. The original is used as the foundation for the introductory Lasagne
tutorial: http://lasagne.readthedocs.org/en/latest/user/tutorial.html
More in-depth examples and reproductions of paper results are maintained in
a separate repository: https://github.com/Lasagne/Recipes

Original: https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py

@author: richard lyman
"""

from __future__ import print_function

import sys
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
import ocr_utils

# ##################### Build the neural network model #######################
# This script supports three types of models. For each one, we define a
# function that takes a Theano variable representing the input and returns
# the output layer of a neural network model built in Lasagne.

def build_mlp(input_var=None, nRow=28, nCol=28):
    # This creates an MLP of two hidden layers of 800 units each, followed by
    # a softmax output layer of 10 units. It applies 20% dropout to the input
    # data and 50% dropout to the hidden layers.

    # Input layer, specifying the expected input shape of the network
    # (unspecified batchsize, 1 channel, nRow rows and nCol columns) and
    # linking it to the given Theano variable `input_var`, if any:
    l_in = lasagne.layers.InputLayer(shape=(None, 1, nRow, nCol),
                                     input_var=input_var)

    # Apply 20% dropout to the input data:
    l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)

    # Add a fully-connected layer of 800 units, using the linear rectifier, and
    # initializing weights with Glorot's scheme (which is the default anyway):
    l_hid1 = lasagne.layers.DenseLayer(
            l_in_drop, num_units=800,
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform())

    # We'll now add dropout of 50%:
    l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)

    # Another 800-unit layer:
    l_hid2 = lasagne.layers.DenseLayer(
            l_hid1_drop, num_units=800,
            nonlinearity=lasagne.nonlinearities.rectify)

    # 50% dropout again:
    l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)

    # Finally, we'll add the fully-connected output layer, of 10 softmax units:
    l_out = lasagne.layers.DenseLayer(
            l_hid2_drop, num_units=10,
            nonlinearity=lasagne.nonlinearities.softmax)

    # Each layer is linked to its incoming layer(s), so we only need to pass
    # the output layer to give access to a network in Lasagne:
    return l_out

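
# Editor's sketch (not part of the original script): because each Lasagne
# layer records its incoming layer, the single output layer returned by
# build_mlp() is enough to reach the whole network. The hypothetical helper
# below lists all layers with their output shapes and counts the trainable
# parameters; it is defined but never called by the script itself.
def inspect_network(output_layer):
    for layer in lasagne.layers.get_all_layers(output_layer):
        print(type(layer).__name__, lasagne.layers.get_output_shape(layer))
    print('trainable parameters:',
          lasagne.layers.count_params(output_layer, trainable=True))
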
def build_custom_mlp(input_var=None, depth=2, width=800, drop_input=.2,
                     drop_hidden=.5):
    # By default, this creates the same network as `build_mlp`, but it can be
    # customized with respect to the number and size of hidden layers. This
    # mostly showcases how creating a network in Python code can be a lot more
    # flexible than a configuration file. Note that to make the code easier,
    # all the layers are just called `network` -- there is no need to give them
    # different names if all we return is the last one we created anyway; we
    # just used different names above for clarity.

    # Input layer and dropout (with shortcut `dropout` for `DropoutLayer`):
    network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                        input_var=input_var)
    if drop_input:
        network = lasagne.layers.dropout(network, p=drop_input)
    # Hidden layers and dropout:
    nonlin = lasagne.nonlinearities.rectify
    for _ in range(depth):
        network = lasagne.layers.DenseLayer(
                network, width, nonlinearity=nonlin)
        if drop_hidden:
            network = lasagne.layers.dropout(network, p=drop_hidden)
    # Output layer:
    softmax = lasagne.nonlinearities.softmax
    network = lasagne.layers.DenseLayer(network, 10, nonlinearity=softmax)
    return network

def build_cnn(input_var=None):
    # As a third model, we'll create a CNN of two convolution + pooling stages
    # and a fully-connected hidden layer in front of the output layer.

    # Input layer, as usual:
    network = lasagne.layers.InputLayer(shape=(None, 1, 28, 28),
                                        input_var=input_var)
    # This time we do not apply input dropout, as it tends to work less well
    # for convolutional layers.

    # Convolutional layer with 32 kernels of size 5x5. Strided and padded
    # convolutions are supported as well; see the docstring.
    network = lasagne.layers.Conv2DLayer(
            network, num_filters=32, filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform())
    # Expert note: Lasagne provides alternative convolutional layers that
    # override Theano's choice of which implementation to use; for details
    # please see http://lasagne.readthedocs.org/en/latest/user/tutorial.html.

    # Max-pooling layer of factor 2 in both dimensions:
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # Another convolution with 32 5x5 kernels, and another 2x2 pooling:
    network = lasagne.layers.Conv2DLayer(
            network, num_filters=32, filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            num_units=256,
            nonlinearity=lasagne.nonlinearities.rectify)

    # And, finally, the 10-unit output layer with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            num_units=10,
            nonlinearity=lasagne.nonlinearities.softmax)

    return network

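
# Editor's note (sketch, not in the original): with the fixed 28x28 input,
# the spatial size walks 28 -> 24 (5x5 valid convolution) -> 12 (2x2
# pooling) -> 8 (second convolution) -> 4 (second pooling), so the dense
# layer sees 32 * 4 * 4 = 512 features per image. This can be confirmed with:
#
#   for layer in lasagne.layers.get_all_layers(build_cnn(T.tensor4('x'))):
#       print(lasagne.layers.get_output_shape(layer))
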
# ############################# Batch iterator ###############################
# This is just a simple helper function iterating over training data in
# mini-batches of a particular size, optionally in random order. It assumes
# data is available as numpy arrays. For big datasets, you could load numpy
# arrays as memory-mapped files (np.load(..., mmap_mode='r')), or write your
# own custom data iteration function. For small datasets, you can also copy
# them to GPU at once for slightly improved performance. This would involve
# several changes in the main program, though, and is not demonstrated here.
# Notice that this function returns only mini-batches of size `batchsize`.
# If the size of the data is not a multiple of `batchsize`, it will not
# return the last (remaining) mini-batch.

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    # Truncate to a whole number of batches; any remainder is dropped.
    ln = len(inputs) - len(inputs) % batchsize
    assert ln % batchsize == 0
    if shuffle:
        indices = np.arange(ln)
        np.random.shuffle(indices)

    for start_idx in range(0, ln, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

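
# Editor's sketch (not in the original): a quick demonstration of the
# drop-last behaviour with dummy data -- 10 samples at batchsize 4 yield
# two batches of 4, and the final 2 samples are skipped:
#
#   xs = np.arange(10).reshape(10, 1)
#   ys = np.arange(10)
#   for xb, yb in iterate_minibatches(xs, ys, 4):
#       print(xb.ravel(), yb)      # [0 1 2 3] then [4 5 6 7]
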
# ############################## Main program ################################
# Everything else will be handled in our main program now. We could pull out
# more functions to better separate the code, but it wouldn't make it any
# easier to read.

def main(model='mlp', num_epochs=50):

    print("Loading data...")

    # Select handprinted digit images; ASCII codes 48..57 are '0'..'9'
    # (the original upload had range(48, 57), which would exclude '9'):
    input_filters_dict = {'font': ('HANDPRINT',), 'm_label': range(48, 58)}
    output_feature_list = ['m_label', 'image']
    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                             output_feature_list=output_feature_list,
                             engine_type='theano',
                             test_size=.1,
                             evaluation_size=.1,
                             dtype='float32')
    nRows = ds.train.num_rows
    nCols = ds.train.num_columns
    X_train = ds.train.features[1]
    X_val = ds.evaluation.features[1]
    X_test = ds.test.features[1]
    # Shift the ASCII labels down to class indices 0..9:
    y_train = np.array(ds.train.features[0] - 48, dtype=np.int32)
    y_test = np.array(ds.test.features[0] - 48, dtype=np.int32)
    y_val = np.array(ds.evaluation.features[0] - 48, dtype=np.int32)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var, nRows, nCols)
    elif model.startswith('custom_mlp:'):
        # The spec after the colon is DEPTH,WIDTH,DROP_IN,DROP_HID,
        # e.g. 'custom_mlp:2,800,0.2,0.5'.
        depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')
        network = build_custom_mlp(input_var, int(depth), int(width),
                                   float(drop_in), float(drop_hid))
    elif model == 'cnn':
        network = build_cnn(input_var)
    else:
        print("Unrecognized model type %r." % model, flush=True)
        return

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
            loss, params, learning_rate=0.01, momentum=0.9)

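    # Editor's sketch (not in the original): swapping in another optimizer
    # from lasagne.updates is a one-line change, e.g. Adam:
    #
    #   updates = lasagne.updates.adam(loss, params, learning_rate=0.001)
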
    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype='float32')

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time), flush=True)
        print("  training loss:\t\t{:.6f}".format(
            train_err / train_batches), flush=True)
        print("  validation loss:\t\t{:.6f}".format(
            val_err / val_batches), flush=True)
        print("  validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:", flush=True)
    print("  test loss:\t\t\t{:.6f}".format(test_err / test_batches), flush=True)
    print("  test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100), flush=True)

    # Optionally, you could now dump the network weights to a file like this:
    # np.savez('model.npz', *lasagne.layers.get_all_param_values(network))
    #
    # And load them again later on like this:
    # with np.load('model.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    # lasagne.layers.set_all_param_values(network, param_values)

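    # Editor's sketch (not in the original): once trained, a prediction
    # function can be compiled from the same deterministic expression and
    # applied to new image batches, e.g.:
    #
    #   predict_fn = theano.function([input_var],
    #                                T.argmax(test_prediction, axis=1))
    #   print(predict_fn(X_test[:10]))   # predicted class indices 0..9
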
if __name__ == '__main__':
    if ('--help' in sys.argv) or ('-h' in sys.argv):
        print("Trains a neural network on handprinted digits using Lasagne.")
        print("Usage: %s [MODEL [EPOCHS]]" % sys.argv[0])
        print()
        print("MODEL: 'mlp' for a simple Multi-Layer Perceptron (MLP),")
        print("       'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID' for an MLP")
        print("       with DEPTH hidden layers of WIDTH units, DROP_IN")
        print("       input dropout and DROP_HID hidden dropout,")
        print("       'cnn' for a simple Convolutional Neural Network (CNN).")
        print("EPOCHS: number of training epochs to perform (default: 50)")
    else:
        kwargs = {}
        if len(sys.argv) > 1:
            kwargs['model'] = sys.argv[1]
        if len(sys.argv) > 2:
            kwargs['num_epochs'] = int(sys.argv[2])
        main(**kwargs)

    print('\n########################### No Errors ####################################')
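
# Editor's sketch (not in the original): example invocations, assuming the
# ocr_utils dataset is available locally:
#
#   python q4_Theano_mlp.py                          # default MLP, 50 epochs
#   python q4_Theano_mlp.py cnn 10                   # CNN for 10 epochs
#   python q4_Theano_mlp.py custom_mlp:2,800,0.2,0.5 20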
