"""
Example employing Lasagne for handprinted digit recognition (data loaded
through ocr_utils), adapted from the Lasagne MNIST example. It is
deliberately structured as a long flat file, focusing on how to use Lasagne
rather than on writing maximally modular and reusable code. The original
example is the foundation for the introductory Lasagne tutorial:
http://lasagne.readthedocs.org/en/latest/user/tutorial.html
More in-depth examples and reproductions of paper results are maintained in
a separate repository: https://github.com/Lasagne/Recipes

Based on:
https://github.com/Lasagne/Lasagne/blob/master/examples/mnist.py

@author: richard lyman
"""

from __future__ import print_function

import sys
import time
import numpy as np
import theano
import theano.tensor as T
import lasagne
import ocr_utils  # provides read_data(), used in main() to load the handprinted-digit data

# ##################### Build the neural network model #######################
# This script supports three types of models. For each one, we define a
# function that takes a Theano variable representing the input and returns
# the output layer of a neural network model built in Lasagne.

def build_mlp(input_var=None, nRow=28, nCol=28):
    # This creates an MLP of two hidden layers of 800 units each, followed by
    # a softmax output layer of 10 units. It applies 20% dropout to the input
    # data and 50% dropout to the hidden layers.

    # Input layer, specifying the expected input shape of the network
    # (unspecified batchsize, 1 channel, nRow rows and nCol columns) and
    # linking it to the given Theano variable `input_var`, if any:
    l_in = lasagne.layers.InputLayer(shape=(None, 1, nRow, nCol),
                                     input_var=input_var)

    # Apply 20% dropout to the input data:
    l_in_drop = lasagne.layers.DropoutLayer(l_in, p=0.2)

    # Add a fully-connected layer of 800 units, using the linear rectifier, and
    # initializing weights with Glorot's scheme (which is the default anyway):
    l_hid1 = lasagne.layers.DenseLayer(
            l_in_drop, num_units=800,
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform())

    # We'll now add dropout of 50%:
    l_hid1_drop = lasagne.layers.DropoutLayer(l_hid1, p=0.5)

    # Another 800-unit layer:
    l_hid2 = lasagne.layers.DenseLayer(
            l_hid1_drop, num_units=800,
            nonlinearity=lasagne.nonlinearities.rectify)

    # 50% dropout again:
    l_hid2_drop = lasagne.layers.DropoutLayer(l_hid2, p=0.5)

    # Finally, we'll add the fully-connected output layer, of 10 softmax units:
    l_out = lasagne.layers.DenseLayer(
            l_hid2_drop, num_units=10,
            nonlinearity=lasagne.nonlinearities.softmax)

    # Each layer is linked to its incoming layer(s), so we only need to pass
    # the output layer to give access to a network in Lasagne:
    return l_out


def build_custom_mlp(input_var=None, depth=2, width=800, drop_input=.2,
                     drop_hidden=.5, nRow=28, nCol=28):
    # By default, this creates the same network as `build_mlp`, but it can be
    # customized with respect to the number and size of hidden layers. This
    # mostly showcases how creating a network in Python code can be a lot more
    # flexible than a configuration file. Note that to make the code easier,
    # all the layers are just called `network` -- there is no need to give them
    # different names if all we return is the last one we created anyway; we
    # just used different names above for clarity.

    # Input layer and dropout (with shortcut `dropout` for `DropoutLayer`):
    network = lasagne.layers.InputLayer(shape=(None, 1, nRow, nCol),
                                        input_var=input_var)
    if drop_input:
        network = lasagne.layers.dropout(network, p=drop_input)
    # Hidden layers and dropout:
    nonlin = lasagne.nonlinearities.rectify
    for _ in range(depth):
        network = lasagne.layers.DenseLayer(
                network, width, nonlinearity=nonlin)
        if drop_hidden:
            network = lasagne.layers.dropout(network, p=drop_hidden)
    # Output layer:
    softmax = lasagne.nonlinearities.softmax
    network = lasagne.layers.DenseLayer(network, 10, nonlinearity=softmax)
    return network
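
# Illustrative example (not in the original script): the command-line form
# 'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID' handled in main() corresponds to
# a direct call such as the one below (the 3/512 values are hypothetical):
#
#     network = build_custom_mlp(input_var, depth=3, width=512,
#                                drop_input=0.2, drop_hidden=0.5)
#
# which is what main() builds when invoked with 'custom_mlp:3,512,.2,.5'.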


def build_cnn(input_var=None, nRow=28, nCol=28):
    # As a third model, we'll create a CNN of two convolution + pooling stages
    # and a fully-connected hidden layer in front of the output layer.

    # Input layer, as usual:
    network = lasagne.layers.InputLayer(shape=(None, 1, nRow, nCol),
                                        input_var=input_var)
    # This time we do not apply input dropout, as it tends to work less well
    # for convolutional layers.

    # Convolutional layer with 32 kernels of size 5x5. Strided and padded
    # convolutions are supported as well; see the docstring.
    network = lasagne.layers.Conv2DLayer(
            network, num_filters=32, filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify,
            W=lasagne.init.GlorotUniform())
    # Expert note: Lasagne provides alternative convolutional layers that
    # override Theano's choice of which implementation to use; for details
    # please see http://lasagne.readthedocs.org/en/latest/user/tutorial.html.

    # Max-pooling layer of factor 2 in both dimensions:
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # Another convolution with 32 5x5 kernels, and another 2x2 pooling:
    network = lasagne.layers.Conv2DLayer(
            network, num_filters=32, filter_size=(5, 5),
            nonlinearity=lasagne.nonlinearities.rectify)
    network = lasagne.layers.MaxPool2DLayer(network, pool_size=(2, 2))

    # A fully-connected layer of 256 units with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            num_units=256,
            nonlinearity=lasagne.nonlinearities.rectify)

    # And, finally, the 10-unit output layer with 50% dropout on its inputs:
    network = lasagne.layers.DenseLayer(
            lasagne.layers.dropout(network, p=.5),
            num_units=10,
            nonlinearity=lasagne.nonlinearities.softmax)

    return network

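# Shape check (illustrative, not from the original script), assuming the
# default 28x28 input: a 5x5 unpadded convolution gives 24x24 feature maps,
# 2x2 max-pooling gives 12x12, the second 5x5 convolution gives 8x8, and the
# second 2x2 pooling gives 4x4. The 256-unit dense layer therefore sees
# 32 * 4 * 4 = 512 features per example.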

# ############################# Batch iterator ###############################
# This is just a simple helper function iterating over training data in
# mini-batches of a particular size, optionally in random order. It assumes
# data is available as numpy arrays. For big datasets, you could load numpy
# arrays as memory-mapped files (np.load(..., mmap_mode='r')), or write your
# own custom data iteration function. For small datasets, you can also copy
# them to GPU at once for slightly improved performance. This would involve
# several changes in the main program, though, and is not demonstrated here.
# Notice that this function returns only mini-batches of size `batchsize`.
# If the size of the data is not a multiple of `batchsize`, it will not
# return the last (remaining) mini-batch.

def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    # Truncate to a whole number of batches; any trailing samples that do not
    # fill a complete batch are dropped.
    ln = len(inputs) - len(inputs) % batchsize
    if shuffle:
        indices = np.arange(ln)
        np.random.shuffle(indices)

    for start_idx in range(0, ln, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]

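# Illustrative usage (not part of the original script): with, say, 1050
# training examples and batchsize=500, the generator yields two batches of
# 500 samples and silently drops the remaining 50. The training loop in
# main() consumes it as
#
#     for inputs, targets in iterate_minibatches(X_train, y_train, 500,
#                                                shuffle=True):
#         ...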

# ############################## Main program ################################
# Everything else will be handled in our main program now. We could pull out
# more functions to better separate the code, but it wouldn't make it any
# easier to read.

def main(model='mlp', num_epochs=50):

    print("Loading data...")

    # Select handprinted characters whose 'm_label' is an ASCII code in
    # 48..57, i.e. the digits '0' through '9':
    input_filters_dict = {'font': ('HANDPRINT',), 'm_label': range(48, 58)}
    output_feature_list = ['m_label', 'image']
    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                             output_feature_list=output_feature_list,
                             engine_type='theano',
                             test_size=0.1,
                             evaluation_size=0.1,
                             dtype='float32')
    nRows = ds.train.num_rows
    nCols = ds.train.num_columns
    X_train = ds.train.features[1]
    X_val = ds.evaluation.features[1]
    X_test = ds.test.features[1]

    # The labels are ASCII codes; subtracting 48 maps '0'..'9' to the class
    # indices 0..9 expected by the 10-unit softmax output:
    y_train = np.array(ds.train.features[0] - 48, dtype=np.int32)
    y_test = np.array(ds.test.features[0] - 48, dtype=np.int32)
    y_val = np.array(ds.evaluation.features[0] - 48, dtype=np.int32)

    # Prepare Theano variables for inputs and targets
    input_var = T.tensor4('inputs')
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    if model == 'mlp':
        network = build_mlp(input_var, nRows, nCols)
    elif model.startswith('custom_mlp:'):
        depth, width, drop_in, drop_hid = model.split(':', 1)[1].split(',')
        network = build_custom_mlp(input_var, int(depth), int(width),
                                   float(drop_in), float(drop_hid),
                                   nRow=nRows, nCol=nCols)
    elif model == 'cnn':
        network = build_cnn(input_var, nRows, nCols)
    else:
        print("Unrecognized model type %r." % model, flush=True)
        return

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    # We could add some weight decay as well here, see lasagne.regularization.
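    # For example (illustrative only, not enabled here), an L2 penalty with a
    # hypothetical coefficient of 1e-4 could be added via:
    #     l2_penalty = lasagne.regularization.regularize_network_params(
    #             network, lasagne.regularization.l2)
    #     loss = loss + 1e-4 * l2_penalty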

    # Create update expressions for training, i.e., how to modify the
    # parameters at each training step. Here, we'll use Stochastic Gradient
    # Descent (SGD) with Nesterov momentum, but Lasagne offers plenty more.
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.nesterov_momentum(
            loss, params, learning_rate=0.01, momentum=0.9)
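    # Another optimizer from lasagne.updates could be swapped in instead,
    # e.g. (illustrative, not used here):
    #     updates = lasagne.updates.adam(loss, params, learning_rate=0.001)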

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction,
                                                            target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype='float32')

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)

    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])
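    # A plain class-prediction function could be compiled the same way
    # (illustrative sketch; `predict_fn` is not used elsewhere in this script):
    #     predict_fn = theano.function([input_var],
    #                                  T.argmax(test_prediction, axis=1))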

    # Finally, launch the training loop.
    print("Starting training...")
    # We iterate over epochs:
    for epoch in range(num_epochs):
        # In each epoch, we do a full pass over the training data:
        train_err = 0
        train_batches = 0
        start_time = time.time()
        for batch in iterate_minibatches(X_train, y_train, 500, shuffle=True):
            inputs, targets = batch
            train_err += train_fn(inputs, targets)
            train_batches += 1

        # And a full pass over the validation data:
        val_err = 0
        val_acc = 0
        val_batches = 0
        for batch in iterate_minibatches(X_val, y_val, 500, shuffle=False):
            inputs, targets = batch
            err, acc = val_fn(inputs, targets)
            val_err += err
            val_acc += acc
            val_batches += 1

        # Then we print the results for this epoch:
        print("Epoch {} of {} took {:.3f}s".format(
            epoch + 1, num_epochs, time.time() - start_time), flush=True)
        print(" training loss:\t\t{:.6f}".format(
            train_err / train_batches), flush=True)
        print(" validation loss:\t\t{:.6f}".format(
            val_err / val_batches), flush=True)
        print(" validation accuracy:\t\t{:.2f} %".format(
            val_acc / val_batches * 100))

    # After training, we compute and print the test error:
    test_err = 0
    test_acc = 0
    test_batches = 0
    for batch in iterate_minibatches(X_test, y_test, 500, shuffle=False):
        inputs, targets = batch
        err, acc = val_fn(inputs, targets)
        test_err += err
        test_acc += acc
        test_batches += 1
    print("Final results:", flush=True)
    print(" test loss:\t\t\t{:.6f}".format(test_err / test_batches), flush=True)
    print(" test accuracy:\t\t{:.2f} %".format(
        test_acc / test_batches * 100), flush=True)

    # Optionally, you could now dump the network weights to a file like this:
    # np.savez('model.npz', *lasagne.layers.get_all_param_values(network))
    #
    # And load them again later on like this:
    # with np.load('model.npz') as f:
    #     param_values = [f['arr_%d' % i] for i in range(len(f.files))]
    # lasagne.layers.set_all_param_values(network, param_values)


if __name__ == '__main__':
    if ('--help' in sys.argv) or ('-h' in sys.argv):
        print("Trains a neural network on handprinted digits using Lasagne.")
        print("Usage: %s [MODEL [EPOCHS]]" % sys.argv[0])
        print()
        print("MODEL: 'mlp' for a simple Multi-Layer Perceptron (MLP),")
        print("       'custom_mlp:DEPTH,WIDTH,DROP_IN,DROP_HID' for an MLP")
        print("       with DEPTH hidden layers of WIDTH units, DROP_IN")
        print("       input dropout and DROP_HID hidden dropout,")
        print("       'cnn' for a simple Convolutional Neural Network (CNN).")
        print("EPOCHS: number of training epochs to perform (default: 50)")
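        # Example invocation (illustrative; the script name depends on how
        # this file is saved):
        #     python <this_script>.py custom_mlp:2,800,.2,.5 50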
    else:
        kwargs = {}
        if len(sys.argv) > 1:
            kwargs['model'] = sys.argv[1]
        if len(sys.argv) > 2:
            kwargs['num_epochs'] = int(sys.argv[2])
        main(**kwargs)

print('\n########################### No Errors ####################################')