# q6_tensorflow_residual3x3.py

"""# ==========================================================================

# Copyright 2015 Google Inc. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

This sample program is a modified version of the Google mnist convolutional 
network tutorial example.  See the mnist tutorial in www.tensorflow.org 

This graph has multiple sections, 3 layers each (400, 100, 400), followed
by a fully connected layer.

see tensor_flow_graph.png
"""# ==============================================================================
import ocr_utils
import datetime
from collections import namedtuple
import numpy as np
import pandas as pd
 
import tensorflow as tf  
dtype = np.float32
#with tf.device('/GPU:0'):
#with tf.device('/cpu:0'): 
       
def train_a_font(input_filters_dict,output_feature_list, nEpochs=5000):
    """Train a stack of residual fully connected sections and report test accuracy.

    The graph is a chain of sections.  Every section applies three ReLU
    layers (in_out_width -> internal_width -> internal_width -> in_out_width,
    where in_out_width = rows * columns of the image and internal_width is a
    quarter of that) and then adds the section input back onto the output.
    The last section feeds a dropout layer followed by a softmax readout.

    Args:
        input_filters_dict: passed unchanged to ``ocr_utils.read_data`` to
            select the data set.  When it contains a ``'font'`` key, that
            value is included in the final report and montage title.
        output_feature_list: names of the features to read.  Entry 0 is used
            as the target and entry 1 as the network input.  Any further
            entries get a placeholder and are fed, but no layer of this
            graph consumes them.
            NOTE(review): the error-image reshape reads ``ph.image``, so a
            feature named 'image' appears to be required -- confirm against
            ``ds.train.feature_names``.
        nEpochs: maximum number of training steps (one mini-batch per step).
            Training stops early after ten consecutive reports of 100%
            training accuracy.

    Returns:
        None.  Progress is printed, summaries are written below
        ``/tmp/ds_logs/`` and the misclassified test images are handed to
        ``ocr_utils.montage``.
    """
    batch_size = 100        # rows per training / evaluation mini-batch
    report_every = 100      # training steps between accuracy reports
    nSections = 10          # number of 3-layer residual sections
    n_layers = nSections * 3

    ds = ocr_utils.read_data(input_filters_dict=input_filters_dict,
                             output_feature_list=output_feature_list,
                             test_size=.1,
                             engine_type='tensorflow', dtype=dtype)

    sess = tf.InteractiveSession()
    writer = None
    try:
        # ------------------------------------------------------------------
        # Placeholders: one per feature returned from the dataset.
        # The target is prefixed 'y_' and every input 'x_' so that they are
        # easy to tell apart on the TensorBoard graph.
        # ------------------------------------------------------------------
        lst = []
        for i, nm in enumerate(output_feature_list):
            nm = ('y_' if i == 0 else 'x_') + nm
            print (ds.train.features[i].dtype)
            lst.append(tf.placeholder(dtype, shape=[None, ds.train.feature_width[i]], name=nm))

        # ph is a named tuple keyed by feature name whose values are the
        # placeholder tensors; it is also indexed by position below.
        Place_Holders = namedtuple('Place_Holders', ds.train.feature_names)
        ph = Place_Holders(*lst)

        nRows = ds.train.num_rows        # image height
        nCols = ds.train.num_columns     # image width

        in_out_width = nRows*nCols
        internal_width = int(in_out_width/4)
        nTarget = ds.train.feature_width[0]   # width of the one-hot target

        # Per-layer weights, biases and activations; h[k] is the input of
        # layer k and h[k+1] its output.  Every slot is filled by section().
        w = [None] * n_layers
        b = [None] * n_layers
        h = [None] * (n_layers + 1)

        # ------------------------------------------------------------------
        # Variable helpers
        # ------------------------------------------------------------------
        def weight_variable(shape, dtype):
            initial = tf.truncated_normal(shape, stddev=0.1,dtype=dtype)
            return tf.Variable(initial)

        def bias_variable(shape, dtype):
            initial = tf.constant(0.1, shape=shape, dtype=dtype)
            return tf.Variable(initial)

        def shapeOuts(n):
            # Uses the public get_shape() accessor rather than private
            # attributes; the printed shapes are the same objects.
            print ('n={}, hin={},w={}, b={} ,hout={}\n'.format(
                n, h[n].get_shape(), w[n].get_shape(),
                b[n].get_shape(), h[n+1].get_shape()))

        def dense_relu(idx, n_in, n_out):
            """Create layer idx (weights, then bias) and return relu(h[idx] @ w + b)."""
            w[idx] = weight_variable([n_in, n_out], dtype)
            b[idx] = bias_variable([n_out], dtype)
            return tf.nn.relu(tf.matmul(h[idx], w[idx]) + b[idx])

        def section(n):
            """Build layers n, n+1, n+2 and the skip connection from h[n] to h[n+3]."""
            with tf.name_scope('section_'+str(n)+'_0'):
                h[n+1] = dense_relu(n, in_out_width, internal_width)
                shapeOuts(n)

            with tf.name_scope('section_'+str(n)+'_1'):
                h[n+2] = dense_relu(n+1, internal_width, internal_width)
                shapeOuts(n+1)

            with tf.name_scope('section_'+str(n)+'_2'):
                z = dense_relu(n+2, internal_width, in_out_width)
                h[n+3] = tf.add(z, h[n])     # residual: add the section input back
                print('z shape ={}'.format(z.get_shape()))
                shapeOuts(n+2)

        def computeSize(s,tens):
            """Print and return the number of elements in tens."""
            sumC = 1
            for dim in tens.get_shape().as_list():
                sumC *= dim
            print ('\t{}\t{}'.format(s,sumC),flush=True)
            return sumC

        # ------------------------------------------------------------------
        # Build the sectional network
        # ------------------------------------------------------------------
        h[0] = ph[1]
        for i in range(nSections):
            section(3*i)

        # ------------------------------------------------------------------
        # Dropout and readout layer
        # ------------------------------------------------------------------
        keep_prob = tf.placeholder(dtype,name='keep_prob')

        with tf.name_scope("drop"):
            h_fc2_drop = tf.nn.dropout(h[n_layers], keep_prob)

        with tf.name_scope("softmax"):
            w_fc3 = weight_variable([in_out_width, nTarget],dtype)
            b_fc3 = bias_variable([nTarget],dtype)
            y_conv=tf.nn.softmax(tf.matmul(h_fc2_drop, w_fc3) + b_fc3)

        # The reported total covers the section weights and the readout
        # weights and bias; the section biases are not counted.
        print ('network size:',flush=True)
        total = 0
        for i in range(n_layers):
            total = total + computeSize("w{}".format(i),w[i])
        total = total + computeSize ("b_fc3",b_fc3) + computeSize ("w_fc3",w_fc3)
        print('\ttotal\t{}'.format(total),flush=True)

        # ------------------------------------------------------------------
        # Loss, optimizer, accuracy and summaries
        # ------------------------------------------------------------------
        with tf.name_scope("reshape_x_image"):
            # NOTE(review): the reshape is (nCols, nRows) while the error
            # image buffer below is (nRows, nCols); the two only agree for
            # square images -- confirm the intended axis order.
            x_image = tf.reshape(ph.image, [-1,nCols,nRows,1])

        with tf.name_scope("xent"):
            # 1e-8 added to eliminate the crash of training when taking log of 0
            cross_entropy = -tf.reduce_sum(ph[0]*tf.log(y_conv+1e-8))
            tf.scalar_summary("cross entropy", cross_entropy)

        with tf.name_scope("train"):
            train_step = tf.train.AdamOptimizer(1e-4).minimize(cross_entropy)

        with tf.name_scope("test"):
            correct_prediction = tf.equal(tf.argmax(y_conv,1), tf.argmax(ph[0],1))
            accuracy = tf.reduce_mean(tf.cast(correct_prediction, dtype))
            tf.scalar_summary("accuracy", accuracy)

        merged = tf.merge_all_summaries()

        # Log directory name: year-month-day-hour-minute (not zero padded).
        tm = '-'.join(str(v) for v in datetime.datetime.now().timetuple()[:5])
        writer = tf.train.SummaryWriter("/tmp/ds_logs/"+ tm, sess.graph)

        # To see the results in Chrome,
        # Run the following in terminal to activate server.
        # tensorboard --logdir '/tmp/ds_logs/'
        # See results on localhost:6006

        sess.run(tf.initialize_all_variables())

        # ------------------------------------------------------------------
        # Training
        # ------------------------------------------------------------------
        perfect_count = 10
        for i in range(nEpochs):
            batch = ds.train.next_batch(batch_size)
            # the batch list is returned in the same order as the features
            # requested, so position j feeds placeholder j
            feed = {ph[j]: batch[j] for j in range(ds.train.num_features)}

            if i % report_every == 0:
                # Measure with dropout disabled; one run yields both the
                # summary and the accuracy.
                feed[keep_prob] = 1.0
                summary_str, train_accuracy = sess.run([merged, accuracy], feed_dict=feed)
                writer.add_summary(summary_str, i)
                if train_accuracy != 1:
                    perfect_count = 10
                else:
                    perfect_count -= 1
                    if perfect_count == 0:
                        break
                print ("step %d, training accuracy %g"%(i, train_accuracy),flush=True)

            # Always train with dropout enabled, including on report steps.
            feed[keep_prob] = 0.5
            train_step.run(feed_dict=feed)

        # ------------------------------------------------------------------
        # Evaluation on the test split
        # ------------------------------------------------------------------
        feed = {keep_prob: 1.0}
        error_chunks = [np.empty((0,nRows,nCols))]
        n_correct = 0
        n_tested = 0
        for start in range(0, ds.test.features[0].shape[0], batch_size):
            for j in range(ds.train.num_features):
                feed[ph[j]] = ds.test.features[j][start:start+batch_size]
            images, correct = sess.run([x_image, correct_prediction], feed_dict=feed)
            # Count individual rows so a short final batch is weighted by
            # its real size.
            n_correct += int(np.sum(correct))
            n_tested += len(correct)
            error_chunks.append(images[:,:,:,0][np.logical_not(correct)])
        error_images = np.concatenate(error_chunks, axis=0)
        test_accuracy = n_correct / n_tested if n_tested else float('nan')

        if 'font' in input_filters_dict:
            font = input_filters_dict['font']
            print ("test accuracy {} for font: {}".format(test_accuracy, font),flush=True)
            ocr_utils.montage(error_images,title='TensorFlow {} Error Images'.format(font))
        else:
            print ("test accuracy {}".format(test_accuracy),flush=True)
            ocr_utils.montage(error_images,title='TensorFlow Error Images')
    finally:
        # Release resources even when training fails.  The session is closed
        # before the default graph is reset (only necessary when iterating
        # through fonts) so the reset never happens under an active session.
        if writer is not None:
            writer.close()
        sess.close()
        tf.reset_default_graph()

    
if True:
    # Train on a single font selection.
    #
    # input_filters_dict chooses the rows to train on.  Examples:
    #   {'font': ('OCRA',)}
    #       only images from the 'OCRA' font
    #   {'font': ('HANDPRINT',)}
    #       only images from the 'HANDPRINT' font
    #   {'font': ('OCRA','OCRB'), 'fontVariant':('scanned',)}
    #       the 'OCRA' and 'OCRB' fonts with the 'scanned' fontVariant
    #   {}
    #       everything: all fonts, font variants, etc.
    #   {'m_label': range(48,58), 'font': 'E13B'}
    #       the digits 0 through 9 in the E13B font
    #   {'m_label': (48,50), 'font': 'E13B'}
    #       the digits 0 and 2 in the E13B font
    #   {'m_label': range(48,58)}
    #       the digits 0-9 for all fonts
    #
    # output_feature_list chooses the columns that are returned.  Examples:
    #   ['m_label_one_hot','image','italic','aspect_ratio','upper_case']
    #       character label, image, italic flag, aspect ratio, upper-case flag
    #   ['m_label_one_hot','image']
    #       only the character label and the image
    #   ['font_one_hot','image','italic','aspect_ratio','upper_case']
    #       identify the font given the input images

    # Character codes for '0'-'9', 'A'-'Z' and 'a'-'z' in the ARIAL font.
    input_filters_dict = {
        'font': 'ARIAL',
        'm_label': [ord(ch) for ch in
                    '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'],
    }
    output_feature_list = ['m_label_one_hot', 'image']
    train_a_font(input_filters_dict, output_feature_list, nEpochs=50000)

else:
    # Train every font on its own: fetch the complete list of fonts and
    # font variants, print it, then run one training pass per entry.
    df1 = ocr_utils.get_list(input_filters_dict={'font': ()})

    import pprint as pprint
    pp = pprint.PrettyPrinter(indent=4)
    pp.pprint(df1)

    output_feature_list = [
        'm_label_one_hot',
        'image',
        'italic',
        'aspect_ratio',
        'upper_case',
        'font_one_hot',
    ]

    # Raise nEpochs to 5000 for better results.
    for l in df1:
        input_filters_dict = {'font': (l[0],)}
        train_a_font(input_filters_dict, output_feature_list, nEpochs=500)


print ('\n########################### No Errors ####################################')