
Commit e787220

Allow running both with and without tensorflow, removing the redundant code (train_tb.py).

1 parent e4d4811

File tree: 3 files changed, +24 -196 lines


README.md

Lines changed: 1 addition & 4 deletions
@@ -5,9 +5,6 @@ There's something difference compared to neuraltalk2.
 - Instead of including the convnet in the model, we use preprocessed features. (finetuneable cnn version is in the branch **with_finetune**)
 - Use resnet101; the same way as in self-critical (the preprocessing code may have bug, haven't tested yet)
 
-# TODO:
-- Other models
-
 # Requirements
 Python 2.7 (no [coco-caption](https://github.com/tylin/coco-caption) version for python 3), pytorch
 

@@ -49,7 +46,7 @@ $ python train.py --input_json coco/cocotalk.json --input_json --input_fc_dir da
 
 The train script will take over, and start dumping checkpoints into the folder specified by `checkpoint_path` (default = current folder). For more options, see `opts.py`.
 
-If you have tensorflow, you can run train.py instead of `train_tb.py`. `train_tb.py` saves learning curves by summary writer, and can be visualized using tensorboard.
+If you have tensorflow, the loss histories are automatically dumped into checkpoint_path, and can be visualized using tensorboard.
 
 The current command use scheduled sampling, you can also set scheduled_sampling_start to -1 to turn off scheduled sampling.
 
train.py

Lines changed: 23 additions & 1 deletion
@@ -19,13 +19,23 @@
 import eval_utils
 import misc.utils as utils
 
-import os
+try:
+    import tensorflow as tf
+except ImportError:
+    print("Tensorflow not installed; No tensorboard logging.")
+    tf = None
+
+def add_summary_value(writer, key, value, iteration):
+    summary = tf.Summary(value=[tf.Summary.Value(tag=key, simple_value=value)])
+    writer.add_summary(summary, iteration)
 
 def train(opt):
     loader = DataLoader(opt)
     opt.vocab_size = loader.vocab_size
     opt.seq_length = loader.seq_length
 
+    tf_summary_writer = tf and tf.summary.FileWriter(opt.checkpoint_path)
+
     infos = {}
     if opt.start_from is not None:
         # open old infos and check if models are compatible

@@ -111,6 +121,12 @@ def train(opt):
 
         # Write the training loss summary
         if (iteration % opt.losses_log_every == 0):
+            if tf is not None:
+                add_summary_value(tf_summary_writer, 'train_loss', train_loss, iteration)
+                add_summary_value(tf_summary_writer, 'learning_rate', opt.current_lr, iteration)
+                add_summary_value(tf_summary_writer, 'scheduled_sampling_prob', model.ss_prob, iteration)
+                tf_summary_writer.flush()
+
             loss_history[iteration] = train_loss
             lr_history[iteration] = opt.current_lr
             ss_prob_history[iteration] = model.ss_prob

@@ -123,6 +139,12 @@ def train(opt):
             eval_kwargs.update(vars(opt))
             val_loss, predictions, lang_stats = eval_utils.eval_split(model, crit, loader, eval_kwargs)
 
+            # Write validation result into summary
+            if tf is not None:
+                add_summary_value(tf_summary_writer, 'validation loss', val_loss, iteration)
+                for k,v in lang_stats.items():
+                    add_summary_value(tf_summary_writer, k, v, iteration)
+                tf_summary_writer.flush()
+
             val_result_history[iteration] = {'loss': val_loss, 'lang_stats': lang_stats, 'predictions': predictions}
 
             # Save model if is improving on validation result
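
The key idiom above is the guarded import plus the short-circuit `tf and tf.summary.FileWriter(...)`: when tensorflow is missing, `tf` is `None`, the writer is never constructed, and every logging site is skipped via `if tf is not None`, so one train.py covers both environments and train_tb.py becomes redundant. A self-contained sketch of the same pattern, assuming TensorFlow 1.x when present (the `log_scalar` helper, `./log` path, and sample values are illustrative, not from the commit):

```python
try:
    import tensorflow as tf  # TensorFlow 1.x summary API
except ImportError:
    tf = None  # tensorboard logging silently becomes a no-op

# Short-circuit 'and': evaluates to None when tf is None.
writer = tf and tf.summary.FileWriter('./log')

def log_scalar(key, value, iteration):
    # Guarded like the calls in train.py: do nothing without tensorflow.
    if tf is None:
        return
    summary = tf.Summary(value=[tf.Summary.Value(tag=key, simple_value=value)])
    writer.add_summary(summary, iteration)
    writer.flush()

log_scalar('train_loss', 0.42, 100)  # runs with or without tensorflow installed
```
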

train_tb.py

Lines changed: 0 additions & 191 deletions
This file was deleted.
