This repository was archived by the owner on Jun 5, 2024. It is now read-only.

Commit 3b1143c: fixed some bugs and updated README

1 parent: 39bf80d

5 files changed: +20, −31 lines

README.md (+7, −24)

````diff
@@ -66,44 +66,27 @@ $ wget https://aladdin-eax.inf.cs.cmu.edu/shares/diva_obj_detect_models/models/o
 $ tar -zxvf obj_v3_model.tgz
 ```

-2. Run object detection on the test videos
-```
-$ python obj_detect.py --model_path obj_v3_model --version 3 --video_dir v1-val_testvideos \
---video_lst_file v1-val_testvideos.lst --out_dir test_json_out --frame_gap 1 --visualize \
---vis_path test_vis_out --get_box_feat --box_feat_path test_box_feat_out
-```
-The object detection output for each frame will be in `test_json_out/` and in COCO format. The visualization frames will be in `test_vis_out/`. The ROI features will be in `test_box_feat_out/`. Remove `--visualize --vis_path test_vis_out` and `--get_box_feat --box_feat_path test_box_feat_out` if you only want the json files.
-
-3. Run object detection & tracking on the test videos
+2. Run object detection & tracking on the test videos
 ```
 $ python obj_detect_tracking.py --model_path obj_v3_model --version 3 --video_dir v1-val_testvideos \
---video_lst_file v1-val_testvideos.lst --out_dir test_json_out --frame_gap 1 --get_tracking \
+--video_lst_file v1-val_testvideos.lst --frame_gap 1 --get_tracking \
 --tracking_dir test_track_out
 ```
+To get the object detection output in COCO json format, add `--out_dir test_json_out`; to get the bounding box visualization, add `--visualize --vis_path test_vis_out`.
+To speed things up, try `--frame_gap 8`; the tracks between detection frames will be linearly interpolated.
 The tracking results will be in `test_track_out/` and in MOTChallenge format. To visualize the tracking results:
 ```
-$ ls $PWD/v1-val_testvideos/* > v1-val_testvideos.abs.lst
-$ python get_frames_resize.py v1-val_testvideos.abs.lst v1-val_testvideos_frames/ --use_2level
-$ cd test_track_out/VIRAT_S_000205_05_001092_001124.mp4
-$ ls Person > Person.lst; ls Vehicle > Vehicle.lst
-$ python ../../track_to_json.py Vehicle Vehicle.lst Vehicle Vehicle_json
-$ python ../../track_to_json.py Person Person.lst Person Person_json
-$ python ../../vis_json.py Person.lst ../../v1-val_testvideos_frames/ Person_json/ Person_vis
-$ python ../../vis_json.py Vehicle.lst ../../v1-val_testvideos_frames/ Vehicle_json/ Vehicle_vis
-$ ffmpeg -framerate 30 -i Vehicle_vis/VIRAT_S_000205_05_001092_001124/VIRAT_S_000205_05_001092_001124_F_%08d.jpg Vehicle_vis_video.mp4
-$ ffmpeg -framerate 30 -i Person_vis/VIRAT_S_000205_05_001092_001124/VIRAT_S_000205_05_001092_001124_F_%08d.jpg Person_vis_video.mp4
-
-# or you could put "Person/Vehicle" visualization into the same video
+# Put "Person/Vehicle" track visualizations into the same video
 $ ls $PWD/v1-val_testvideos/* > v1-val_testvideos.abs.lst
 $ python get_frames_resize.py v1-val_testvideos.abs.lst v1-val_testvideos_frames/ --use_2level
 $ python tracks_to_json.py test_track_out/ v1-val_testvideos.abs.lst test_track_out_json
 $ python vis_json.py v1-val_testvideos.abs.lst v1-val_testvideos_frames/ test_track_out_json/ test_track_out_vis
 # then use ffmpeg to make videos
-
+$ ffmpeg -framerate 30 -i test_track_out_vis/VIRAT_S_000205_05_001092_001124/VIRAT_S_000205_05_001092_001124_F_%08d.jpg vis_video.mp4
 ```
 Now you have the tracking visualization videos for both the "Person" and "Vehicle" classes.

-4. You can also run both inferencing with frozen graph (See [this](SPEED.md) for instructions of how to pack the model). Change `--model_path obj_v3.pb` and add `--is_load_from_pb`. It is about 30% faster.
+3. You can also run inferencing with a frozen graph (see [SPEED.md](SPEED.md) for instructions on how to pack the model). Change `--model_path obj_v3.pb` and add `--is_load_from_pb`. It is about 30% faster. For running on the [MEVA](http://mevadata.org/) dataset (avi videos & indoor scenes) or with [EfficientDet](https://github.com/google/automl/tree/master/efficientdet) models, see the examples in [COMMANDS.md](COMMANDS.md).

 ## Models
 These are the models you can use for inferencing. The original ActEv annotations can be downloaded from [here](https://next.cs.cmu.edu/data/actev-v1-drop4-yaml.tgz). I will add instructions for training and testing if requested. Click to download each model.
````
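The `--frame_gap 8` speed-up added to the README works because detection only runs every N frames, and the boxes on the frames in between are linearly interpolated from the surrounding detections. A minimal sketch of that idea, assuming `[x, y, w, h]` boxes (the function name is illustrative, not the repo's actual code):

```python
def interpolate_boxes(frame_a, box_a, frame_b, box_b):
    """Linearly interpolate [x, y, w, h] boxes for the frames strictly
    between two detection frames, as when running with --frame_gap > 1.

    Returns {frame_index: box} for frames frame_a+1 .. frame_b-1.
    """
    out = {}
    span = frame_b - frame_a
    for f in range(frame_a + 1, frame_b):
        t = (f - frame_a) / span  # interpolation weight in (0, 1)
        out[f] = [a + t * (b - a) for a, b in zip(box_a, box_b)]
    return out
```

With a gap of 8, seven intermediate frames are filled in between each pair of detection frames.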

obj_detect_tracking.py (+1)

```diff
@@ -777,6 +777,7 @@ def log_gpu_util(interval, gpuid_range):
       vis_file = os.path.join(vis_path,
                               "%s_F_%08d.jpg" % (videoname, vis_count))
       cv2.imwrite(vis_file, newim)
+      vis_count += 1

     cur_frame += 1
```
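The added `vis_count += 1` gives each saved visualization frame its own consecutive index in the filename; without a per-write increment, successive frames would map to the same `_F_%08d.jpg` path and overwrite each other. A minimal sketch of the corrected pattern (the function name is hypothetical; the real script writes with `cv2.imwrite`):

```python
import os

def vis_frame_paths(num_frames, vis_path, videoname):
    """Build one distinct output path per visualization frame.

    Mirrors the fixed loop in obj_detect_tracking.py: the counter is
    advanced after every write so filenames stay unique and consecutive.
    """
    paths = []
    vis_count = 0
    for _ in range(num_frames):
        paths.append(os.path.join(vis_path,
                                  "%s_F_%08d.jpg" % (videoname, vis_count)))
        vis_count += 1  # the fix: one filename per written frame
    return paths
```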
track_to_json.py (mode 100644 → 100755, +3, −2)

```diff
@@ -41,7 +41,8 @@

     box = [float(left), float(top), float(width), float(height)]

-    if not data.has_key(frameIdx):
+    #if not data.has_key(frameIdx):
+    if frameIdx not in data:
       data[frameIdx] = []
     data[frameIdx].append({
       "category_id": targetClass2id[args.cat_name],
@@ -53,7 +54,7 @@
     })

   for frameIndex in data:
-
+
     annofile = os.path.join(args.despath, "%s_F_%08d.json"%(videoname, frameIndex))

     with open(annofile, "w") as f:
```

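Both `track_to_json.py` and `tracks_to_json.py` get the same Python 3 migration here: `dict.has_key()` was removed in Python 3, so membership is tested with `in`. A minimal self-contained sketch of the per-frame grouping these scripts perform before writing one json file per frame (function and variable names are illustrative, assuming MOT-style `(frameIdx, left, top, width, height)` rows):

```python
def group_by_frame(rows, cat_id):
    """Group MOT-style track rows into per-frame annotation lists,
    as track_to_json.py does before dumping one json per frame."""
    data = {}
    for frame_idx, left, top, width, height in rows:
        box = [float(left), float(top), float(width), float(height)]
        if frame_idx not in data:  # Python 3: dict.has_key() is gone
            data[frame_idx] = []
        data[frame_idx].append({"category_id": cat_id, "bbox": box})
    return data
```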
tracks_to_json.py (mode 100644 → 100755, +3, −2)

```diff
@@ -44,7 +44,8 @@

     box = [float(left), float(top), float(width), float(height)]

-    if not data.has_key(frameIdx):
+    #if not data.has_key(frameIdx):
+    if frameIdx not in data:
       data[frameIdx] = []
     data[frameIdx].append({
       "category_id": targetClass2id[cat_name],
@@ -56,7 +57,7 @@
     })

   for frameIndex in data:
-
+
     annofile = os.path.join(args.despath, "%s_F_%08d.json"%(os.path.splitext(videoname)[0], frameIndex))

     with open(annofile, "w") as f:
```

vis_json.py (+6, −3)

```diff
@@ -251,11 +251,13 @@ def draw_boxes(im, boxes, labels=None, colors=None, font_scale=0.6,
       box = one['bbox'] # [x, y, w, h]
       box = [box[0], box[1], box[0] + box[2], box[1] + box[3]]
       boxes.append(box)
-      if one.has_key("trackId"):
+      #if one.has_key("trackId"):
+      if "trackId" in one:
        trackId = int(one['trackId'])
        color_key = (trackId, one['cat_name'])
        labels.append("%s: #%s"%(one['cat_name'], trackId))
-       if not color_assign.has_key(color_key):
+       #if not color_assign.has_key(color_key):
+       if color_key not in color_assign:
         this_color = color_queue.pop()
         color_assign[color_key] = this_color
         # recycle it
@@ -266,7 +268,8 @@ def draw_boxes(im, boxes, labels=None, colors=None, font_scale=0.6,
       # no trackId, just visualize the boxes
       cat_name = one['cat_name']
       labels.append("%s: %.2f"%(cat_name, float(one['score'])))
-      if not color_assign.has_key(cat_name):
+      #if not color_assign.has_key(cat_name):
+      if cat_name not in color_assign:
       this_color = color_queue.pop()
       color_assign[cat_name] = this_color
       # recycle it
```

Note that the Python 3 replacement for `one.has_key("trackId")` must be `"trackId" in one`, not `"trackId" not in one`: the branch body reads `one['trackId']`, so an inverted test would raise `KeyError` on every untracked box.

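The `color_assign` / `color_queue` logic these hunks touch keeps a stable color per track (or per category, when there is no `trackId`) by caching the first color popped from a queue. A minimal self-contained sketch of that idea, with an assumed recycle step pushing the color back on the other end (the real script's queue handling may differ; the function name is illustrative):

```python
from collections import deque

def pick_color(color_assign, color_queue, key):
    """Return a stable color for key, assigning one from the queue on
    first sight; a track therefore keeps its color across frames."""
    if key not in color_assign:  # Python 3: dict.has_key() is gone
        this_color = color_queue.pop()
        color_assign[key] = this_color
        color_queue.appendleft(this_color)  # recycle it for later keys
    return color_assign[key]
```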