
Commit 895baf0

Add speed profiling and frozen-graph inferencing.

1 parent 4bed770

19 files changed: +344, -17,724 lines

README.md (+15, -3)
@@ -30,7 +30,7 @@ We utilize state-of-the-art object detection and tracking algorithm in surveilla
 ## Dependencies
 The code was originally written for TensorFlow v1.10 with Python 2.7, but it works on v1.13.1 too. Note that I didn't change the code for v1.13.1; I just disabled the TensorFlow warnings and logging.
 
-Other dependencies: numpy; scipy; sklearn; cv2
+Other dependencies: numpy; scipy; sklearn; cv2; matplotlib; pycocotools
 
 ## Code Overview
 - `obj_detect.py`: Inference code for object detection.
@@ -56,14 +56,15 @@ $ python obj_detect.py --model_path obj_v3_model --version 3 --video_dir v1-val_
 --video_lst_file v1-val_testvideos.lst --out_dir test_json_out --frame_gap 1 --visualize \
 --vis_path test_vis_out --get_box_feat --box_feat_path test_box_feat_out
 ```
-The object detection output for each frame will be in `test_json_out/` and in COCO format. The visualization frames will be in `test_vis_out/`. The ROI features will be in `test_box_feat_out/`. Remove `--visualize --vis_path test_vis_out` and `--get_box_feat --box_feat_path test_box_feat_out` if you only want the json files.
+The object detection output for each frame will be in `test_json_out/` and in COCO format. The visualization frames will be in `test_vis_out/`. The ROI features will be in `test_box_feat_out/`. Remove `--visualize --vis_path test_vis_out` and `--get_box_feat --box_feat_path test_box_feat_out` if you only want the json files. You can also add `--is_load_from_pb` and change `--model_path` to point to a frozen graph to speed up inference a bit.
 
 3. Run object detection & tracking on the test videos
 ```
 $ python obj_detect_tracking.py --model_path obj_v3_model --version 3 --video_dir v1-val_testvideos \
 --video_lst_file v1-val_testvideos.lst --out_dir test_json_out --frame_gap 1 --get_tracking \
 --tracking_dir test_track_out
 ```
+You can also add `--is_load_from_pb` and change `--model_path` to point to a frozen graph to speed up inference a bit.
 The tracking results will be in `test_track_out/` and in MOTChallenge format. To visualize the tracking results:
 ```
 $ ls $PWD/v1-val_testvideos/* > v1-val_testvideos.abs.lst
@@ -247,4 +248,15 @@ These are my experiences with working on this [surveillance dataset](https://act
 1. FPN provides significant improvement over non-FPN backbone;
 2. Dilated CNN in backbone also helps;
 3. Cascade RCNN doesn't help (IoU=0.5). I'm using IoU=0.5 in my evaluation since the original annotations are not "tight" bounding boxes.
-4. Decoupled RCNN slightly improves AP (Person: 0.836 -> 0.837) but takes 7x more time.
+4. Decoupled RCNN slightly improves AP (Person: 0.836 -> 0.837) but takes 7x more time.
+
+## Speed
+**TL;DR**:
+TF v1.10 -> v1.13 (CUDA 9 & cuDNN v7.1 -> CUDA 10 & cuDNN v7.4) ~ +8% faster
+Use frozen graph ~ +..% faster
+Use TensorRT optimized graph ~ +..% faster
+
+Experiments are recorded [here](SPEED.md)
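For context, `--is_load_from_pb` loads a frozen graph instead of restoring variables from a checkpoint. In TF 1.x that pattern generally looks like the minimal sketch below; the file path and tensor names here are hypothetical placeholders, not necessarily the repo's actual ones:

```python
import tensorflow as tf

def load_frozen_graph(pb_path):
    # Parse the serialized GraphDef from the .pb file
    with tf.gfile.GFile(pb_path, "rb") as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    # Import it into a fresh graph; the weights are baked in as constants,
    # so no variable initialization or checkpoint restore is needed
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name="")
    return graph

# Usage sketch (hypothetical tensor names):
# graph = load_frozen_graph("obj_v3_model.pb")
# image_t = graph.get_tensor_by_name("image:0")
# boxes_t = graph.get_tensor_by_name("final_boxes:0")
# with tf.Session(graph=graph) as sess:
#     boxes = sess.run(boxes_t, feed_dict={image_t: frame})
```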

SPEED.md (+3)

@@ -0,0 +1,3 @@
+# Experiments on Inferencing Speed
+
+`InternalError (see above for traceback): Native FunctionDef TRTEngineOp_34_native_segment can't be found in function library`
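The error recorded above comes from running a TensorRT-optimized graph. For reference, the conversion in TF 1.13 goes through the `tf.contrib.tensorrt` module; a minimal sketch, assuming a frozen graph already exists and using a hypothetical output node name:

```python
import tensorflow as tf
from tensorflow.contrib import tensorrt as trt

# Load the frozen graph to be optimized
with tf.gfile.GFile("final.pb", "rb") as f:
    frozen_graph_def = tf.GraphDef()
    frozen_graph_def.ParseFromString(f.read())

# Replace TensorRT-compatible subgraphs with TRTEngineOp nodes
trt_graph_def = trt.create_inference_graph(
    input_graph_def=frozen_graph_def,
    outputs=["final_boxes"],           # hypothetical output node name
    max_batch_size=1,
    max_workspace_size_bytes=1 << 30,  # GPU workspace TensorRT may use
    precision_mode="FP16")             # or "FP32" / "INT8"

# Save the optimized graph for later inference
with tf.gfile.GFile("final_trt.pb", "wb") as f:
    f.write(trt_graph_def.SerializeToString())
```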

main.py (+21, -156)
@@ -4,6 +4,8 @@
 
 import sys,os,argparse
 os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # so here won't have pool allocator info
+# work around a TF bug: when importing a graph for multi-gpu use, the name prefix is not added to ops inside while loops. https://github.com/tensorflow/tensorflow/issues/26526
+os.environ['TF_ENABLE_CONTROL_FLOW_V2'] = '1'
 
 # remove all the annoying warnings from tf v1.10 to v1.13
 import logging
@@ -23,12 +25,11 @@
 import pycocotools.mask as cocomask
 
 from tqdm import tqdm
-from models import fill_full_mask, resizeImage
-from utils import evalcoco, get_op_tensor_name, match_detection, computeAP, computeAR, computeAR_2, grouper, gather_dt, gather_gt, match_dt_gt, gather_act_singles, aggregate_eval, weighted_average
+from models import fill_full_mask, resizeImage, pack, initialize
+from utils import evalcoco, match_detection, computeAP, computeAR, computeAR_2, grouper, gather_dt, gather_gt, match_dt_gt, gather_act_singles, aggregate_eval, weighted_average
 
 from utils import Dataset, Summary, nms_wrapper, FIFO_ME
 
-from nn import soft_nms, nms
 
 def get_args():
     global targetClass2id, targetid2class
@@ -206,8 +207,13 @@ def get_args():
 
     # -------------------- save model for deployment
     parser.add_argument("--is_pack_model", action="store_true", default=False, help="with is_test, this will pack the model to a path instead of testing")
-    parser.add_argument("--pack_model_path", type=str, default=None, help="path to save model")
-    parser.add_argument("--pack_model_note", type=str, default=None, help="leave a note for this packed model for future reference")
+    parser.add_argument("--pack_model_path", type=str, default=None, help="path to save model, a .pb file")
+    parser.add_argument("--note", type=str, default=None, help="leave a note for this packed model for future reference")
+    parser.add_argument("--pack_modelconfig_path", type=str, default=None, help="json file to save the config and note")
+
+    # forward with frozen graph
+    parser.add_argument("--is_load_from_pb", action="store_true")
 
     # ------------------------------------ model specifics

@@ -1158,50 +1164,6 @@ def train_diva(config):
     json.dump(stats,f)
 
 
-def pack(config):
-    model = get_model(config)
-    tfconfig = tf.ConfigProto(allow_soft_placement=True)
-    tfconfig.gpu_options.allow_growth = True # this way it will only allocate necessary memory
-    with tf.Session(config=tfconfig) as sess:
-
-        initialize(load=True,load_best=config.load_best,config=config,sess=sess)
-
-        saver = tf.train.Saver()
-        global_step = model.global_step
-        # put input and output under universal names for reference in deployment
-        # find the necessary stuff in model.get_feed_dict
-        tf.add_to_collection("input",model.x)
-        tf.add_to_collection("is_train",model.is_train) # TODO, change this to a constant
-        tf.add_to_collection("output",model.yp)
-        # also save all the model config and note into the model
-        pack_model_note = tf.get_variable("model_note",shape=[],dtype=tf.string,initializer=tf.constant_initializer(config.pack_model_note),trainable=False)
-        full_config = tf.get_variable("model_config",shape=[],dtype=tf.string,initializer=tf.constant_initializer(json.dumps(vars(config))),trainable=False)
-
-        print "saving packed model"
-        # the following won't save the vars model_note, model_config that are not in the graph,
-        # TODO: fix this
-        """
-        # put into one big file to save
-        input_graph_def = tf.get_default_graph().as_graph_def()
-        #print [n.name for n in input_graph_def.node]
-        # We use a built-in TF helper to export variables to constants
-        output_graph_def = tf.graph_util.convert_variables_to_constants(
-            sess, # The session is used to retrieve the weights
-            input_graph_def, # The graph_def is used to retrieve the nodes
-            [tf.get_collection("output")[0].name.split(":")[0]] # The output node names select the useful nodes
-        )
-        output_graph = os.path.join(config.pack_model_path,"final.pb")
-        # Finally we serialize and dump the output graph to the filesystem
-        with tf.gfile.GFile(output_graph, "wb") as f:
-            f.write(output_graph_def.SerializeToString())
-        print("%d ops in the final graph." % len(output_graph_def.node))
-        """
-        # save it into a path with multiple files
-        saver.save(sess,
-            os.path.join(config.pack_model_path,"final"),
-            global_step=global_step)
-        print "model saved in %s"%(config.pack_model_path)
 
 # given the box, extract feature
 def boxfeat(config):
     imagelist = config.imgpath
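This commit moves `pack` (and `initialize`) into models.py, and `--pack_model_path` now names a single `.pb` file, so the new packing step presumably performs the freezing that was only commented out in the deleted code above. A minimal sketch of that step, assuming the session already holds the restored weights and using the hypothetical `"output"` collection name from the old code:

```python
import json
import tensorflow as tf

def pack_to_pb(config, sess):
    # Bake the current variable values into the graph as constants
    output_node = tf.get_collection("output")[0].name.split(":")[0]
    frozen_graph_def = tf.graph_util.convert_variables_to_constants(
        sess, tf.get_default_graph().as_graph_def(), [output_node])
    # Write one self-contained .pb file
    with tf.gfile.GFile(config.pack_model_path, "wb") as f:
        f.write(frozen_graph_def.SerializeToString())
    # Keep the note and full config in a side-car json file, instead of
    # string variables inside the graph (which the old pack() could not
    # carry into a frozen graph anyway)
    with open(config.pack_modelconfig_path, "w") as f:
        json.dump({"note": config.note, "config": vars(config)}, f)
```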
@@ -1389,10 +1351,10 @@ def forward(config):
 
     #model = get_model(config) # input image -> final_box, final_label, final_masks
     #tester = Tester(model,config,add_mask=config.add_mask)
+
     models = []
     for i in xrange(config.gpuid_start, config.gpuid_start+config.gpu):
-        models.append(get_model(config,i,controller=config.controller))
-
+        models.append(get_model(config, i, controller=config.controller))
 
     model_final_boxes = [model.final_boxes for model in models]
     # [R]
@@ -1419,15 +1381,18 @@
     if not config.diva_class and not config.diva_class2 and not config.diva_class3:
         add_coco(config,config.datajson)
 
-    tfconfig = tf.ConfigProto(allow_soft_placement=True)
+    tfconfig = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
     if not config.use_all_mem:
         tfconfig.gpu_options.allow_growth = True # this way it will only allocate necessary gpu memory, not take it all
 
     tfconfig.gpu_options.visible_device_list = "%s"%(",".join(["%s"%i for i in range(config.gpuid_start, config.gpuid_start+config.gpu)])) # so only these gpus will be used
 
     with tf.Session(config=tfconfig) as sess:
 
-        initialize(load=True,load_best=config.load_best,config=config,sess=sess)
+        # when loading from a packed (frozen) graph, the weights are already in the graph
+        if not config.is_load_from_pb:
+            initialize(load=True, load_best=config.load_best, config=config, sess=sess)
+
         # num_epoch should be 1
         assert config.num_epochs == 1

@@ -2091,109 +2056,6 @@ def test(config):
     print "mean AP with IoU 0.5:%s, mean AR with max detection %s:%s, took %s seconds"%(mean_ap,maxDet,mean_ar,took)
 
 
-
-
-def initialize(load,load_best,config,sess):
-    tf.global_variables_initializer().run()
-    if load:
-        print "restoring model..."
-        allvars = tf.global_variables()
-        allvars = [var for var in allvars if "global_step" not in var.name]
-        #restore_vars = allvars
-        opts = ["Adam","beta1_power","beta1_power_1","beta2_power","beta2_power_1","Adam_1","Adadelta_1","Adadelta","Momentum"]
-
-        allvars = [var for var in allvars if var.name.split(":")[0].split("/")[-1] not in opts]
-        # so allvars is actually the variables except things for training
-
-        if config.ignore_gn_vars:
-            allvars = [var for var in allvars if "/gn" not in var.name.split(":")[0]]
-
-        if config.ignore_vars is not None:
-            ignore_vars = config.ignore_vars.split(":")
-            ignore_vars.extend(opts)
-            # also these
-            #ignore_vars+=["global_step"]
-
-            restore_vars = []
-            for var in allvars:
-                ignore_it = False
-                for ivar in ignore_vars:
-                    if ivar in var.name:
-                        ignore_it=True
-                        print "ignored %s"%var.name
-                        break
-                if not ignore_it:
-                    restore_vars.append(var)
-
-            print "ignoring %s variables, original %s vars, restoring for %s vars"%(len(ignore_vars),len(allvars),len(restore_vars))
-
-        else:
-            restore_vars = allvars
-
-        saver = tf.train.Saver(restore_vars, max_to_keep=5)
-
-        load_from = None
-
-        if config.load_from is not None:
-            load_from = config.load_from
-        else:
-            if load_best:
-                load_from = config.save_dir_best
-            else:
-                load_from = config.save_dir
-
-        ckpt = tf.train.get_checkpoint_state(load_from)
-        if ckpt and ckpt.model_checkpoint_path:
-            loadpath = ckpt.model_checkpoint_path
-
-            saver.restore(sess, loadpath)
-            print "Model:"
-            print "\tloaded %s"%loadpath
-            print ""
-        else:
-            if os.path.exists(load_from):
-                if load_from.endswith(".ckpt"):
-                    # load_from should be a single .ckpt file
-                    saver.restore(sess,load_from)
-                elif load_from.endswith(".npz"):
-                    # load from dict
-                    weights = np.load(load_from)
-                    params = {get_op_tensor_name(n)[1]:v for n,v in dict(weights).iteritems()}
-                    param_names = set(params.iterkeys())
-
-                    #variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES)
-
-                    variables = restore_vars
-
-                    variable_names = set([k.name for k in variables])
-
-                    intersect = variable_names & param_names
-
-                    restore_vars = [v for v in variables if v.name in intersect]
-
-                    with sess.as_default():
-                        for v in restore_vars:
-                            vname = v.name
-                            v.load(params[vname])
-
-                    #print variables # all the model's params
-
-                    not_used = [(one,weights[one].shape) for one in weights.keys() if get_op_tensor_name(one)[1] not in intersect]
-                    if len(not_used) > 0:
-                        print "warning, %s/%s in npz not restored:%s"%(len(weights.keys()) - len(intersect), len(weights.keys()), not_used)
-
-                    #if config.show_restore:
-                    #    print "loaded %s vars:%s"%(len(intersect),intersect)
-
-                else:
-                    raise Exception("Not recognized model type:%s"%load_from)
-            else:
-                raise Exception("Model not exists")
-    print "done."
-
-
 # https://stackoverflow.com/questions/38160940/how-to-count-total-number-of-trainable-parameters-in-a-tensorflow-model
 def cal_total_param():
     total = 0
@@ -2225,9 +2087,12 @@ def log_gpu_util(interval, gpuid_range):
         gpu_util_logs.extend(gpu_utils)
         gpu_temp_logs.extend(gpu_temps)
 
+
 if __name__ == "__main__":
     config = get_args()
 
+    if config.mode == "pack":
+        config.is_pack_model = True
     if config.is_pack_model:
         pack(config)
     else:
