-
Notifications
You must be signed in to change notification settings - Fork 377
/
Copy pathread_tsv.py
executable file
·37 lines (27 loc) · 1.02 KB
/
read_tsv.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
#!/usr/bin/env python
import base64
import numpy as np
import csv
import sys
import zlib
import time
import mmap
# Lift the csv per-field size cap; the base64-encoded columns are presumably
# larger than the default limit.
try:
    csv.field_size_limit(sys.maxsize)
except OverflowError:
    # sys.maxsize does not fit in a C long on some platforms (e.g. 64-bit
    # Windows); fall back to the largest 32-bit value.
    csv.field_size_limit(2 ** 31 - 1)

# Column order of the TSV file (the file itself has no header row).
FIELDNAMES = ['image_id', 'image_w', 'image_h', 'num_boxes', 'boxes', 'features']
infile = '/data/coco/tsv/trainval/karpathy_val_resnet101_faster_rcnn_genome.tsv'


def _open_tsv(path):
    """Open *path* read-only in the mode the interpreter's csv module expects."""
    if sys.version_info[0] >= 3:
        # Python 3's csv module needs text mode with newline translation off.
        return open(path, 'r', newline='')
    # Python 2's csv module needs binary mode.
    return open(path, 'rb')


def _decode_item(item):
    """Convert one raw csv row (all strings) in place and return it.

    The integer columns are parsed with ``int``; ``boxes`` and ``features``
    are base64-decoded, interpreted as float32 and reshaped to
    ``(num_boxes, -1)``.
    """
    for field in ('image_id', 'image_h', 'image_w', 'num_boxes'):
        item[field] = int(item[field])
    for field in ('boxes', 'features'):
        # b64decode replaces base64.decodestring, which was removed in
        # Python 3.9; it accepts both str and bytes input.
        raw = base64.b64decode(item[field])
        # frombuffer wraps the decoded bytes without copying, so the
        # resulting array is read-only; copy it before modifying.
        item[field] = np.frombuffer(
            raw, dtype=np.float32).reshape((item['num_boxes'], -1))
    return item


def read_tsv(path, max_rows=None):
    """Read a feature TSV file into a dict keyed by integer image id.

    Args:
        path: Path of a tab-separated file whose columns follow FIELDNAMES.
        max_rows: Stop after this many rows; ``None`` reads the whole file.

    Returns:
        A dict mapping ``image_id`` to the decoded row dict. If an image id
        occurs more than once, the last row read wins.

    Raises:
        IOError/OSError: if the file cannot be opened.
        ValueError/TypeError: if a row is malformed (non-integer column,
            invalid base64, or a buffer that does not divide into
            ``num_boxes`` rows).
    """
    in_data = {}
    if max_rows is not None and max_rows <= 0:
        return in_data
    with _open_tsv(path) as tsv_in_file:
        reader = csv.DictReader(
            tsv_in_file, delimiter='\t', fieldnames=FIELDNAMES)
        for row_number, item in enumerate(reader, 1):
            _decode_item(item)
            in_data[item['image_id']] = item
            # Never true when max_rows is None, so the whole file is read.
            if row_number == max_rows:
                break
    return in_data


if __name__ == '__main__':
    # Verify we can read a tsv: decode only the first row and show it.
    in_data = read_tsv(infile, max_rows=1)
    print(in_data)