Commit a73d9c3

Support for Python 3, PyTorch 1.3+, CPU, ship weights
1 parent 8c5862e commit a73d9c3

8 files changed: 39 additions & 9 deletions

README.md

Lines changed: 13 additions & 0 deletions

@@ -1,5 +1,18 @@
 # ImageCaptioning.pytorch
 
+This is a fork of [Ruotian Luo's ImageCaptioning repo](https://github.com/ruotianluo/ImageCaptioning.pytorch), adapted for the Deep Learning with PyTorch book (Manning).
+
+Notable changes:
+
+* Python 3.6+
+* PyTorch 1.3+
+* CPU and GPU support
+* a set of weights is provided in the repo to facilitate getting up to speed
+
+Following are the original notes.
+
+# ImageCaptioning.pytorch
+
 This is an image captioning codebase in PyTorch. If you are familiar with neuraltalk2, here are the differences compared to neuraltalk2.
 - Instead of using random split, we use [karpathy's train-val-test split](http://cs.stanford.edu/people/karpathy/deepimagesent/caption_datasets.zip).
 - Instead of including the convnet in the model, we use preprocessed features. (finetuneable cnn version is in the branch **with_finetune**)
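
The "CPU and GPU support" bullet is implemented throughout the diffs below with a single device-selection idiom. A minimal sketch of that pattern (the `nn.Linear` model here is just a stand-in):

```python
import torch
import torch.nn as nn

# Pick the GPU when one is visible, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

model = nn.Linear(10, 2).to(device=device)   # hypothetical model, moved once at setup
x = torch.randn(4, 10).to(device=device)     # inputs moved the same way
y = model(x)                                  # runs on whichever device was selected
```

Because `.to()` accepts `'cpu'` as well as `'cuda'`, the same line works on machines without a GPU, which the original `.cuda()` calls did not.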
Two binary files added (85.8 MB and 84.6 MB), contents not shown; presumably the shipped weight chunks that eval.py below reassembles.

dataloaderraw.py

Lines changed: 4 additions & 2 deletions

@@ -22,6 +22,8 @@
 from misc.resnet_utils import myResnet
 import misc.resnet
 
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
 class DataLoaderRaw():
 
     def __init__(self, opt):
@@ -37,7 +39,7 @@ def __init__(self, opt):
         self.my_resnet = getattr(misc.resnet, self.cnn_model)()
         self.my_resnet.load_state_dict(torch.load('./data/imagenet_weights/'+self.cnn_model+'.pth'))
         self.my_resnet = myResnet(self.my_resnet)
-        self.my_resnet.cuda()
+        self.my_resnet.to(device=device)
         self.my_resnet.eval()
 
 
@@ -108,7 +110,7 @@ def get_batch(self, split, batch_size=None):
                 img = np.concatenate((img, img, img), axis=2)
 
             img = img.astype('float32')/255.0
-            img = torch.from_numpy(img.transpose([2, 0, 1])).cuda()
+            img = torch.from_numpy(img.transpose([2, 0, 1])).to(device=device)
             with torch.no_grad():
                 img = Variable(preprocess(img))
                 tmp_fc, tmp_att = self.my_resnet(img)
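
The `get_batch` hunk above converts a raw image to a normalized CHW tensor on the chosen device before running the feature extractor. A rough, self-contained sketch of that path, using a plain torchvision ResNet as a stand-in for the repo's `myResnet` wrapper (the names and shapes here are made up):

```python
import numpy as np
import torch
from torchvision import models, transforms

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# ImageNet normalization, standing in for the `preprocess` used in dataloaderraw.py
preprocess = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                  std=[0.229, 0.224, 0.225])
cnn = models.resnet101().to(device=device).eval()    # illustrative, randomly initialized

def image_to_input(img):
    """Turn an HxWxC uint8 array into a normalized CHW float tensor on `device`."""
    if len(img.shape) == 2:                           # grayscale: repeat channel to fake RGB
        img = img[:, :, np.newaxis]
        img = np.concatenate((img, img, img), axis=2)
    img = img.astype('float32') / 255.0               # scale to [0, 1]
    img = torch.from_numpy(img.transpose([2, 0, 1])).to(device=device)  # HWC -> CHW
    return preprocess(img)

with torch.no_grad():                                 # inference only, no autograd graph
    img = image_to_input(np.zeros((256, 256, 3), dtype=np.uint8))
    out = cnn(img.unsqueeze(0))                       # add a batch dimension
```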

eval.py

Lines changed: 13 additions & 3 deletions

@@ -18,6 +18,7 @@
 import argparse
 import misc.utils as utils
 import torch
+import shutil
 
 # Input arguments and options
 parser = argparse.ArgumentParser()
@@ -73,9 +74,19 @@
 
 opt = parser.parse_args()
 
+cnn_model_weights = './data/imagenet_weights/' + opt.cnn_model + '.pth'
+if not os.path.isfile(cnn_model_weights):
+    chunk1 = './data/imagenet_weights/' + opt.cnn_model + 'a.pth'
+    chunk2 = './data/imagenet_weights/' + opt.cnn_model + 'b.pth'
+    with open(cnn_model_weights,'wb') as destination:
+        with open(chunk1,'rb') as source:
+            shutil.copyfileobj(source, destination)
+        with open(chunk2,'rb') as source:
+            shutil.copyfileobj(source, destination)
+
 # Load infos
 with open(opt.infos_path, 'rb') as f:
-    infos = cPickle.load(f)
+    infos = cPickle.load(f, encoding='latin1')
 
 # override and collect parameters
 if len(opt.input_fc_dir) == 0:
@@ -100,8 +111,7 @@
 
 # Setup the model
 model = models.setup(opt)
-model.load_state_dict(torch.load(opt.model))
-model.cuda()
+model.load_state_dict(torch.load(opt.model, map_location=torch.device(device)))
 model.eval()
 crit = utils.LanguageModelCriterion()
 
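
Three things change in eval.py: the shipped CNN weights arrive as two chunks that get concatenated back into one `.pth` file (presumably to stay under GitHub's per-file size limit), the Python 2 pickle of `infos` is read with `encoding='latin1'`, and the caption model checkpoint is loaded with `map_location` so GPU-saved tensors can be read on a CPU-only machine. A small sketch of the same three moves, with hypothetical file names:

```python
import os
import pickle
import shutil
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

weights = './data/imagenet_weights/resnet101.pth'       # hypothetical path
if not os.path.isfile(weights):
    # The repo ships '<name>a.pth' and '<name>b.pth'; byte-wise concatenation
    # restores the original file.
    with open(weights, 'wb') as destination:
        for chunk in (weights[:-4] + 'a.pth', weights[:-4] + 'b.pth'):
            with open(chunk, 'rb') as source:
                shutil.copyfileobj(source, destination)

# encoding='latin1' lets Python 3 read a pickle written by Python 2's cPickle
with open('./data/infos.pkl', 'rb') as f:               # hypothetical path
    infos = pickle.load(f, encoding='latin1')

# map_location remaps CUDA storages, so the checkpoint also loads without a GPU
state_dict = torch.load('./data/model.pth',             # hypothetical path
                        map_location=torch.device(device))
```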

eval_utils.py

Lines changed: 4 additions & 2 deletions

@@ -16,6 +16,8 @@
 import sys
 import misc.utils as utils
 
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
 def language_eval(dataset, preds, model_id, split):
     import sys
     if 'coco' in dataset:
@@ -86,7 +88,7 @@ def eval_split(model, crit, loader, eval_kwargs={}):
             # forward the model to get loss
             tmp = [data['fc_feats'], data['att_feats'], data['labels'], data['masks']]
             with torch.no_grad():
-                tmp = [Variable(torch.from_numpy(_)).cuda() for _ in tmp]
+                tmp = [Variable(torch.from_numpy(_)).to(device=device) for _ in tmp]
                 fc_feats, att_feats, labels, masks = tmp
 
                 loss = crit(model(fc_feats, att_feats, labels), labels[:,1:], masks[:,1:]).item()
@@ -98,7 +100,7 @@ def eval_split(model, crit, loader, eval_kwargs={}):
         tmp = [data['fc_feats'][np.arange(loader.batch_size) * loader.seq_per_img],
             data['att_feats'][np.arange(loader.batch_size) * loader.seq_per_img]]
         with torch.no_grad():
-            tmp = [Variable(torch.from_numpy(_)).cuda() for _ in tmp]
+            tmp = [Variable(torch.from_numpy(_)).to(device=device) for _ in tmp]
             fc_feats, att_feats = tmp
         # forward the model to also get generated samples for each image
         seq, _ = model.sample(fc_feats, att_feats, eval_kwargs)
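
Both hunks above apply the same conversion: numpy feature arrays become tensors on the chosen device inside a `torch.no_grad()` block (the `Variable` wrapper is a holdover from pre-0.4 PyTorch and is kept here only for compatibility). A tiny sketch with made-up arrays:

```python
import numpy as np
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Stand-ins for data['fc_feats'] and data['labels']
fc_feats_np = np.random.rand(5, 2048).astype('float32')
labels_np = np.zeros((5, 18), dtype='int64')

with torch.no_grad():   # evaluation only, so no autograd bookkeeping
    fc_feats, labels = [torch.from_numpy(a).to(device=device)
                        for a in (fc_feats_np, labels_np)]
```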

models/CaptionModel.py

Lines changed: 2 additions & 1 deletion

@@ -15,6 +15,7 @@
 from torch.autograd import *
 import misc.utils as utils
 
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
 
 class CaptionModel(nn.Module):
     def __init__(self):
@@ -121,7 +122,7 @@ def beam_step(logprobsf, beam_size, t, beam_seq, beam_seq_logprobs, beam_logprob
 
             # encode as vectors
             it = beam_seq[t]
-            logprobs, state = self.get_logprobs_state(Variable(it.cuda()), *(args + (state,)))
+            logprobs, state = self.get_logprobs_state(Variable(it.to(device=device)), *(args + (state,)))
 
         done_beams = sorted(done_beams, key=lambda x: -x['p'])[:beam_size]
         return done_beams
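
For context on the changed line: `beam_step` is part of a beam search that keeps the `beam_size` highest-scoring partial captions at each step and feeds each survivor's last token (now moved to `device`) back through the model. A stripped-down illustration of that bookkeeping, with a made-up `next_logprobs` callable standing in for `get_logprobs_state` (the real code also threads RNN state and per-step log-prob histories through):

```python
import torch

def simple_beam_search(next_logprobs, beam_size, max_len, vocab_size):
    """Keep the beam_size partial sequences with the highest cumulative log-prob."""
    seqs = torch.zeros(beam_size, 0, dtype=torch.long)    # one growing row per beam
    scores = torch.zeros(beam_size)                        # cumulative log-probs
    for t in range(max_len):
        logprobs = next_logprobs(seqs)                     # (beam_size, vocab_size)
        if t == 0:
            # all beams are identical at the first step, so expand just one of them
            scores, words = logprobs[0].topk(beam_size)
            beams = torch.zeros(beam_size, dtype=torch.long)
        else:
            # score every (beam, next word) pair, keep the best beam_size of them
            cand = (scores.unsqueeze(1) + logprobs).view(-1)
            scores, flat = cand.topk(beam_size)
            beams, words = flat // vocab_size, flat % vocab_size
        seqs = torch.cat([seqs[beams], words.unsqueeze(1)], dim=1)
    return seqs, scores

# Toy usage: a uniform random "model" over a 10-word vocabulary
dummy = lambda seqs: torch.log_softmax(torch.randn(seqs.size(0), 10), dim=-1)
seqs, scores = simple_beam_search(dummy, beam_size=3, max_len=5, vocab_size=10)
```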

models/FCModel.py

Lines changed: 3 additions & 1 deletion

@@ -10,6 +10,8 @@
 
 from .CaptionModel import CaptionModel
 
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+
 class LSTMCore(nn.Module):
     def __init__(self, opt):
         super(LSTMCore, self).__init__()
@@ -176,7 +178,7 @@ def sample(self, fc_feats, att_feats, opt={}):
                 else:
                     # scale logprobs by temperature
                     prob_prev = torch.exp(torch.div(logprobs.data, temperature)).cpu()
-                it = torch.multinomial(prob_prev, 1).cuda()
+                it = torch.multinomial(prob_prev, 1).to(device=device)
                 sampleLogprobs = logprobs.gather(1, Variable(it, requires_grad=False)) # gather the logprobs at sampled positions
                 it = it.view(-1).long() # and flatten indices for downstream processing
 
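
The changed line is the stochastic branch of `sample`: log-probabilities are scaled by a temperature, exponentiated, and a word index is drawn with `torch.multinomial` (on the CPU, as in the original, then moved to `device`). A minimal sketch with made-up logits and vocabulary size:

```python
import torch

device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Fake batch of 2 with a 1000-word vocabulary
logprobs = torch.log_softmax(torch.randn(2, 1000, device=device), dim=1)

temperature = 0.8                                        # <1 sharpens, >1 flattens the distribution
prob_prev = torch.exp(logprobs / temperature).cpu()      # multinomial accepts unnormalized weights
it = torch.multinomial(prob_prev, 1).to(device=device)   # one sampled word index per row
sampleLogprobs = logprobs.gather(1, it)                  # log-prob of each sampled word
it = it.view(-1).long()                                  # flatten for the next decoding step
```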
