######################################################################
# Cuda
# ~~~~
- #
+ #
# If you have a GPU on your computer, it is preferable to run the
# algorithm on it, especially if you want to try larger networks (like
# VGG). For this, we have ``torch.cuda.is_available()`` that returns
# ``True`` if your computer has an available GPU. Then, we can use the
# ``.cuda()`` method to move allocated processes associated with a module
# from the CPU to the GPU. When we want to move this module back to the
# CPU (e.g. to use numpy), we use the ``.cpu()`` method. Finally,
# ``.type(dtype)`` will be used to convert a ``torch.FloatTensor`` into
# ``torch.cuda.FloatTensor`` to feed GPU processes.
- #
+ #

use_cuda = torch.cuda.is_available()
dtype = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
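
# As a quick illustration (``x`` below is a hypothetical tensor, not part of
# the tutorial itself), this is how data moves between devices under the
# conventions above:

x = torch.rand(3, 3).type(dtype)  # lives on the GPU whenever use_cuda is True
x_cpu = x.cpu()                   # move it back to the CPU, e.g. to use numpy
x_np = x_cpu.numpy()              # now a regular numpy array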
######################################################################
# Load images
# ~~~~~~~~~~~
- #
+ #
# In order to simplify the implementation, let's start by importing a
# style and a content image of the same dimensions. We then scale them to
# the desired output image size (128 or 512 in the example, depending on GPU
- # availability) and transform them into torch tensors, ready to feed
+ # availability) and transform them into torch tensors, ready to feed
# a neural network:
- #
+ #
# Here are links to download the images required to run the notebook:
# `picasso.jpg </_static/img/neural-style/picasso.jpg>`__ and
- # `dancing.jpg </_static/img/neural-style/dancing.jpg>`__. Download these images and add to a
- # directory with name ``images``
- #
+ # `dancing.jpg </_static/img/neural-style/dancing.jpg>`__. Download these two
+ # images and add them to a directory with name ``images``
+ #

# desired size of the output image
- imsize = 512 if use_cuda else 128 # use small size if no gpu
+ imsize = 512 if use_cuda else 128  # use small size if no gpu

loader = transforms.Compose([
    transforms.Scale(imsize),  # scale imported image
@@ -201,8 +201,8 @@ def image_loader(image_name):
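
# The body of ``image_loader`` falls outside this hunk. A minimal sketch of
# what such a helper looks like, assuming ``Image`` from PIL and ``Variable``
# from ``torch.autograd`` are imported earlier in the tutorial:

def image_loader(image_name):
    image = Image.open(image_name)
    image = Variable(loader(image))
    # fake batch dimension required to fit the network's input dimensions
    image = image.unsqueeze(0)
    return image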
style_img = image_loader("images/picasso.jpg").type(dtype)
content_img = image_loader("images/dancing.jpg").type(dtype)

- assert style_img.size() == content_img.size(
- ), "we need to import style and content images of the same size"
+ assert style_img.size() == content_img.size(), \
+     "we need to import style and content images of the same size"


######################################################################
@@ -213,13 +213,13 @@ def image_loader(image_name):
# feature maps will make no sense. This is not the case with pre-trained
# networks from the Caffe library: they are trained with 0-255 tensor
# images.
- #
+ #
# Display images
# ~~~~~~~~~~~~~~
- #
+ #
# We will use ``plt.imshow`` to display images. So we first need to
# reconvert them into PIL images:
- #
+ #

unloader = transforms.ToPILImage()  # reconvert into PIL image
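
# The ``imshow`` helper itself is elided from this diff. A rough sketch,
# assuming the ``unloader`` and ``imsize`` defined above:

def imshow(tensor):
    image = tensor.clone().cpu()  # clone the tensor so we don't modify it
    image = image.view(3, imsize, imsize)  # remove the fake batch dimension
    image = unloader(image)
    plt.imshow(image)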
@@ -243,7 +243,7 @@ def imshow(tensor):
######################################################################
# Content loss
# ~~~~~~~~~~~~
- #
+ #
# The content loss is a function that takes as input the feature maps
# :math:`F_{XL}` at a layer :math:`L` in a network fed by :math:`X` and
# returns the weighted content distance :math:`w_{CL}.D_C^L(X,C)` between
@@ -253,7 +253,7 @@ def imshow(tensor):
# these parameters as input. The distance :math:`\|F_{XL} - F_{CL}\|^2` is
# the mean squared error between the two sets of feature maps, which can be
# computed using a criterion ``nn.MSELoss`` passed as a third parameter.
- #
+ #
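# As a tiny illustration (``f_x`` and ``f_c`` below are hypothetical feature
# maps, not part of the tutorial, and ``Variable`` is assumed to be imported
# from ``torch.autograd`` as elsewhere), the distance is just an MSE
# criterion applied to two equally-shaped tensors:
#
# ::
#
#    f_x = Variable(torch.randn(1, 64, 32, 32))
#    f_c = Variable(torch.randn(1, 64, 32, 32))
#    d_c = nn.MSELoss()(f_x, f_c)  # read the value via d_c.data[0]
#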
# We will add our content losses at each desired layer as additive modules
# of the neural network. That way, each time we feed the network with
# an input image :math:`X`, all the content losses will be computed at the
@@ -262,15 +262,16 @@ def imshow(tensor):
# module returning the input: the module becomes a ''transparent layer''
# of the neural network. The computed loss is saved as a parameter of the
# module.
- #
+ #
# Finally, we define a fake ``backward`` method that just calls the
# backward method of ``nn.MSELoss`` in order to reconstruct the gradient.
# This method returns the computed loss: this will be useful when running
# the gradient descent in order to display the evolution of style and
# content losses.
- #
+ #

class ContentLoss(nn.Module):
+
    def __init__(self, target, weight):
        super(ContentLoss, self).__init__()
        # we 'detach' the target content from the tree used
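        # (the rest of the module falls outside this hunk; the sketch below
        #  follows the scheme described in the text above and is not part of
        #  the diff itself)
        self.target = target.detach() * weight
        self.weight = weight
        self.criterion = nn.MSELoss()

    def forward(self, input):
        # compute the weighted content distance and pass the input through
        self.loss = self.criterion(input * self.weight, self.target)
        self.output = input
        return self.output

    def backward(self, retain_variables=True):
        # 'fake' backward: just backpropagate the stored MSE loss
        self.loss.backward(retain_variables=retain_variables)
        return self.loss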
@@ -298,10 +299,10 @@ def backward(self, retain_variables=True):
# loss as a PyTorch Loss, you have to create a PyTorch autograd Function
# and recompute/implement the gradient by hand in the ``backward``
# method.
- #
+ #
# Style loss
# ~~~~~~~~~~
- #
+ #
# For the style loss, we first need to define a module that computes the
# Gram product :math:`G_{XL}` given the feature maps :math:`F_{XL}` of the
# neural network fed by :math:`X`, at layer :math:`L`. Let
@@ -312,9 +313,10 @@ def backward(self, retain_variables=True):
# :math:`\hat{F}_{XL}` is :math:`F_{XL}^k`. We leave it to you to check that
# :math:`\hat{F}_{XL} \cdot \hat{F}_{XL}^T = G_{XL}`. Given that, it
# becomes easy to implement our module:
- #
+ #


class GramMatrix(nn.Module):
+
    def forward(self, input):
        a, b, c, d = input.size()  # a=batch size(=1)
        # b=number of feature maps
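        # (sketch of the remainder of ``forward``, which falls outside this
        #  hunk; it is the resize-and-matmul formulation described above)
        # (c,d)=dimensions of a feature map (N=c*d)

        features = input.view(a * b, c * d)  # resize F_XL into \hat F_XL

        G = torch.mm(features, features.t())  # compute the gram product

        # normalize the values of the gram matrix
        # by dividing by the number of elements in each feature map
        return G.div(a * b * c * d)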
@@ -335,13 +337,14 @@ def forward(self, input):
# the loss computed at the first layers (before pooling layers) will have
# much more importance during the gradient descent. We don't want that,
# since the most interesting style features are in the deepest layers!
- #
+ #
# Then, the style loss module is implemented exactly the same way as the
# content loss module, but we have to add the ``GramMatrix`` as a
# parameter:
- #
+ #


class StyleLoss(nn.Module):
+
    def __init__(self, target, weight):
        super(StyleLoss, self).__init__()
        self.target = target.detach() * weight
@@ -364,15 +367,15 @@ def backward(self, retain_variables=True):
######################################################################
# Load the neural network
# ~~~~~~~~~~~~~~~~~~~~~~~
- #
+ #
# Now, we have to import a pre-trained neural network. As in the paper, we
# are going to use a pretrained VGG network with 19 layers (VGG19).
- #
+ #
# PyTorch's implementation of VGG is a module divided into two child
# ``Sequential`` modules: ``features`` (containing convolution and pooling
# layers) and ``classifier`` (containing fully connected layers). We are
# just interested in ``features``:
- #
+ #

cnn = models.vgg19(pretrained=True).features
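
# (not shown in this hunk: as with the images, we assume the network is
# moved to the GPU when one is available)
if use_cuda:
    cnn = cnn.cuda()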
@@ -390,19 +393,21 @@ def backward(self, retain_variables=True):
# depths. For that, we construct a new ``Sequential`` module, in which we
# are going to add modules from ``vgg19`` and our loss modules in the
# right order:
- #
+ #

# desired depth layers to compute style/content losses:
content_layers_default = ['conv_4']
style_layers_default = ['conv_1', 'conv_2', 'conv_3', 'conv_4', 'conv_5']

+
def get_style_model_and_losses(cnn, style_img, content_img,
-                              style_weight=1000, content_weight=1,
-                              content_layers=content_layers_default,
+                              style_weight=1000, content_weight=1,
+                              content_layers=content_layers_default,
                               style_layers=style_layers_default):
    cnn = copy.deepcopy(cnn)
-
-   # just in order to have an iterable access to our list of content/style losses
+
+   # just in order to have an iterable access to our list of content/style
+   # losses
    content_losses = []
    style_losses = []

@@ -412,8 +417,7 @@ def get_style_model_and_losses(cnn, style_img, content_img,
    # move these modules to the GPU if possible:
    if use_cuda:
        model = model.cuda()
-       gram = gram.cuda()
-
+       gram = gram.cuda()

    i = 1
    for layer in list(cnn):
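        # (sketch of the start of the loop body, which falls outside this
        #  hunk: each convolution layer is added under a readable name, and a
        #  loss module is inserted right after the desired layers; the style
        #  branch is analogous, using the gram module and StyleLoss)
        if isinstance(layer, nn.Conv2d):
            name = "conv_" + str(i)
            model.add_module(name, layer)

            if name in content_layers:
                # add a content loss on the activations of the model built so far
                target = model(content_img).clone()
                content_loss = ContentLoss(target, content_weight)
                model.add_module("content_loss_" + str(i), content_loss)
                content_losses.append(content_loss)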
@@ -460,12 +464,12 @@ def get_style_model_and_losses(cnn, style_img, content_img,
        if isinstance(layer, nn.MaxPool2d):
            name = "pool_" + str(i)
            model.add_module(name, layer)  # ***
-
+
    return model, style_losses, content_losses


######################################################################
- # .. Note::
+ # .. Note::
# In the paper they recommend changing max pooling layers into
# average pooling. With AlexNet, which is a small network compared to VGG19
# used in the paper, we are not going to see any difference in quality in
@@ -474,20 +478,19 @@ def get_style_model_and_losses(cnn, style_img, content_img,
#
# ::
#
- # # avgpool = nn.AvgPool2d(kernel_size=layer.kernel_size,
-                 stride = layer.stride, padding = layer.padding)
+ # # avgpool = nn.AvgPool2d(kernel_size=layer.kernel_size,
+ # #                 stride=layer.stride, padding = layer.padding)
# # model.add_module(name,avgpool)


-
######################################################################
# Input image
# ~~~~~~~~~~~
- #
+ #
# Again, in order to simplify the code, we take an image of the same
# dimensions as the content and style images. This image can be white
# noise, or it can also be a copy of the content image.
- #
+ #

input_img = content_img.clone()
# if you want to use a white noise instead uncomment the below line:
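# (the exact line is outside this hunk; a sketch of that alternative,
#  assuming ``Variable`` is imported as elsewhere in the tutorial, would be)
# input_img = Variable(torch.randn(content_img.data.size())).type(dtype)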
@@ -501,7 +504,7 @@ def get_style_model_and_losses(cnn, style_img, content_img,
######################################################################
# Gradient descent
# ~~~~~~~~~~~~~~~~
- #
+ #
# As Leon Gatys, the author of the algorithm, suggested
# `here <https://discuss.pytorch.org/t/pytorch-tutorial-for-neural-transfert-of-artistic-style/336/20?u=alexis-jacq>`__,
# we will use the L-BFGS algorithm to run our gradient descent. Unlike
@@ -515,7 +518,7 @@ def get_style_model_and_losses(cnn, style_img, content_img,
# to construct a ``Parameter`` object from the input image. Then, we just
# give a list containing this ``Parameter`` to the optimizer's
# constructor:
- #
+ #

def get_input_param_optimizer(input_img):
    # this line to show that input is a parameter that requires a gradient
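    # (sketch of the elided body, assuming the standard nn.Parameter / LBFGS
    #  pairing described in the text above; not part of the diff itself)
    input_param = nn.Parameter(input_img.data)
    optimizer = optim.LBFGS([input_param])
    return input_param, optimizer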
@@ -531,30 +534,30 @@ def get_input_param_optimizer(input_img):
# their gradients and perform the step of gradient descent. The optimizer
# requires as argument a "closure": a function that reevaluates the model
# and returns the loss.
- #
+ #
# However, there's a small catch. The optimized image may take its values
# between :math:`-\infty` and :math:`+\infty` instead of staying between 0
# and 1. In other words, the image might be well optimized and have absurd
# values. In fact, we must perform an optimization under constraints in
# order to keep the values of our input image in the right range. There is
# a simple solution: at each step, correct the image to keep its values in
# the 0-1 interval.
- #
+ #

- def run_style_transfer(cnn, content_img, style_img, input_img, num_steps=300,
-                        style_weight=1000, content_weight=1):
+ def run_style_transfer(cnn, content_img, style_img, input_img, num_steps=300,
+                        style_weight=1000, content_weight=1):
    """Run the style transfer."""
-   model, style_losses, content_losses = get_style_model_and_losses(cnn, style_img, content_img,
-                                                                    style_weight, content_weight)
+   model, style_losses, content_losses = get_style_model_and_losses(cnn,
+       style_img, content_img, style_weight, content_weight)
    input_param, optimizer = get_input_param_optimizer(input_img)
-
+
    run = [0]
    while run[0] <= num_steps:

        def closure():
            # correct the values of updated input image
            input_param.data.clamp_(0, 1)
-
+
            optimizer.zero_grad()
            model.forward(input_param)
            style_score = 0
@@ -565,24 +568,27 @@ def closure():
            for cl in content_losses:
                content_score += cl.backward()

-           run[0]+= 1
+           run[0] += 1
            if run[0] % 50 == 0:
                print("run {}:".format(run))
                print('Style Loss : {:4f} Content Loss: {:4f}'.format(
-                   style_score.data[0], content_score.data[0]))
+                   style_score.data[0], content_score.data[0]))
                print()

-           return style_score + content_score
+           return style_score + content_score

        optimizer.step(closure)
-
+
    # a last correction...
    input_param.data.clamp_(0, 1)
-
+
    return input_param.data

+ ######################################################################
+ # Finally, run the algorithm
+
output = run_style_transfer(cnn, content_img, style_img, input_img)

plt.subplot(224)
imshow(output)
- plt.show()
+ plt.show()