forked from pytorch/tutorials
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsave_load_across_devices.py
182 lines (147 loc) · 5.17 KB
/
save_load_across_devices.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
"""
Saving and loading models across devices in PyTorch
===================================================
There may be instances where you want to save and load your neural
networks across different devices.
Introduction
------------
Saving and loading models across devices is relatively straightforward
using PyTorch. In this recipe, we will experiment with saving and
loading models across CPUs and GPUs.
Setup
-----
In order for every code block to run properly in this recipe, you must
first change the runtime to “GPU” or higher. Once you do, we need to
install ``torch`` if it isn’t already available.
::

   pip install torch

"""
######################################################################
# Steps
# -----
#
# 1. Import all necessary libraries for loading our data
# 2. Define and initialize the neural network
# 3. Save on a GPU, load on a CPU
# 4. Save on a GPU, load on a GPU
# 5. Save on a CPU, load on a GPU
# 6. Saving and loading ``DataParallel`` models
#
# 1. Import necessary libraries for loading our data
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# For this recipe, we will use ``torch`` and its submodules ``torch.nn``
# and ``torch.optim``.
#
import torch
import torch.nn as nn
import torch.optim as optim
######################################################################
# 2. Define and initialize the neural network
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# For the sake of example, we will create a neural network for classifying
# images. To learn more, see the Defining a Neural Network recipe.
#
class Net(nn.Module):
    """Small convolutional network used as the model to save and load.

    Two convolution + max-pool stages feed three fully connected layers
    that emit 10 values per sample.  ``fc1`` expects ``16 * 5 * 5``
    features, which is what 3-channel 32x32 inputs produce after the two
    conv/pool stages.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the batch ``x`` through the network and return the raw outputs."""
        # The functional API is reached through the existing ``nn`` import;
        # the bare name ``F`` is never imported in this file.
        relu = nn.functional.relu
        x = self.pool(relu(self.conv1(x)))
        x = self.pool(relu(self.conv2(x)))
        # Flatten every sample's feature maps into one row for the linear layers
        x = x.view(-1, 16 * 5 * 5)
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        x = self.fc3(x)
        return x
# Instantiate the network once; the sections below save this instance's
# ``state_dict`` and load it into freshly constructed ``Net`` objects.
net = Net()
# Display the module's repr as a quick sanity check
print(net)
######################################################################
# 3. Save on GPU, Load on CPU
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# When loading a model on a CPU that was trained with a GPU, pass
# ``torch.device('cpu')`` to the ``map_location`` argument in the
# ``torch.load()`` function.
#
# Specify a path to save to
PATH = "model.pt"
# Save only the parameters (the ``state_dict``), not the module object itself
torch.save(net.state_dict(), PATH)
# Load
device = torch.device('cpu')
model = Net()
# ``map_location`` remaps the stored tensors onto ``device`` while reading.
# ``weights_only=True`` restricts unpickling to tensors, primitive types and
# dictionaries, which is all a ``state_dict`` contains.
model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True))
######################################################################
# In this case, the storages underlying the tensors are dynamically
# remapped to the CPU device using the ``map_location`` argument.
#
# 4. Save on GPU, Load on GPU
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# When loading a model on a GPU that was trained and saved on GPU, simply
# convert the initialized model to a CUDA optimized model using
# ``model.to(torch.device('cuda'))``.
#
# Be sure to use the ``.to(torch.device('cuda'))`` function on all model
# inputs to prepare the data for the model.
#
# Save
torch.save(net.state_dict(), PATH)
# Load
device = torch.device("cuda")
model = Net()
# ``weights_only=True`` restricts unpickling to tensors, primitive types and
# dictionaries, which is all a ``state_dict`` contains.
model.load_state_dict(torch.load(PATH, weights_only=True))
# Move the freshly loaded parameters onto the GPU
model.to(device)
######################################################################
# Note that calling ``my_tensor.to(device)`` returns a new copy of
# ``my_tensor`` on GPU. It does NOT overwrite ``my_tensor``. Therefore,
# remember to manually overwrite tensors:
# ``my_tensor = my_tensor.to(torch.device('cuda'))``.
#
# 5. Save on CPU, Load on GPU
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# When loading a model on a GPU that was trained and saved on CPU, set the
# ``map_location`` argument in the ``torch.load()`` function to
# ``cuda:device_id``. This loads the model to a given GPU device.
#
# Be sure to call ``model.to(torch.device('cuda'))`` to convert the
# model’s parameter tensors to CUDA tensors.
#
# Finally, also be sure to use the ``.to(torch.device('cuda'))`` function
# on all model inputs to prepare the data for the CUDA optimized model.
#
# Save
torch.save(net.state_dict(), PATH)
# Load
device = torch.device("cuda")
model = Net()
# Choose whatever GPU device number you want.
# ``weights_only=True`` restricts unpickling to tensors, primitive types and
# dictionaries, which is all a ``state_dict`` contains.
model.load_state_dict(
    torch.load(PATH, map_location="cuda:0", weights_only=True)
)
# Make sure to call input = input.to(device) on any input tensors that you feed to the model
model.to(device)
######################################################################
# 6. Saving ``torch.nn.DataParallel`` Models
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#
# ``torch.nn.DataParallel`` is a model wrapper that enables parallel GPU
# utilization.
#
# To save a ``DataParallel`` model generically, save the
# ``model.module.state_dict()``. This way, you have the flexibility to
# load the model any way you want to any device you want.
#
# Save
# Only the ``DataParallel`` wrapper has a ``.module`` attribute; ``net`` is a
# plain ``Net``, so wrap it first and save the wrapped network's parameters.
parallel_net = nn.DataParallel(net)
torch.save(parallel_net.module.state_dict(), PATH)
# Load to whatever device you want
# The state dict was taken from ``.module``, i.e. from the underlying ``Net``,
# so it loads straight into a new, unwrapped ``Net``.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net()
model.load_state_dict(torch.load(PATH, map_location=device, weights_only=True))
model.to(device)
######################################################################
# Congratulations! You have successfully saved and loaded models across
# devices in PyTorch.
#