diff --git a/.gitignore b/.gitignore
index 515ff88..854eb76 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,5 @@
 .idea
 *.key
-*.pdf
 .nsml*
 *.pt
 X*
@@ -9,5 +8,7 @@ ppts
 .ipynb_checkpoints
 client_secret.json
 __pycache__/
+.py*
 tmp
 template.pdf
+*.ipynb
diff --git a/01_basics.py b/01_basics.py
index 7fa70d6..a96bbc2 100644
--- a/01_basics.py
+++ b/01_basics.py
@@ -15,22 +15,29 @@ def loss(x, y):
     y_pred = forward(x)
     return (y_pred - y) * (y_pred - y)
 
-
+# List of weights and mean squared error (MSE) for each input
 w_list = []
 mse_list = []
 
 for w in np.arange(0.0, 4.1, 0.1):
+    # Print the weight and initialize the loss
    print("w=", w)
     l_sum = 0
+
     for x_val, y_val in zip(x_data, y_data):
+        # For each input and output, calculate y_hat
+        # Compute the loss and add it to the total error
         y_pred_val = forward(x_val)
         l = loss(x_val, y_val)
         l_sum += l
         print("\t", x_val, y_val, y_pred_val, l)
-    print("MSE=", l_sum / 3)
+    # Now compute the mean squared error (MSE) for this weight
+    # Record the weight/MSE pair from this run
+    print("MSE=", l_sum / len(x_data))
     w_list.append(w)
-    mse_list.append(l_sum / 3)
+    mse_list.append(l_sum / len(x_data))
 
+# Plot it all
 plt.plot(w_list, mse_list)
 plt.ylabel('Loss')
 plt.xlabel('w')
diff --git a/02_manual_gradient.py b/02_manual_gradient.py
index 81d1924..a84e789 100644
--- a/02_manual_gradient.py
+++ b/02_manual_gradient.py
@@ -1,11 +1,11 @@
+# Training Data
 x_data = [1.0, 2.0, 3.0]
 y_data = [2.0, 4.0, 6.0]
 
 w = 1.0  # a random guess: random value
 
-# our model forward pass
-
+# our model forward pass
 def forward(x):
     return x * w
 
@@ -20,18 +20,21 @@ def loss(x, y):
 def gradient(x, y):  # d_loss/d_w
     return 2 * x * (x * w - y)
 
+
 # Before training
-print("predict (before training)", 4, forward(4))
+print("Prediction (before training)", 4, forward(4))
 
 # Training loop
 for epoch in range(10):
     for x_val, y_val in zip(x_data, y_data):
+        # Compute the derivative of the loss w.r.t. the weight
+        # Update the weight
+        # Compute the loss and print progress
         grad = gradient(x_val, y_val)
         w = w - 0.01 * grad
         print("\tgrad: ", x_val, y_val, round(grad, 2))
         l = loss(x_val, y_val)
-
     print("progress:", epoch, "w=", round(w, 2), "loss=", round(l, 2))
 
 # After training
-print("predict (after training)", "4 hours", forward(4))
+print("Predicted score (after training)", "4 hours of studying: ", forward(4))
diff --git a/03_auto_gradient.py b/03_auto_gradient.py
index fc44021..9689703 100644
--- a/03_auto_gradient.py
+++ b/03_auto_gradient.py
@@ -1,39 +1,34 @@
 import torch
-from torch.autograd import Variable
+import pdb
 
 x_data = [1.0, 2.0, 3.0]
 y_data = [2.0, 4.0, 6.0]
-
-w = Variable(torch.Tensor([1.0]), requires_grad=True)  # Any random value
+w = torch.tensor([1.0], requires_grad=True)
 
 # our model forward pass
-
-
 def forward(x):
     return x * w
 
 # Loss function
-
-
-def loss(x, y):
-    y_pred = forward(x)
-    return (y_pred - y) * (y_pred - y)
+def loss(y_pred, y_val):
+    return (y_pred - y_val) ** 2
 
 # Before training
-print("predict (before training)", 4, forward(4).data[0])
+print("Prediction (before training)", 4, forward(4).item())
 
 # Training loop
for epoch in range(10):
     for x_val, y_val in zip(x_data, y_data):
-        l = loss(x_val, y_val)
-        l.backward()
-        print("\tgrad: ", x_val, y_val, w.grad.data[0])
-        w.data = w.data - 0.01 * w.grad.data
+        y_pred = forward(x_val)  # 1) Forward pass
+        l = loss(y_pred, y_val)  # 2) Compute loss
+        l.backward()  # 3) Back propagation to compute gradients
+        print("\tgrad: ", x_val, y_val, w.grad.item())
+        w.data = w.data - 0.01 * w.grad.item()
 
         # Manually zero the gradients after updating weights
         w.grad.data.zero_()
 
-    print("progress:", epoch, l.data[0])
+    print(f"Epoch: {epoch} | Loss: {l.item()}")
 
 # After training
-print("predict (after training)", 4, forward(4).data[0])
+print("Prediction (after training)", 4, forward(4).item())
diff --git a/05_linear_regression.py b/05_linear_regression.py
index 0afd430..84238a7 100644
--- a/05_linear_regression.py
+++ b/05_linear_regression.py
@@ -1,13 +1,12 @@
-
+from torch import nn
 import torch
-from torch.autograd import Variable
-
-x_data = Variable(torch.Tensor([[1.0], [2.0], [3.0]]))
-y_data = Variable(torch.Tensor([[2.0], [4.0], [6.0]]))
+from torch import tensor
+x_data = tensor([[1.0], [2.0], [3.0]])
+y_data = tensor([[2.0], [4.0], [6.0]])
 
 
-class Model(torch.nn.Module):
+class Model(nn.Module):
     def __init__(self):
         """
         In the constructor we instantiate two nn.Linear module
         """
@@ -24,24 +23,24 @@ def forward(self, x):
         y_pred = self.linear(x)
         return y_pred
 
+
 # our model
 model = Model()
 
-
 # Construct our loss function and an Optimizer. The call to model.parameters()
 # in the SGD constructor will contain the learnable parameters of the two
 # nn.Linear modules which are members of the model.
-criterion = torch.nn.MSELoss(size_average=False)
+criterion = torch.nn.MSELoss(reduction='sum')
 optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
 
 # Training loop
 for epoch in range(500):
-    # Forward pass: Compute predicted y by passing x to the model
+    # 1) Forward pass: Compute predicted y by passing x to the model
     y_pred = model(x_data)
 
-    # Compute and print loss
+    # 2) Compute and print loss
     loss = criterion(y_pred, y_data)
-    print(epoch, loss.data[0])
+    print(f'Epoch: {epoch} | Loss: {loss.item()} ')
 
     # Zero gradients, perform a backward pass, and update the weights.
     optimizer.zero_grad()
@@ -50,6 +49,6 @@ def forward(self, x):
 
 # After training
-hour_var = Variable(torch.Tensor([[4.0]]))
+hour_var = tensor([[4.0]])
 y_pred = model(hour_var)
-print("predict (after training)", 4, model(hour_var).data[0][0])
+print("Prediction (after training)", 4, model(hour_var).data[0][0].item())
diff --git a/06_logistic_regression.py b/06_logistic_regression.py
index 4d16c83..db903c6 100644
--- a/06_logistic_regression.py
+++ b/06_logistic_regression.py
@@ -1,47 +1,48 @@
-
-import torch
-from torch.autograd import Variable
+from torch import tensor
+from torch import nn
+from torch import sigmoid
 import torch.nn.functional as F
+import torch.optim as optim
 
-x_data = Variable(torch.Tensor([[1.0], [2.0], [3.0], [4.0]]))
-y_data = Variable(torch.Tensor([[0.], [0.], [1.], [1.]]))
-
+# Training data and ground truth
+x_data = tensor([[1.0], [2.0], [3.0], [4.0]])
+y_data = tensor([[0.], [0.], [1.], [1.]])
 
-class Model(torch.nn.Module):
+class Model(nn.Module):
     def __init__(self):
         """
         In the constructor we instantiate nn.Linear module
         """
         super(Model, self).__init__()
-        self.linear = torch.nn.Linear(1, 1)  # One in and one out
+        self.linear = nn.Linear(1, 1)  # One in and one out
 
     def forward(self, x):
         """
         In the forward function we accept a Variable of input data and we must
         return a Variable of output data.
         """
-        y_pred = F.sigmoid(self.linear(x))
+        y_pred = sigmoid(self.linear(x))
         return y_pred
 
+
 # our model
 model = Model()
 
-
 # Construct our loss function and an Optimizer. The call to model.parameters()
 # in the SGD constructor will contain the learnable parameters of the two
 # nn.Linear modules which are members of the model.
-criterion = torch.nn.BCELoss(size_average=True)
-optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
+criterion = nn.BCELoss(reduction='mean')
+optimizer = optim.SGD(model.parameters(), lr=0.01)
 
 # Training loop
 for epoch in range(1000):
-    # Forward pass: Compute predicted y by passing x to the model
+    # Forward pass: Compute predicted y by passing x to the model
     y_pred = model(x_data)
 
     # Compute and print loss
     loss = criterion(y_pred, y_data)
-    print(epoch, loss.data[0])
+    print(f'Epoch {epoch + 1}/1000 | Loss: {loss.item():.4f}')
 
     # Zero gradients, perform a backward pass, and update the weights.
     optimizer.zero_grad()
@@ -49,7 +50,8 @@ def forward(self, x):
     optimizer.step()
 
 # After training
-hour_var = Variable(torch.Tensor([[1.0]]))
-print("predict 1 hour ", 1.0, model(hour_var).data[0][0] > 0.5)
-hour_var = Variable(torch.Tensor([[7.0]]))
-print("predict 7 hours", 7.0, model(hour_var).data[0][0] > 0.5)
+print(f'\nLet\'s predict the hours needed to score above 50%\n{"=" * 50}')
+hour_var = model(tensor([[1.0]]))
+print(f'Prediction for 1 hour of study: {hour_var.item():.4f} | Above 50%: {hour_var.item() > 0.5}')
+hour_var = model(tensor([[7.0]]))
+print(f'Prediction for 7 hours of study: {hour_var.item():.4f} | Above 50%: {hour_var.item() > 0.5}')
diff --git a/07_diabets_logistic.py b/07_diabets_logistic.py
index 14535cf..0a510ac 100644
--- a/07_diabets_logistic.py
+++ b/07_diabets_logistic.py
@@ -1,28 +1,23 @@
-
-import torch
-from torch.autograd import Variable
+from torch import nn, optim, from_numpy
 import numpy as np
 
 xy = np.loadtxt('./data/diabetes.csv.gz',
                 delimiter=',', dtype=np.float32)
-x_data = Variable(torch.from_numpy(xy[:, 0:-1]))
-y_data = Variable(torch.from_numpy(xy[:, [-1]]))
-
-print(x_data.data.shape)
-print(y_data.data.shape)
+x_data = from_numpy(xy[:, 0:-1])
+y_data = from_numpy(xy[:, [-1]])
+print(f'X\'s shape: {x_data.shape} | Y\'s shape: {y_data.shape}')
 
 
-class Model(torch.nn.Module):
-
+class Model(nn.Module):
     def __init__(self):
         """
         In the constructor we instantiate two nn.Linear module
         """
         super(Model, self).__init__()
-        self.l1 = torch.nn.Linear(8, 6)
-        self.l2 = torch.nn.Linear(6, 4)
-        self.l3 = torch.nn.Linear(4, 1)
+        self.l1 = nn.Linear(8, 6)
+        self.l2 = nn.Linear(6, 4)
+        self.l3 = nn.Linear(4, 1)
 
-        self.sigmoid = torch.nn.Sigmoid()
+        self.sigmoid = nn.Sigmoid()
 
     def forward(self, x):
         """
@@ -35,6 +30,7 @@ def forward(self, x):
         y_pred = self.sigmoid(self.l3(out2))
         return y_pred
 
+
 # our model
 model = Model()
 
@@ -42,17 +38,17 @@ def forward(self, x):
 # Construct our loss function and an Optimizer. The call to model.parameters()
 # in the SGD constructor will contain the learnable parameters of the two
 # nn.Linear modules which are members of the model.
-criterion = torch.nn.BCELoss(size_average=True)
-optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
+criterion = nn.BCELoss(reduction='mean')
+optimizer = optim.SGD(model.parameters(), lr=0.1)
 
 # Training loop
 for epoch in range(100):
-    # Forward pass: Compute predicted y by passing x to the model
+    # Forward pass: Compute predicted y by passing x to the model
     y_pred = model(x_data)
 
     # Compute and print loss
     loss = criterion(y_pred, y_data)
-    print(epoch, loss.data[0])
+    print(f'Epoch: {epoch + 1}/100 | Loss: {loss.item():.4f}')
 
     # Zero gradients, perform a backward pass, and update the weights.
     optimizer.zero_grad()
diff --git a/08_1_dataset_loader.py b/08_1_dataset_loader.py
index aba0797..2921894 100644
--- a/08_1_dataset_loader.py
+++ b/08_1_dataset_loader.py
@@ -1,11 +1,9 @@
 # References
 # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py
 # http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class
-import torch
-import numpy as np
-from torch.autograd import Variable
 from torch.utils.data import Dataset, DataLoader
-
+from torch import from_numpy, tensor
+import numpy as np
 
 class DiabetesDataset(Dataset):
     """ Diabetes dataset."""
@@ -15,8 +13,8 @@ def __init__(self):
         xy = np.loadtxt('./data/diabetes.csv.gz',
                         delimiter=',', dtype=np.float32)
         self.len = xy.shape[0]
-        self.x_data = torch.from_numpy(xy[:, 0:-1])
-        self.y_data = torch.from_numpy(xy[:, [-1]])
+        self.x_data = from_numpy(xy[:, 0:-1])
+        self.y_data = from_numpy(xy[:, [-1]])
 
     def __getitem__(self, index):
         return self.x_data[index], self.y_data[index]
@@ -37,7 +35,7 @@ def __len__(self):
         inputs, labels = data
 
         # wrap them in Variable
-        inputs, labels = Variable(inputs), Variable(labels)
+        inputs, labels = tensor(inputs), tensor(labels)
 
         # Run your training process
-        print(epoch, i, "inputs", inputs.data, "labels", labels.data)
+        print(f'Epoch: {epoch} | Batch: {i} | Inputs {inputs.data} | Labels {labels.data}')
diff --git a/08_2_dataset_loade_logistic.py b/08_2_dataset_loade_logistic.py
index 43ba9f4..ec43dbf 100644
--- a/08_2_dataset_loade_logistic.py
+++ b/08_2_dataset_loade_logistic.py
@@ -1,22 +1,20 @@
 # References
 # https://github.com/yunjey/pytorch-tutorial/blob/master/tutorials/01-basics/pytorch_basics/main.py
 # http://pytorch.org/tutorials/beginner/data_loading_tutorial.html#dataset-class
-import torch
-import numpy as np
-from torch.autograd import Variable
 from torch.utils.data import Dataset, DataLoader
+from torch import nn, from_numpy, optim
+import numpy as np
 
 
 class DiabetesDataset(Dataset):
     """ Diabetes dataset."""
-
     # Initialize your data, download, etc.
     def __init__(self):
         xy = np.loadtxt('./data/diabetes.csv.gz',
                         delimiter=',', dtype=np.float32)
         self.len = xy.shape[0]
-        self.x_data = torch.from_numpy(xy[:, 0:-1])
-        self.y_data = torch.from_numpy(xy[:, [-1]])
+        self.x_data = from_numpy(xy[:, 0:-1])
+        self.y_data = from_numpy(xy[:, [-1]])
 
     def __getitem__(self, index):
         return self.x_data[index], self.y_data[index]
@@ -32,18 +30,18 @@ def __len__(self):
                           num_workers=2)
 
 
-class Model(torch.nn.Module):
+class Model(nn.Module):
 
     def __init__(self):
         """
         In the constructor we instantiate two nn.Linear module
         """
         super(Model, self).__init__()
-        self.l1 = torch.nn.Linear(8, 6)
-        self.l2 = torch.nn.Linear(6, 4)
-        self.l3 = torch.nn.Linear(4, 1)
+        self.l1 = nn.Linear(8, 6)
+        self.l2 = nn.Linear(6, 4)
+        self.l3 = nn.Linear(4, 1)
 
-        self.sigmoid = torch.nn.Sigmoid()
+        self.sigmoid = nn.Sigmoid()
 
     def forward(self, x):
         """
@@ -56,15 +54,15 @@ def forward(self, x):
         y_pred = self.sigmoid(self.l3(out2))
         return y_pred
 
+
 # our model
 model = Model()
 
-
 # Construct our loss function and an Optimizer. The call to model.parameters()
 # in the SGD constructor will contain the learnable parameters of the two
 # nn.Linear modules which are members of the model.
-criterion = torch.nn.BCELoss(size_average=True)
-optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
+criterion = nn.BCELoss(reduction='sum')
+optimizer = optim.SGD(model.parameters(), lr=0.1)
 
 # Training loop
 for epoch in range(2):
@@ -72,15 +70,12 @@ def forward(self, x):
         # get the inputs
         inputs, labels = data
 
-        # wrap them in Variable
-        inputs, labels = Variable(inputs), Variable(labels)
-
         # Forward pass: Compute predicted y by passing x to the model
         y_pred = model(inputs)
 
         # Compute and print loss
         loss = criterion(y_pred, labels)
-        print(epoch, i, loss.data[0])
+        print(f'Epoch {epoch + 1} | Batch: {i+1} | Loss: {loss.item():.4f}')
 
         # Zero gradients, perform a backward pass, and update the weights.
         optimizer.zero_grad()
diff --git a/09_01_softmax_loss.py b/09_01_softmax_loss.py
index aed1267..ffea7a9 100644
--- a/09_01_softmax_loss.py
+++ b/09_01_softmax_loss.py
@@ -1,23 +1,16 @@
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.optim as optim
-from torchvision import datasets, transforms
-from torch.autograd import Variable
-
+from torch import nn, tensor, max
+import numpy as np
 # Cross entropy example
-import numpy as np
 # One hot
 # 0: 1 0 0
 # 1: 0 1 0
 # 2: 0 0 1
 Y = np.array([1, 0, 0])
-
 Y_pred1 = np.array([0.7, 0.2, 0.1])
 Y_pred2 = np.array([0.1, 0.3, 0.6])
-print("loss1 = ", np.sum(-Y * np.log(Y_pred1)))
-print("loss2 = ", np.sum(-Y * np.log(Y_pred2)))
+print(f'Loss1: {np.sum(-Y * np.log(Y_pred1)):.4f}')
+print(f'Loss2: {np.sum(-Y * np.log(Y_pred2)):.4f}')
 
 # Softmax + CrossEntropy (logSoftmax + NLLLoss)
 loss = nn.CrossEntropyLoss()
@@ -25,38 +18,35 @@
 # target is of size nBatch
 # each element in target has to have 0 <= value < nClasses (0-2)
 # Input is class, not one-hot
-Y = Variable(torch.LongTensor([0]), requires_grad=False)
+Y = tensor([0], requires_grad=False)
 
 # input is of size nBatch x nClasses = 1 x 4
 # Y_pred are logits (not softmax)
-Y_pred1 = Variable(torch.Tensor([[2.0, 1.0, 0.1]]))
-Y_pred2 = Variable(torch.Tensor([[0.5, 2.0, 0.3]]))
+Y_pred1 = tensor([[2.0, 1.0, 0.1]])
+Y_pred2 = tensor([[0.5, 2.0, 0.3]])
 
 l1 = loss(Y_pred1, Y)
 l2 = loss(Y_pred2, Y)
 
-print("PyTorch Loss1 = ", l1.data, "\nPyTorch Loss2=", l2.data)
-
-print("Y_pred1=", torch.max(Y_pred1.data, 1)[1])
-print("Y_pred2=", torch.max(Y_pred2.data, 1)[1])
+print(f'PyTorch Loss1: {l1.item():.4f} \nPyTorch Loss2: {l2.item():.4f}')
+print(f'Y_pred1: {max(Y_pred1.data, 1)[1].item()}')
+print(f'Y_pred2: {max(Y_pred2.data, 1)[1].item()}')
 
 # target is of size nBatch
 # each element in target has to have 0 <= value < nClasses (0-2)
 # Input is class, not one-hot
-Y = Variable(torch.LongTensor([2, 0, 1]), requires_grad=False)
+Y = tensor([2, 0, 1], requires_grad=False)
 
 # input is of size nBatch x nClasses = 2 x 4
 # Y_pred are logits (not softmax)
-Y_pred1 = Variable(torch.Tensor([[0.1, 0.2, 0.9],
-                                 [1.1, 0.1, 0.2],
-                                 [0.2, 2.1, 0.1]]))
-
-Y_pred2 = Variable(torch.Tensor([[0.8, 0.2, 0.3],
-                                 [0.2, 0.3, 0.5],
-                                 [0.2, 0.2, 0.5]]))
+Y_pred1 = tensor([[0.1, 0.2, 0.9],
+                  [1.1, 0.1, 0.2],
+                  [0.2, 2.1, 0.1]])
+Y_pred2 = tensor([[0.8, 0.2, 0.3],
+                  [0.2, 0.3, 0.5],
+                  [0.2, 0.2, 0.5]])
 
 l1 = loss(Y_pred1, Y)
 l2 = loss(Y_pred2, Y)
-
-print("Batch Loss1 = ", l1.data, "\nBatch Loss2=", l2.data)
+print(f'Batch Loss1: {l1.item():.4f} \nBatch Loss2: {l2.item():.4f}')
diff --git a/09_2_softmax_mnist.py b/09_2_softmax_mnist.py
index 48d2483..9e035a3 100644
--- a/09_2_softmax_mnist.py
+++ b/09_2_softmax_mnist.py
@@ -1,14 +1,15 @@
 # https://github.com/pytorch/examples/blob/master/mnist/main.py
 from __future__ import print_function
-import torch
-import torch.nn as nn
-import torch.nn.functional as F
-import torch.optim as optim
+from torch import nn, optim, cuda
+from torch.utils import data
 from torchvision import datasets, transforms
-from torch.autograd import Variable
+import torch.nn.functional as F
+import time
 
 # Training settings
 batch_size = 64
+device = 'cuda' if cuda.is_available() else 'cpu'
+print(f'Training MNIST Model on {device}\n{"=" * 44}')
 
 # MNIST Dataset
 train_dataset = datasets.MNIST(root='./mnist_data/',
@@ -21,11 +22,11 @@
                                transform=transforms.ToTensor())
 
 # Data Loader (Input Pipeline)
-train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
+train_loader = data.DataLoader(dataset=train_dataset,
                                            batch_size=batch_size,
                                            shuffle=True)
 
-test_loader = torch.utils.data.DataLoader(dataset=test_dataset,
+test_loader = data.DataLoader(dataset=test_dataset,
                                           batch_size=batch_size,
                                           shuffle=False)
 
@@ -50,7 +51,7 @@ def forward(self, x):
 
 model = Net()
-
+model.to(device)
 criterion = nn.CrossEntropyLoss()
 optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)
 
@@ -58,16 +59,16 @@ def forward(self, x):
 def train(epoch):
     model.train()
     for batch_idx, (data, target) in enumerate(train_loader):
-        data, target = Variable(data), Variable(target)
+        data, target = data.to(device), target.to(device)
         optimizer.zero_grad()
         output = model(data)
         loss = criterion(output, target)
         loss.backward()
         optimizer.step()
         if batch_idx % 10 == 0:
-            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
+            print('Train Epoch: {} | Batch Status: {}/{} ({:.0f}%) | Loss: {:.6f}'.format(
                 epoch, batch_idx * len(data), len(train_loader.dataset),
-                100. * batch_idx / len(train_loader), loss.data[0]))
+                100. * batch_idx / len(train_loader), loss.item()))
 
 
 def test():
@@ -75,20 +76,30 @@ def test():
     test_loss = 0
     correct = 0
     for data, target in test_loader:
-        data, target = Variable(data, volatile=True), Variable(target)
+        data, target = data.to(device), target.to(device)
         output = model(data)
         # sum up batch loss
-        test_loss += criterion(output, target).data[0]
+        test_loss += criterion(output, target).item()
         # get the index of the max
         pred = output.data.max(1, keepdim=True)[1]
         correct += pred.eq(target.data.view_as(pred)).cpu().sum()
 
     test_loss /= len(test_loader.dataset)
-    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
-        test_loss, correct, len(test_loader.dataset),
-        100. * correct / len(test_loader.dataset)))
+    print(f'===========================\nTest set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} '
+          f'({100. * correct / len(test_loader.dataset):.0f}%)')
+
+
+if __name__ == '__main__':
+    since = time.time()
+    for epoch in range(1, 10):
+        epoch_start = time.time()
+        train(epoch)
+        m, s = divmod(time.time() - epoch_start, 60)
+        print(f'Training time: {m:.0f}m {s:.0f}s')
+        test()
+        m, s = divmod(time.time() - epoch_start, 60)
+        print(f'Testing time: {m:.0f}m {s:.0f}s')
+    m, s = divmod(time.time() - since, 60)
+    print(f'Total Time: {m:.0f}m {s:.0f}s\nModel was trained on {device}!')
 
-for epoch in range(1, 10):
-    train(epoch)
-    test()
diff --git a/10_1_cnn_mnist.py b/10_1_cnn_mnist.py
index 547c477..3f851f9 100644
--- a/10_1_cnn_mnist.py
+++ b/10_1_cnn_mnist.py
@@ -66,7 +66,7 @@ def train(epoch):
         if batch_idx % 10 == 0:
             print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                 epoch, batch_idx * len(data), len(train_loader.dataset),
-                100. * batch_idx / len(train_loader), loss.data[0]))
+                100. * batch_idx / len(train_loader), loss.item()))
 
 
 def test():
@@ -77,7 +77,7 @@ def test():
         data, target = Variable(data, volatile=True), Variable(target)
         output = model(data)
         # sum up batch loss
-        test_loss += F.nll_loss(output, target, size_average=False).data[0]
+        test_loss += F.nll_loss(output, target, size_average=False).item()
         # get the index of the max log-probability
         pred = output.data.max(1, keepdim=True)[1]
         correct += pred.eq(target.data.view_as(pred)).cpu().sum()
diff --git a/12_1_rnn_basics.py b/12_1_rnn_basics.py
index 2ec10a2..d26ceb0 100644
--- a/12_1_rnn_basics.py
+++ b/12_1_rnn_basics.py
@@ -11,9 +11,8 @@
 # One cell RNN input_dim (4) -> output_dim (2). sequence: 5
 cell = nn.RNN(input_size=4, hidden_size=2, batch_first=True)
 
-# (num_layers * num_directions, batch, hidden_size)
-# (batch, num_layers * num_directions, hidden_size) for batch_first=True
-hidden = (Variable(torch.randn(1, 1, 2)))
+# (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False
+hidden = Variable(torch.randn(1, 1, 2))
 
 # Propagate input through RNN
 # Input: (batch, seq_len, input_size) when batch_first=True
@@ -32,6 +31,9 @@
 print("sequence input size", inputs.size(), "out size", out.size())
 
+# hidden : (num_layers * num_directions, batch, hidden_size) whether batch_first=True or False
+hidden = Variable(torch.randn(1, 3, 2))
+
 # One cell RNN input_dim (4) -> output_dim (2). sequence: 5, batch 3
 # 3 batches 'hello', 'eolll', 'lleel'
 # rank = (3, 5, 4)
@@ -50,7 +52,7 @@
 cell = nn.RNN(input_size=4, hidden_size=2)
 
 # The given dimensions dim0 and dim1 are swapped.
-inputs = inputs.transpose(3, dim1=1, dim2=2)
+inputs = inputs.transpose(dim0=0, dim1=1)
 # Propagate input through RNN
 # Input: (seq_len, batch_size, input_size) when batch_first=False (default)
 # S x B x I
diff --git a/12_2_hello_rnn.py b/12_2_hello_rnn.py
index 848a43a..d196472 100644
--- a/12_2_hello_rnn.py
+++ b/12_2_hello_rnn.py
@@ -44,14 +44,14 @@ def forward(self, hidden, x):
 
         # Propagate input through RNN
         # Input: (batch, seq_len, input_size)
-        # hidden: (batch, num_layers * num_directions, hidden_size)
+        # hidden: (num_layers * num_directions, batch, hidden_size)
         out, hidden = self.rnn(x, hidden)
         return hidden, out.view(-1, num_classes)
 
     def init_hidden(self):
         # Initialize hidden and cell states
-        # (batch, num_layers * num_directions, hidden_size) for batch_first=True
-        return Variable(torch.zeros(batch_size, num_layers, hidden_size))
+        # (num_layers * num_directions, batch, hidden_size)
+        return Variable(torch.zeros(num_layers, batch_size, hidden_size))
 
 
 # Instantiate RNN model
@@ -75,9 +75,9 @@ def init_hidden(self):
         hidden, output = model(hidden, input)
         val, idx = output.max(1)
         sys.stdout.write(idx2char[idx.data[0]])
-        loss += criterion(output, label)
+        loss += criterion(output, torch.LongTensor([label]))
 
-    print(", epoch: %d, loss: %1.3f" % (epoch + 1, loss.data[0]))
+    print(", epoch: %d, loss: %1.3f" % (epoch + 1, loss.item()))
     loss.backward()
     optimizer.step()
diff --git a/12_3_hello_rnn_seq.py b/12_3_hello_rnn_seq.py
index 29f90d5..5a0c2f2 100644
--- a/12_3_hello_rnn_seq.py
+++ b/12_3_hello_rnn_seq.py
@@ -46,16 +46,16 @@ def __init__(self, num_classes, input_size, hidden_size, num_layers):
 
     def forward(self, x):
         # Initialize hidden and cell states
-        # (batch, num_layers * num_directions, hidden_size) for batch_first=True
+        # (num_layers * num_directions, batch, hidden_size) for batch_first=True
         h_0 = Variable(torch.zeros(
-            x.size(0), self.num_layers, self.hidden_size))
+            self.num_layers, x.size(0), self.hidden_size))
 
         # Reshape input
         x.view(x.size(0), self.sequence_length, self.input_size)
 
         # Propagate input through RNN
         # Input: (batch, seq_len, input_size)
-        # h_0: (batch, num_layers * num_directions, hidden_size)
+        # h_0: (num_layers * num_directions, batch, hidden_size)
         out, _ = self.rnn(x, h_0)
         return out.view(-1, num_classes)
diff --git a/12_4_hello_rnn_emb.py b/12_4_hello_rnn_emb.py
index 71fa85c..aa783a1 100644
--- a/12_4_hello_rnn_emb.py
+++ b/12_4_hello_rnn_emb.py
@@ -27,8 +27,10 @@
 class Model(nn.Module):
 
-    def __init__(self):
+    def __init__(self, num_layers, hidden_size):
         super(Model, self).__init__()
+        self.num_layers = num_layers
+        self.hidden_size = hidden_size
         self.embedding = nn.Embedding(input_size, embedding_size)
         self.rnn = nn.RNN(input_size=embedding_size,
                           hidden_size=5, batch_first=True)
@@ -36,22 +38,22 @@ def __init__(self):
 
     def forward(self, x):
         # Initialize hidden and cell states
-        # (batch, num_layers * num_directions, hidden_size) for batch_first=True
+        # (num_layers * num_directions, batch, hidden_size)
         h_0 = Variable(torch.zeros(
-            x.size(0), num_layers, hidden_size))
+            self.num_layers, x.size(0), self.hidden_size))
 
         emb = self.embedding(x)
         emb = emb.view(batch_size, sequence_length, -1)
 
         # Propagate embedding through RNN
         # Input: (batch, seq_len, embedding_size)
-        # h_0: (batch, num_layers * num_directions, hidden_size)
+        # h_0: (num_layers * num_directions, batch, hidden_size)
         out, _ = self.rnn(emb, h_0)
         return self.fc(out.view(-1, num_classes))
 
 
 # Instantiate RNN model
-model = Model()
+model = Model(num_layers, hidden_size)
 print(model)
 
 # Set loss and optimizer function
@@ -69,7 +71,7 @@ def forward(self, x):
     _, idx = outputs.max(1)
     idx = idx.data.numpy()
     result_str = [idx2char[c] for c in idx.squeeze()]
-    print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.data[0]))
+    print("epoch: %d, loss: %1.3f" % (epoch + 1, loss.item()))
     print("Predicted string: ", ''.join(result_str))
 
 print("Learning finished!")
diff --git a/requirements.txt b/requirements.txt
index 0d2815e..db9a191 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,6 @@
 #nonsml: digitalgenius/ubuntu-pytorch
 #varunagrawal/pytorch
-httplib2==0.10.3
+httplib2==0.18.0
 matplotlib==2.0.0
 numpy==1.13.3
 torch