
Commit 5b98abf

committed: Implement and test the NIN model with NumPy and PyTorch

1 parent f06b1b6 commit 5b98abf

File tree

9 files changed: +297 -6 lines changed


models/NIN.py

Lines changed: 2 additions & 2 deletions
@@ -50,7 +50,7 @@ def __init__(self, in_channels=1, out_channels=10, momentum=0, nesterov=False, p
         self.relu8 = nn.ReLU()
         self.relu9 = nn.ReLU()
 
-        self.dropout = nn.Dropout()
+        self.dropout = nn.Dropout2d()
 
         self.p_h = p_h
         self.U1 = None
@@ -85,7 +85,7 @@ def forward(self, inputs):
 
     def backward(self, grad_out):
         # grad_out.shape = [N, C]
-        assert len(grad_out) == 2
+        assert len(grad_out.shape) == 2
         da11 = self.gap.backward(grad_out)
 
         dz11 = self.relu9.backward(da11)
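A quick check of why the second fix matters: for a NumPy gradient of shape [N, C], len(grad_out) returns the batch size N, not the number of dimensions, so the old assertion only held when N happened to be 2. A minimal sketch:

import numpy as np

grad_out = np.zeros((128, 10))   # upstream gradient, shape [N, C]
print(len(grad_out))             # 128, the batch size, not the rank
print(len(grad_out.shape))       # 2, which is what the assertion should verify
assert len(grad_out.shape) == 2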

models/pytorch/NIN.py

Lines changed: 70 additions & 0 deletions
@@ -0,0 +1,70 @@
+# -*- coding: utf-8 -*-
+
+# @Time : 19-6-21 2:56 PM
+# @Author : zj
+
+import torch
+import torch.nn as nn
+
+__all__ = ['NIN', 'nin']
+
+model_urls = {
+    'nin': ''
+}
+
+
+class NIN(nn.Module):
+
+    def __init__(self, in_channels=1, out_channels=10):
+        super(NIN, self).__init__()
+
+        self.features1 = nn.Sequential(
+            nn.Conv2d(in_channels, 192, (5, 5), stride=1, padding=2),
+            nn.ReLU(),
+            nn.Conv2d(192, 160, (1, 1), stride=1, padding=0),
+            nn.ReLU(),
+            nn.Conv2d(160, 96, (1, 1), stride=1, padding=0),
+            nn.ReLU(),
+            nn.MaxPool2d(2, stride=2),
+            nn.Dropout2d()
+        )
+        self.features2 = nn.Sequential(
+            nn.Conv2d(96, 192, (5, 5), stride=1, padding=2),
+            nn.ReLU(),
+            nn.Conv2d(192, 192, (1, 1), stride=1, padding=0),
+            nn.ReLU(),
+            nn.Conv2d(192, 192, (1, 1), stride=1, padding=0),
+            nn.ReLU(),
+            nn.MaxPool2d(2, stride=2),
+            nn.Dropout2d()
+        )
+        self.features3 = nn.Sequential(
+            nn.Conv2d(192, 192, (3, 3), stride=1, padding=1),
+            nn.ReLU(),
+            nn.Conv2d(192, 192, (1, 1), stride=1, padding=0),
+            nn.ReLU(),
+            nn.Conv2d(192, out_channels, (1, 1), stride=1, padding=0),
+            nn.ReLU(),
+        )
+
+        self.gap = nn.AvgPool2d(8)
+
+    def forward(self, inputs):
+        x = self.features1(inputs)
+        x = self.features2(x)
+        x = self.features3(x)
+        x = self.gap(x)
+
+        return x.view(x.shape[0], x.shape[1])
+
+
+def nin(pretrained=False, **kwargs):
+    """
+    Create a model instance
+    """
+    model = NIN(**kwargs)
+    # if pretrained:
+    #     params = load_params(model_urls['nin'])
+    #     model.set_params(params)
+    return model
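A minimal smoke test of the new module, assuming 32x32 CIFAR-10 input (the size for which the two MaxPool2d(2) stages leave an 8x8 map that AvgPool2d(8) collapses to 1x1); the import path follows the package layout added in this commit:

import torch
from models.pytorch import nin

model = nin(in_channels=3, out_channels=10)
model.eval()                        # disable Dropout2d for the shape check
dummy = torch.randn(4, 3, 32, 32)   # four fake CIFAR-10 images
out = model(dummy)
print(out.shape)                    # expected: torch.Size([4, 10])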

models/pytorch/__init__.py

Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+
+
+from .NIN import *

nn/__init__.py

Lines changed: 1 addition & 0 deletions
@@ -4,6 +4,7 @@
 from .FC import FC
 from .ReLU import ReLU
 from .Dropout import Dropout
+from .Dropout2d import Dropout2d
 from .CrossEntropyLoss import CrossEntropyLoss
 from .Conv2d import Conv2d
 from .MaxPool import MaxPool
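The nn.Dropout2d module exported here is not shown in this diff; for reference, a minimal NumPy sketch of the idea behind the name (channel-wise dropout, where whole feature maps are zeroed rather than individual activations). The function name and signature are illustrative only, not the package's actual API:

import numpy as np

def dropout2d_forward(x, p_h=0.5):
    # x.shape = [N, C, H, W]; keep each (sample, channel) map with
    # probability p_h and rescale by 1/p_h (inverted dropout).
    mask = (np.random.rand(x.shape[0], x.shape[1], 1, 1) < p_h) / p_h
    return x * mask, mask

out, mask = dropout2d_forward(np.ones((2, 3, 4, 4)), p_h=0.5)
print(out.shape)  # (2, 3, 4, 4); each channel is either all 0 or all 2.0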

src/3_nn_cifar10.py

Lines changed: 1 addition & 1 deletion
@@ -32,7 +32,7 @@
 data_path = '/home/lab305/Documents/data/decompress_cifar_10'
 
 if __name__ == '__main__':
-    x_train, x_test, y_train, y_test = vision.data.load_cifar10(data_path, shuffle=True)
+    x_train, x_test, y_train, y_test = vision.data.load_cifar10(data_path, shuffle=True, is_flatten=True)
 
     x_train = x_train / 255 - 0.5
     x_test = x_test / 255 - 0.5

src/nin_cifar10.py

Lines changed: 89 additions & 0 deletions
@@ -0,0 +1,89 @@
+# -*- coding: utf-8 -*-
+
+# @Time : 19-6-21 2:45 PM
+# @Author : zj
+
+import nn
+import models
+import models.utils as utils
+import vision.data
+import numpy as np
+import time
+
+data_path = '/home/lab305/Documents/data/decompress_cifar_10'
+
+epochs = 100
+batch_size = 128
+momentum = 0.9
+learning_rate = 1e-3
+reg = 1e-3
+p_h = 0.5
+
+
+def nin_train():
+    x_train, x_test, y_train, y_test = vision.data.load_cifar10(data_path, shuffle=True)
+
+    # Normalize
+    x_train = x_train / 255.0 - 0.5
+    x_test = x_test / 255.0 - 0.5
+
+    net = models.nin(in_channels=3, p_h=p_h)
+    criterion = nn.CrossEntropyLoss()
+
+    accuracy = vision.Accuracy()
+
+    loss_list = []
+    train_list = []
+    test_list = []
+    best_train_accuracy = 0.995
+    best_test_accuracy = 0.995
+
+    range_list = np.arange(0, x_train.shape[0] - batch_size, step=batch_size)
+    for i in range(epochs):
+        total_loss = 0
+        num = 0
+        start = time.time()
+        for j in range_list:
+            data = x_train[j:j + batch_size]
+            labels = y_train[j:j + batch_size]
+
+            scores = net(data)
+            loss = criterion(scores, labels)
+            total_loss += loss
+            num += 1
+
+            grad_out = criterion.backward()
+            net.backward(grad_out)
+            net.update(lr=learning_rate, reg=reg)
+        end = time.time()
+        print('one epoch need time: %.3f' % (end - start))
+        print('epoch: %d loss: %f' % (i + 1, total_loss / num))
+        loss_list.append(total_loss / num)
+
+        if (i % 20) == 19:
+            # # Reduce the learning rate every 20 epochs
+            # learning_rate *= 0.5
+
+            train_accuracy = accuracy.compute_v2(x_train, y_train, net, batch_size=batch_size)
+            test_accuracy = accuracy.compute_v2(x_test, y_test, net, batch_size=batch_size)
+            train_list.append(train_accuracy)
+            test_list.append(test_accuracy)
+
+            print(loss_list)
+            print(train_list)
+            print(test_list)
+            if train_accuracy > best_train_accuracy and test_accuracy > best_test_accuracy:
+                path = 'nin-epochs-%d.pkl' % (i + 1)
+                utils.save_params(net.get_params(), path=path)
+                break
+
+    draw = vision.Draw()
+    draw(loss_list, xlabel='per 20 iterations')
+    draw.multi_plot((train_list, test_list), ('train set', 'test set'), title='accuracy', xlabel='per 20 iterations', ylabel='accuracy')
+
+
+if __name__ == '__main__':
+    start = time.time()
+    nin_train()
+    end = time.time()
+    print('training need time: %.3f' % (end - start))
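The script saves its best parameters with utils.save_params(). A sketch of loading them back, assuming utils also provides a load_params() counterpart and the network exposes set_params(); both names appear only in commented-out code elsewhere in this commit, so treat them as assumptions:

import models
import models.utils as utils

# Hypothetical checkpoint reload; get_params()/save_params() are used above,
# load_params()/set_params() are assumed mirror functions.
net = models.nin(in_channels=3, p_h=0.5)
params = utils.load_params('nin-epochs-100.pkl')
net.set_params(params)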

src/nin_cifar10_pytorch.py

Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
+# -*- coding: utf-8 -*-
+
+# @Time : 19-6-21 3:41 PM
+# @Author : zj
+
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import time
+import vision.data
+import models.pytorch
+
+epochs = 100
+batch_size = 128
+lr = 1e-3
+momentum = 0.9
+
+data_path = '/home/lab305/Documents/data/cifar_10'
+
+
+def train():
+    train_loader, test_loader = vision.data.load_cifar10_pytorch(data_path, batch_size=batch_size, shuffle=True)
+
+    # device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    device = torch.device("cpu")
+
+    net = models.pytorch.nin(in_channels=3).to(device)
+    criterion = nn.CrossEntropyLoss().to(device)
+    optimer = optim.SGD(net.parameters(), lr=lr, momentum=momentum, nesterov=True)
+    # stepLR = StepLR(optimer, 100, 0.5)
+
+    best_train_accuracy = 0.995
+    best_test_accuracy = 0
+
+    accuracy = vision.Accuracy()
+
+    loss_list = []
+    train_list = []
+    for i in range(epochs):
+        num = 0
+        total_loss = 0
+        start = time.time()
+        # Training phase
+        net.train()
+        for j, item in enumerate(train_loader, 0):
+            data, labels = item
+            data = data.to(device)
+            labels = labels.to(device)
+
+            scores = net.forward(data)
+            loss = criterion.forward(scores, labels)
+            total_loss += loss.item()
+
+            optimer.zero_grad()
+            loss.backward()
+            optimer.step()
+            num += 1
+        end = time.time()
+        # stepLR.step()
+
+        avg_loss = total_loss / num
+        loss_list.append(float('%.8f' % avg_loss))
+        print('epoch: %d time: %.2f loss: %.8f' % (i + 1, end - start, avg_loss))
+
+        if i % 20 == 19:
+            # Evaluation phase
+            net.eval()
+            train_accuracy = accuracy.compute_pytorch(train_loader, net, device)
+            train_list.append(float('%.4f' % train_accuracy))
+            if best_train_accuracy < train_accuracy:
+                best_train_accuracy = train_accuracy
+
+            test_accuracy = accuracy.compute_pytorch(test_loader, net, device)
+            if best_test_accuracy < test_accuracy:
+                best_test_accuracy = test_accuracy
+
+            print('best train accuracy: %.2f %% best test accuracy: %.2f %%' % (
+                best_train_accuracy * 100, best_test_accuracy * 100))
+            print(loss_list)
+            print(train_list)
+
+
+if __name__ == '__main__':
+    start = time.time()
+    train()
+    end = time.time()
+    print('training need time: %.3f' % (end - start))
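StepLR is referenced only in commented-out lines and its import is missing from this file. A minimal, self-contained sketch of how the scheduler would be wired in, using a stand-in model since this is illustration rather than the script's code:

import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR

net = nn.Linear(10, 10)  # stand-in for models.pytorch.nin(in_channels=3)
optimizer = optim.SGD(net.parameters(), lr=1e-3, momentum=0.9, nesterov=True)
scheduler = StepLR(optimizer, step_size=100, gamma=0.5)  # halve lr every 100 epochs

for epoch in range(3):
    # ... one training epoch over train_loader would run here ...
    optimizer.step()    # parameter update(s) for the epoch
    scheduler.step()    # advance the schedule once per epoch
    print(epoch, optimizer.param_groups[0]['lr'])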

vision/Accuracy.py

Lines changed: 15 additions & 0 deletions
@@ -5,6 +5,7 @@
 
 
 import numpy as np
+import torch
 
 
 class Accuracy(object):
@@ -34,3 +35,17 @@ def compute_v2(self, data_array, labels_array, net, batch_size=128):
             num += 1
 
         return total_accuracy / num
+
+    def compute_pytorch(self, loader, net, device):
+        total_accuracy = 0
+        num = 0
+        for item in loader:
+            data, labels = item
+            data = data.to(device)
+            labels = labels.to(device)
+
+            scores = net.forward(data)
+            predicted = torch.argmax(scores, dim=1)
+            total_accuracy += torch.mean((predicted == labels).float()).item()
+            num += 1
+        return total_accuracy / num
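A usage sketch for the new compute_pytorch() helper, with a tiny stand-in classifier and random tensors in place of the real CIFAR-10 loader; wrapping the call in eval() plus no_grad() keeps evaluation free of gradient bookkeeping:

import torch
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import vision

device = torch.device('cpu')
net = nn.Sequential(nn.Flatten(), nn.Linear(3 * 32 * 32, 10)).to(device)

data = torch.randn(256, 3, 32, 32)
labels = torch.randint(0, 10, (256,))
loader = DataLoader(TensorDataset(data, labels), batch_size=128)

accuracy = vision.Accuracy()
net.eval()
with torch.no_grad():   # evaluation only, no gradients needed
    acc = accuracy.compute_pytorch(loader, net, device)
print('accuracy: %.2f %%' % (acc * 100))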

vision/data/cifar10.py

Lines changed: 29 additions & 3 deletions
@@ -3,6 +3,10 @@
 # @Time : 19-6-20 7:22 PM
 # @Author : zj
 
+import torchvision.transforms as transforms
+import torchvision.datasets as datasets
+from torch.utils.data import DataLoader
+
 import numpy as np
 import os
 from .utils import *
@@ -14,7 +18,7 @@
 dst_size = (32, 32)
 
 
-def load_cifar10(cifar10_path, shuffle=True):
+def load_cifar10(cifar10_path, shuffle=True, is_flatten=False):
     """
     Load CIFAR-10
     """
@@ -41,8 +45,11 @@ def load_cifar10(cifar10_path, shuffle=True):
             file_path = os.path.join(data_dir, filename)
             img = read_image(file_path)
             if img is not None:
-                x_test.append(img.reshape(-1))
                 y_test.append(i)
+                if is_flatten:
+                    x_test.append(img.reshape(-1))
+                else:
+                    x_test.append(np.transpose(img, (2, 0, 1)))
 
     train_file_list = np.array(train_file_list)
     if shuffle:
@@ -52,7 +59,26 @@ def load_cifar10(cifar10_path, shuffle=True):
     for file_path in train_file_list:
         img = read_image(file_path)
         if img is not None:
-            x_train.append(img.reshape(-1))
             y_train.append(int(os.path.split(file_path)[0].split('/')[-1]))
+            if is_flatten:
+                x_train.append(img.reshape(-1))
+            else:
+                x_train.append(np.transpose(img, (2, 0, 1)))
 
     return np.array(x_train), np.array(x_test), np.array(y_train), np.array(y_test)
+
+
+def load_cifar10_pytorch(cifar10_path, batch_size=128, shuffle=False):
+    transform = transforms.Compose([
+        transforms.Resize((227, 227)),
+        transforms.ToTensor(),
+        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5))
+    ])
+
+    train_data_set = datasets.CIFAR10(root=cifar10_path, train=True, download=True, transform=transform)
+    test_data_set = datasets.CIFAR10(root=cifar10_path, train=False, download=True, transform=transform)
+
+    train_loader = DataLoader(train_data_set, batch_size=batch_size, shuffle=shuffle, num_workers=2)
+    test_loader = DataLoader(test_data_set, batch_size=batch_size, shuffle=shuffle, num_workers=2)
+
+    return train_loader, test_loader
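A quick shape check of the new is_flatten switch, using the data path from src/; the expected shapes assume 32x32 RGB images as resized by this loader:

import vision.data

data_path = '/home/lab305/Documents/data/decompress_cifar_10'

# Flattened vectors for the fully-connected scripts ...
x_train, _, _, _ = vision.data.load_cifar10(data_path, shuffle=True, is_flatten=True)
print(x_train.shape)  # expected (N, 3072)

# ... and CHW arrays for the convolutional NIN.
x_train, _, _, _ = vision.data.load_cifar10(data_path, shuffle=True)
print(x_train.shape)  # expected (N, 3, 32, 32)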
