diff --git a/README.md b/README.md
index 72a5fac..f211699 100644
--- a/README.md
+++ b/README.md
@@ -2,9 +2,6 @@
 
 Gradually warm-up(increasing) learning rate for pytorch's optimizer. Proposed in 'Accurate, Large Minibatch SGD: Training ImageNet in 1 Hour'.
 
-example tensorboard
-Example : Gradual Warmup for 100 epoch, after that, use cosine-annealing.
-
 ## Install
 
 ```
@@ -13,31 +10,6 @@ $ pip install git+https://github.com/ildoonet/pytorch-gradual-warmup-lr.git
 
 ## Usage
 
-See [run.py](warmup_scheduler/run.py) file.
-
-```python
-import torch
-from torch.optim.lr_scheduler import StepLR, ExponentialLR
-from torch.optim.sgd import SGD
-
-from warmup_scheduler import GradualWarmupScheduler
-
-
-if __name__ == '__main__':
-    model = [torch.nn.Parameter(torch.randn(2, 2, requires_grad=True))]
-    optim = SGD(model, 0.1)
+Example : Gradual Warmup for 5 epoch, after that, use cosine-annealing. See [run.py](warmup_scheduler/run.py) file.
 
-    # scheduler_warmup is chained with schduler_steplr
-    scheduler_steplr = StepLR(optim, step_size=10, gamma=0.1)
-    scheduler_warmup = GradualWarmupScheduler(optim, multiplier=1, total_epoch=5, after_scheduler=scheduler_steplr)
-
-    # this zero gradient update is needed to avoid a warning message, issue #8.
-    optim.zero_grad()
-    optim.step()
-
-    for epoch in range(1, 20):
-        scheduler_warmup.step(epoch)
-        print(epoch, optim.param_groups[0]['lr'])
-
-        optim.step()    # backward pass (update network)
-```
+![](./asset/warmup_cosineannealing.png)
\ No newline at end of file
diff --git a/asset/warmup_cosineannealing.png b/asset/warmup_cosineannealing.png
new file mode 100644
index 0000000..9b462e9
Binary files /dev/null and b/asset/warmup_cosineannealing.png differ
diff --git a/warmup_scheduler/run.py b/warmup_scheduler/run.py
index a1dbf3d..6d11432 100644
--- a/warmup_scheduler/run.py
+++ b/warmup_scheduler/run.py
@@ -1,24 +1,47 @@
 import torch
-from torch.optim.lr_scheduler import StepLR, ExponentialLR
+from torch.optim.lr_scheduler import CosineAnnealingLR
 from torch.optim.sgd import SGD
 
 from warmup_scheduler import GradualWarmupScheduler
 
 
+def plot(lr_list):
+    import matplotlib.pyplot as plt
+    from matplotlib.pyplot import MultipleLocator
+
+    f = plt.figure()
+
+    x_major_locator = MultipleLocator(1)
+    ax = plt.gca()
+    ax.xaxis.set_major_locator(x_major_locator)
+
+    x = range(1, len(lr_list) + 1)
+    plt.plot(x, lr_list)
+    plt.show()
+
+
 if __name__ == '__main__':
     model = [torch.nn.Parameter(torch.randn(2, 2, requires_grad=True))]
     optim = SGD(model, 0.1)
 
-    # scheduler_warmup is chained with schduler_steplr
-    scheduler_steplr = StepLR(optim, step_size=10, gamma=0.1)
-    scheduler_warmup = GradualWarmupScheduler(optim, multiplier=1, total_epoch=5, after_scheduler=scheduler_steplr)
+    epochs = 20
+    # scheduler_warmup is chained with lr_schduler
+    lr_schduler = CosineAnnealingLR(optim, T_max=epochs - 5, eta_min=0.02)
+    scheduler_warmup = GradualWarmupScheduler(optim, multiplier=1, total_epoch=5, after_scheduler=lr_schduler)
 
     # this zero gradient update is needed to avoid a warning message, issue #8.
     optim.zero_grad()
     optim.step()
+    scheduler_warmup.step()
+
+    lr_list = list()
+    for epoch in range(epochs):
+        current_lr = optim.param_groups[0]['lr']
+
+        optim.step()
+        scheduler_warmup.step()
 
-    for epoch in range(1, 20):
-        scheduler_warmup.step(epoch)
-        print(epoch, optim.param_groups[0]['lr'])
+        print(epoch + 1, current_lr)
+        lr_list.append(current_lr)
 
-        optim.step()    # backward pass (update network)
+    plot(lr_list)
diff --git a/warmup_scheduler/scheduler.py b/warmup_scheduler/scheduler.py
index 8b84082..2ebdc70 100644
--- a/warmup_scheduler/scheduler.py
+++ b/warmup_scheduler/scheduler.py
@@ -28,7 +28,7 @@ def get_lr(self):
                 if not self.finished:
                     self.after_scheduler.base_lrs = [base_lr * self.multiplier for base_lr in self.base_lrs]
                     self.finished = True
-                return self.after_scheduler.get_lr()
+                return self.after_scheduler.get_last_lr()
             return [base_lr * self.multiplier for base_lr in self.base_lrs]
 
         if self.multiplier == 1.0:
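
For quick reference, here is a minimal sketch of the usage this patch establishes: warm up for 5 epochs, then hand off to cosine annealing. It condenses the updated `warmup_scheduler/run.py` shown above with the matplotlib plotting left out; the variable names `cosine` and `warmup` are illustrative, and the dummy parameter list only exists so the optimizer has something to manage.

```python
import torch
from torch.optim.lr_scheduler import CosineAnnealingLR
from torch.optim.sgd import SGD

from warmup_scheduler import GradualWarmupScheduler

# Dummy parameter so SGD has something to optimize (as in run.py).
model = [torch.nn.Parameter(torch.randn(2, 2, requires_grad=True))]
optim = SGD(model, 0.1)

epochs = 20
# Warm up over the first 5 epochs, then follow cosine annealing for the rest.
cosine = CosineAnnealingLR(optim, T_max=epochs - 5, eta_min=0.02)
warmup = GradualWarmupScheduler(optim, multiplier=1, total_epoch=5, after_scheduler=cosine)

# Dummy optimizer step before the first scheduler step, the same workaround
# for the warning discussed in issue #8 that run.py uses.
optim.zero_grad()
optim.step()
warmup.step()

for epoch in range(epochs):
    current_lr = optim.param_groups[0]['lr']  # LR in effect for this epoch
    print(epoch + 1, current_lr)

    optim.step()   # stands in for the real forward/backward + update
    warmup.step()  # advance warm-up; delegates to cosine annealing after epoch 5
```

The scheduler.py hunk swaps the internal `after_scheduler.get_lr()` call for `after_scheduler.get_last_lr()`, presumably because since PyTorch 1.4 `get_last_lr()` is the accessor for the most recently computed learning rates, while calling `get_lr()` outside a scheduler's own `step()` emits a warning.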