tests/test_regularization.py

#!/usr/bin/env python3

import logging
import random

import numpy as np
import pandas as pd
import pytest
import torch

from neuralprophet import NeuralProphet, df_utils
from neuralprophet.utils import reg_func_abs
from tests.utils.dataset_generators import (
    generate_event_dataset,
    generate_holiday_dataset,
    generate_lagged_regressor_dataset,
)

log = logging.getLogger("NP.test")
log.setLevel("ERROR")
log.parent.setLevel("ERROR")

# Fix random seeds
torch.manual_seed(0)
random.seed(0)
np.random.seed(0)

# Variables
REGULARIZATION = 0.01
# Map holiday name to a y value for dataset generation
Y_HOLIDAYS_OVERRIDE = {
    "Washington's Birthday": 10,
    "Labor Day": 10,
    "Christmas Day": 10,
}
Y_EVENTS_OVERRIDE = {
    "2022-01-13": 10,
    "2022-01-14": 10,
    "2022-01-15": 10,
}


def test_reg_func_abs():
    log.info("testing: reg func abs")
    assert pytest.approx(1) == reg_func_abs(torch.Tensor([1]))
    assert pytest.approx(0) == reg_func_abs(torch.Tensor([0]))
    assert pytest.approx(1) == reg_func_abs(torch.Tensor([-1]))

    assert pytest.approx(1) == reg_func_abs(torch.Tensor([1, 1, 1]))
    assert pytest.approx(0) == reg_func_abs(torch.Tensor([0, 0, 0]))
    assert pytest.approx(1) == reg_func_abs(torch.Tensor([-1, -1, -1]))

    assert pytest.approx(0.6666666) == reg_func_abs(torch.Tensor([-1, 0, 1]))
    assert pytest.approx(20) == reg_func_abs(torch.Tensor([-12, 4, 0, -1, 1, 102]))


def test_regularization_holidays():
    log.info("testing: regularization of holidays")
    df = generate_holiday_dataset(y_holidays_override=Y_HOLIDAYS_OVERRIDE)
    df, _, _, _ = df_utils.check_multiple_series_id(df)
    df, _, _ = df_utils.check_dataframe(df, check_y=False)

    m = NeuralProphet(
        epochs=20,
        batch_size=32,
        learning_rate=0.1,
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=False,
        growth="off",
    )
    m = m.add_country_holidays(
        "US",
        regularization=0.0001,
    )
    m.fit(df, freq="D")

    to_reduce = []
    to_preserve = []
    for country_holiday in m.config_country_holidays.holiday_names:
        event_params = m.model.get_event_weights(country_holiday)
        weight_list = [param.detach().numpy() for _, param in event_params.items()]
        if country_holiday in Y_HOLIDAYS_OVERRIDE.keys():
            to_reduce.append(weight_list[0][0][0])
        else:
            to_preserve.append(weight_list[0][0][0])
    # print(f"To reduce (< 0.2) {to_reduce}")
    # print(f"To preserve (> 0.5) {to_preserve}")
    assert np.mean(to_reduce) < 0.2
    assert np.mean(to_preserve) > 0.5


def test_regularization_events():
    log.info("testing: regularization of events")
    df, events = generate_event_dataset(y_events_override=Y_EVENTS_OVERRIDE)
    df, _, _, id_list = df_utils.check_multiple_series_id(df)
    df, _, _ = df_utils.check_dataframe(df, check_y=False)

    m = NeuralProphet(
        epochs=50,
        batch_size=8,
        learning_rate=0.1,
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=False,
        growth="off",
    )
    m = m.add_events(
        ["event_%i" % index for index, _ in enumerate(events)],
        regularization=0.1,
    )
    events_df = pd.concat(
        [
            pd.DataFrame(
                {
                    "event": "event_%i" % index,
                    "ds": pd.to_datetime([event]),
                }
            )
            for index, event in enumerate(events)
        ]
    )
    history_df = m.create_df_with_events(df, events_df)
    m.fit(history_df, freq="D")

    to_reduce = []
    to_preserve = []
    for index, event in enumerate(events):
        weight_list = m.model.get_event_weights("event_%i" % index)
        for _, param in weight_list.items():
            if event in Y_EVENTS_OVERRIDE.keys():
                to_reduce.append(param.detach().numpy()[0][0])
            else:
                to_preserve.append(param.detach().numpy()[0][0])
    # print(f"To reduce (< 0.2) {to_reduce}")
    # print(f"To preserve (> 0.5) {to_preserve}")
    assert np.mean(to_reduce) < 0.2
    assert np.mean(to_preserve) > 0.5


def test_regularization_lagged_regressor():
    """
    Test case for  regularization feature of lagged regressors. Utlizes a
    synthetic dataset with 4 noise-based lagged regressors (a, b, c, d).
    The first and last lagged regressors (a, d) are expected to have a weight
    close to 1. The middle lagged regressors (b, c) meanwhile are expected to
    have a weight close to 0, due to the regularization. All other model
    components are turned off to avoid side effects.
    """
    log.info("testing: regularization lagged regressors")
    df, lagged_regressors = generate_lagged_regressor_dataset(periods=100)
    df, _, _, id_list = df_utils.check_multiple_series_id(df)
    df, _, _ = df_utils.check_dataframe(df, check_y=False)

    m = NeuralProphet(
        epochs=30,
        batch_size=8,
        learning_rate=0.1,
        yearly_seasonality=False,
        weekly_seasonality=False,
        daily_seasonality=False,
        growth="off",
        normalize="off",
    )
    m = m.add_lagged_regressor(
        n_lags=3,
        names=[lagged_regressor for lagged_regressor, _ in lagged_regressors],
        regularization=0.1,
    )
    m.fit(df, freq="D")

    lagged_regressors_config = dict(lagged_regressors)

    weights = m.model.get_covar_weights()
    for name in m.config_lagged_regressors.regressors.keys():
        weight_average = np.average(weights[name].detach().numpy())

        lagged_regressor_weight = lagged_regressors_config[name]

        if lagged_regressor_weight > 0.9:
            assert weight_average > 0.5
        else:
            assert weight_average < 0.35  # Note: this should be < 0.1, but due to fitting issues, relaxed temporarily.

        log.info(
            "Lagged regressor: %s, average weight: %f, expected weight: %f",
            name,
            weight_average,
            lagged_regressors_config[name],
        )