Source code for metaforecast.ensembles.static

import pandas as pd

from metaforecast.ensembles.windowing import Windowing



[docs]
class BestOnTrain(Windowing):
    """Select single best model based on training performance.

    A simple baseline ensemble that selects the single best-performing model
    based on training data accuracy. While not technically an ensemble since
    it uses only one model, it serves as an important baseline.

    Examples
    --------
    >>> from datasetsforecast.m3 import M3
    >>> from neuralforecast import NeuralForecast
    >>> from neuralforecast.models import NHITS, NBEATS, MLP
    >>> from metaforecast.ensembles import BestOnTrain
    >>>
    >>> df, *_ = M3.load('.', group='Monthly')
    >>>
    >>> # ensemble members setup
    >>> CONFIG = {'input_size': 12,
    >>>           'h': 12,
    >>>           'accelerator': 'cpu',
    >>>           'max_steps': 10, }
    >>>
    >>> models = [
    >>>     NBEATS(**CONFIG, stack_types=3 * ["identity"]),
    >>>     NHITS(**CONFIG),
    >>>     MLP(**CONFIG),
    >>>     MLP(num_layers=3, **CONFIG),
    >>> ]
    >>>
    >>> nf = NeuralForecast(models=models, freq='M')
    >>>
    >>> # cv to build meta-data
    >>> n_windows = df['unique_id'].value_counts().min()
    >>> n_windows = int(n_windows // 2)
    >>> fcst_cv = nf.cross_validation(df=df, n_windows=n_windows, step_size=1)
    >>> fcst_cv = fcst_cv.reset_index()
    >>> fcst_cv = fcst_cv.groupby(['unique_id', 'cutoff']).head(1).drop(columns='cutoff')
    >>>
    >>> # fitting combination rule
    >>> ensemble = BestOnTrain()
    >>> ensemble.fit(fcst_cv)
    >>>
    >>> # re-fitting models
    >>> nf.fit(df=df)
    >>>
    >>> # forecasting and combining
    >>> fcst = nf.predict()
    >>> fcst_ensemble = ensemble.predict(fcst.reset_index())
    """


[docs]
    def __init__(self, select_by_uid: bool = True):
        """Initialize best model selector.

        Parameters
        ----------
        select_by_uid : bool, default=True
            Strategy for selecting best performing model:
            - True: Select best model separately for each series
            - False: Select single best model across all series

        Notes
        -----
        Per-series selection (select_by_uid=True) allows for more granular model
        choice but requires sufficient data per series for reliable selection.
        Global selection may be more robust when individual series are short.

        """

        super().__init__(
            freq="",
            select_best=True,
            trim_ratio=1,
            weight_by_uid=select_by_uid,
        )

        self.alias = "BestOnTrain"

        self.use_window = False
        self.select_by_uid = select_by_uid



[docs]
    def update_weights(self, **kwargs):
        raise NotImplementedError





[docs]
class LossOnTrain(Windowing):
    """Weight ensemble members based on training set performance.

    An ensemble method that assigns static weights to models
    based on their training error. Unlike dynamic ensembles, weights
    are fixed after training and don't adapt to changing patterns.

    Notes
    -----
    Weights are computed as inverse of training error, giving higher
    weights to more accurate models. This static weighting assumes
    relative model performance remains stable over time.

    Examples
    --------
    >>> from datasetsforecast.m3 import M3
    >>> from neuralforecast import NeuralForecast
    >>> from neuralforecast.models import NHITS, NBEATS, MLP
    >>> from metaforecast.ensembles import LossOnTrain
    >>>
    >>> df, *_ = M3.load('.', group='Monthly')
    >>>
    >>> # ensemble members setup
    >>> CONFIG = {'input_size': 12,
    >>>           'h': 12,
    >>>           'accelerator': 'cpu',
    >>>           'max_steps': 10, }
    >>>
    >>> models = [
    >>>     NBEATS(**CONFIG, stack_types=3 * ["identity"]),
    >>>     NHITS(**CONFIG),
    >>>     MLP(**CONFIG),
    >>>     MLP(num_layers=3, **CONFIG),
    >>> ]
    >>>
    >>> nf = NeuralForecast(models=models, freq='M')
    >>>
    >>> # cv to build meta-data
    >>> n_windows = df['unique_id'].value_counts().min()
    >>> n_windows = int(n_windows // 2)
    >>> fcst_cv = nf.cross_validation(df=df, n_windows=n_windows, step_size=1)
    >>> fcst_cv = fcst_cv.reset_index()
    >>> fcst_cv = fcst_cv.groupby(['unique_id', 'cutoff']).head(1).drop(columns='cutoff')
    >>>
    >>> # fitting combination rule
    >>> ensemble = LossOnTrain(trim_ratio=0.8)
    >>> ensemble.fit(fcst_cv)
    >>>
    >>> # re-fitting models
    >>> nf.fit(df=df)
    >>>
    >>> # forecasting and combining
    >>> fcst = nf.predict()
    >>> fcst_ensemble = ensemble.predict(fcst.reset_index())

    """


[docs]
    def __init__(self, trim_ratio: float, weight_by_uid: bool = True):
        """Initialize static ensemble with training-based weights.

        Parameters
        ----------
        weight_by_uid : bool, default=True
            Strategy for computing model weights:
            - True: Separate weights per series
            - False: Global weights across all series

        trim_ratio : float, default=1.0
            Proportion of models to retain in ensemble, between 0 and 1:
            - 1.0: Keep all models
            - 0.5: Keep top 50% of models
            Models are selected based on training performance

        Notes
        -----
        Weight computation involves:
        1. Calculate training error for each model
        2. Convert errors to weights (inverse relationship)
        3. If trim_ratio < 1, select top performing models
        4. Normalize weights to sum to 1

        """
        super().__init__(
            freq="",
            select_best=False,
            trim_ratio=trim_ratio,
            weight_by_uid=weight_by_uid,
        )

        self.alias = "LossOnTrain"

        self.use_window = False



[docs]
    def update_weights(self, **kwargs):
        raise NotImplementedError





[docs]
class EqAverage(Windowing):
    """Combine forecasts using simple average with optional trimming.

    A robust ensemble method that equally weights retained models after
    removing poor performers. Research shows this simple approach often
    performs competitively with more complex weighting schemes.

    References
    ----------
    Jose, V. R. R., & Winkler, R. L. (2008).
    "Simple robust averages of forecasts: Some empirical results."
    International Journal of Forecasting, 24(1), 163-169.

    Examples
    --------
    >>> from datasetsforecast.m3 import M3
    >>> from neuralforecast import NeuralForecast
    >>> from neuralforecast.models import NHITS, NBEATS, MLP
    >>> from metaforecast.ensembles import EqAverage
    >>>
    >>> df, *_ = M3.load('.', group='Monthly')
    >>>
    >>> # ensemble members setup
    >>> CONFIG = {'input_size': 12,
    >>>           'h': 12,
    >>>           'accelerator': 'cpu',
    >>>           'max_steps': 10, }
    >>>
    >>> models = [
    >>>     NBEATS(**CONFIG, stack_types=3 * ["identity"]),
    >>>     NHITS(**CONFIG),
    >>>     MLP(**CONFIG),
    >>>     MLP(num_layers=3, **CONFIG),
    >>> ]
    >>>
    >>> nf = NeuralForecast(models=models, freq='M')
    >>>
    >>> # cv to build meta-data
    >>> n_windows = df['unique_id'].value_counts().min()
    >>> n_windows = int(n_windows // 2)
    >>> fcst_cv = nf.cross_validation(df=df, n_windows=n_windows, step_size=1)
    >>> fcst_cv = fcst_cv.reset_index()
    >>> fcst_cv = fcst_cv.groupby(['unique_id', 'cutoff']).head(1).drop(columns='cutoff')
    >>>
    >>> # fitting combination rule
    >>> ensemble =  EqAverage()
    >>> ensemble.fit(fcst_cv)
    >>>
    >>> # re-fitting models
    >>> nf.fit(df=df)
    >>>
    >>> # forecasting and combining
    >>> fcst = nf.predict()
    >>> fcst_ensemble = ensemble.predict(fcst.reset_index())
    """


[docs]
    def __init__(self, trim_ratio: float = 1, select_by_uid: bool = True):
        """Initialize equal-weights ensemble with optional trimming.

        Parameters
        ----------
        select_by_uid : bool, default=True
            Strategy for model selection in trimming:
            - True: Select best models separately for each series
            - False: Select best models across all series
            Per-series selection allows more granular model choice
            but requires sufficient data per series.

        trim_ratio : float, default=1.0
            Proportion of models to retain in ensemble, between 0 and 1:
            - 1.0: Keep all models (simple average)
            - 0.5: Keep top 50% of models
            - Lower values create more selective ensembles

        Notes
        -----
        Models are selected based on validation performance before
        applying equal weights. As shown in [1], moderate trimming
        often improves forecast accuracy while maintaining the
        robustness benefits of equal weighting.

        References
        ----------
        [1] Jose, V. R. R., & Winkler, R. L. (2008).
        "Simple robust averages of forecasts: Some empirical results."
        International Journal of Forecasting, 24(1), 163-169.
        """
        super().__init__(
            freq="",
            select_best=False,
            trim_ratio=trim_ratio,
            weight_by_uid=select_by_uid,
        )

        self.alias = "EqAverage"

        self.use_window = False



[docs]
    def update_weights(self, **kwargs):
        raise NotImplementedError


    @staticmethod
    def _weights_from_errors(scores: pd.Series) -> pd.Series:
        weights = pd.Series({k: 1 / len(scores) for k in scores.index})

        return weights