multitask.multi.MultiTask

multitask.multi.MultiTask(
    config=None,
    *,
    task='lazy',
    dataframe=None,
    data_test=None,
    cache_home=None,
    dry_run=False,
    show_progress=False,
    log_level=logging.INFO,
    **overrides,
)

Orchestrates a multi-target time-series forecasting pipeline.

Training data is supplied as a pandas DataFrame via dataframe; it is required for every task except "clean". A test dataset can optionally be supplied via data_test.

The typical usage flow is:

  1. Instantiate with a config (or omit it to have a fresh ConfigMulti() constructed automatically).
  2. Call prepare_data to load, resample, and validate data.
  3. Call detect_outliers to apply hard bounds and IsolationForest.
  4. Call impute to fill gaps.
  5. Call build_exogenous_features to construct weather / calendar / day-night / holiday covariates.
  6. Call run (or individual run_task_* methods) to train, predict, and aggregate.

Available tasks: "lazy", "defaults", "predict", "clean". Tasks requiring auto-tuning ("optuna", "spotoptim") raise ValueError — use the spotforecast2 sibling package for those.

Parameters

Name Type Description Default
config Optional[PipelineConfig] A PipelineConfig-conforming object (e.g. ConfigMulti). When None, a fresh ConfigMulti() is constructed. None
task str Pipeline task mode — "lazy", "defaults", "predict", or "clean". Defaults to "lazy". 'lazy'
dataframe Optional[pd.DataFrame] Pre-loaded input DataFrame with training data. The DataFrame must contain a datetime column matching config.index_name plus at least one numeric target column. Optional for the "clean" task, required for all others. None
data_test Optional[pd.DataFrame] Pre-loaded input DataFrame with test data. Optional. None
cache_home Optional[Path] Cache directory override. When not None, replaces config.cache_home for this task instance. None
dry_run bool If True, do not clean cache or save models. False
show_progress bool Whether to print progress messages during pipeline execution. False
log_level int Logging level for the pipeline logger. logging.INFO
**overrides Any Forwarded to config.set_params(**overrides) — a convenience for one-line tweaks without building a fresh config. Mutates the caller’s config object. {}

Examples

import tempfile
import pandas as pd
import numpy as np
from spotforecast2_safe.multitask import MultiTask
from spotforecast2_safe.configurator.config_multi import ConfigMulti

rng = np.random.default_rng(0)
idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC")
df = pd.DataFrame({"a": rng.normal(100, 10, len(idx))}, index=idx)
df.index.name = "DateTime"

with tempfile.TemporaryDirectory() as tmp:
    cfg = ConfigMulti(predict_size=6, use_exogenous_features=False, cache_home=tmp)
    mt = MultiTask(cfg, dataframe=df)
    print(f"DataFrame stored: {mt._dataframe is not None}")
    print(f"Task: {mt.TASK}")
DataFrame stored: True
Task: lazy

Methods

Name Description
run Run the task specified by task (or self.TASK).
run_task_clean Remove all cached data from the pipeline cache directory.
run_task_defaults Defaults fitting — no tuning, no cached params.
run_task_lazy Lazy Fitting with default LightGBM parameters.
run_task_predict Predict-only using previously saved models.

run

multitask.multi.MultiTask.run(task=None, show=False, **kwargs)

Run the task specified by task (or self.TASK).

This dispatcher only selects a task; per-task options (for example task_name or max_age_days of run_task_predict) are not forwarded and must be passed to the corresponding run_task_* method directly.

Parameters

Name Type Description Default
task Optional[str] Override the task mode. None uses self.TASK. None
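show bool If True, invoke the visualisation hooks. False
**kwargs Any Not forwarded to the selected task. Unexpected keyword arguments raise TypeError instead of being silently ignored. {}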

Returns

Name Type Description
Dict[str, Any] Aggregated prediction package. Per-target results are stored on self.results[<task_key>].

Raises

Name Type Description
TypeError If unexpected keyword arguments are supplied (fail-safe: they would otherwise be silently ignored).
ValueError If task is "optuna" or "spotoptim" (auto-tuning not available in this package), or if task is not one of the supported task names.
RuntimeError If prepare_data has not been called (for training and prediction tasks).

Examples

import tempfile
import warnings
import numpy as np
import pandas as pd
from spotforecast2_safe.multitask.multi import MultiTask
from spotforecast2_safe.configurator.config_multi import ConfigMulti

rng = np.random.default_rng(0)
idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC")
df = pd.DataFrame({"a": rng.normal(100, 10, len(idx))}, index=idx)
df.index.name = "DateTime"

with tempfile.TemporaryDirectory() as tmp:
    cfg = ConfigMulti(
        predict_size=6,
        use_exogenous_features=False,
        use_outlier_detection=False,
        auto_save_models=False,
        number_folds=2,
        cache_home=tmp,
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mt = MultiTask(cfg, dataframe=df, task="lazy")
        mt.prepare_data().detect_outliers().impute().build_exogenous_features()
        result = mt.run(task="lazy")

assert "future_pred" in result
assert result["future_pred"].shape == (6,)
print(f"Dispatched to 'lazy'; horizon: {result['future_pred'].shape[0]} h")

# run() rejects unknown tasks and auto-tuning tasks
with tempfile.TemporaryDirectory() as tmp:
    cfg2 = ConfigMulti(
        predict_size=6,
        use_exogenous_features=False,
        use_outlier_detection=False,
        auto_save_models=False,
        cache_home=tmp,
    )
    mt2 = MultiTask(cfg2, task="lazy")
    try:
        mt2.run(task="optuna")
    except ValueError as exc:
        print(f"ValueError: {str(exc)[:60]}")
Dispatched to 'lazy'; horizon: 6 h
ValueError: Task 'optuna' requires auto-tuning, which is not available i

run_task_clean

multitask.multi.MultiTask.run_task_clean(
    show=False,
    dry_run=False,
    cache_home=None,
)

Remove all cached data from the pipeline cache directory.

Does not require prepare_data() to be called first.

Parameters

Name Type Description Default
show bool Accepted for API consistency. Not used by the clean task. False
dry_run bool If True, report what would be deleted without actually removing anything. False
cache_home Optional[Path] Override the directory to clean. None uses the cache directory configured on this instance. None

Returns

Name Type Description
Dict[str, Any] Dict with keys status, cache_dir, and deleted_items.

Raises

Name Type Description
RuntimeError If the cache directory cannot be removed.

Examples

import tempfile
from pathlib import Path
from spotforecast2_safe.multitask.multi import MultiTask
from spotforecast2_safe.configurator.config_multi import ConfigMulti

with tempfile.TemporaryDirectory() as tmp:
    cache = Path(tmp) / "sf2_cache"
    cache.mkdir()
    (cache / "models").mkdir()
    (cache / "tuning_results").mkdir()
    cfg = ConfigMulti(cache_home=cache)
    mt = MultiTask(cfg, task="clean")
    result = mt.run_task_clean(dry_run=True)

assert result["status"] == "dry_run"
assert "models" in result["deleted_items"]
print(f"Status: {result['status']}")
print(f"Would remove: {sorted(result['deleted_items'])}")
[clean] Dry run — would delete: /tmp/tmptzu1tk4f/sf2_cache
  Would remove: logging
  Would remove: models
  Would remove: tuning_results
Status: dry_run
Would remove: ['logging', 'models', 'tuning_results']

run_task_defaults

multitask.multi.MultiTask.run_task_defaults(show=False)

Defaults fitting — no tuning, no cached params.

Distinct from run_task_lazy only in that it never consults the tuning-result cache. Use this for deterministic baselines and for ENTSO-E “Approach 2: Training without Tuning”.

Parameters

Name Type Description Default
show bool If True, invoke the visualisation hooks. False

Returns

Name Type Description
Dict[str, Any] Aggregated prediction package. Per-target results are stored in self.results["defaults"].

Examples

import tempfile
import warnings
import numpy as np
import pandas as pd
from spotforecast2_safe.multitask.multi import MultiTask
from spotforecast2_safe.configurator.config_multi import ConfigMulti

rng = np.random.default_rng(0)
idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC")
df = pd.DataFrame({"a": rng.normal(100, 10, len(idx))}, index=idx)
df.index.name = "DateTime"

with tempfile.TemporaryDirectory() as tmp:
    cfg = ConfigMulti(
        predict_size=6,
        use_exogenous_features=False,
        use_outlier_detection=False,
        auto_save_models=False,
        number_folds=2,
        cache_home=tmp,
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mt = MultiTask(cfg, dataframe=df, task="defaults")
        mt.prepare_data().detect_outliers().impute().build_exogenous_features()
        result = mt.run_task_defaults()

assert "future_pred" in result
assert result["future_pred"].shape == (6,)
print(f"Prediction horizon: {result['future_pred'].shape[0]} hours")
print(f"validation_passed: {result['validation_passed']}")
Prediction horizon: 6 hours
validation_passed: True

run_task_lazy

multitask.multi.MultiTask.run_task_lazy(show=False)

Lazy Fitting with default LightGBM parameters.

Parameters

Name Type Description Default
show bool If True, invoke the visualisation hooks. False

Returns

Name Type Description
Dict[str, Any] Aggregated prediction package. Per-target results are stored in self.results["lazy"].

Examples

import tempfile
import warnings
import numpy as np
import pandas as pd
from spotforecast2_safe.multitask.multi import MultiTask
from spotforecast2_safe.configurator.config_multi import ConfigMulti

rng = np.random.default_rng(0)
idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC")
df = pd.DataFrame({"a": rng.normal(100, 10, len(idx))}, index=idx)
df.index.name = "DateTime"

with tempfile.TemporaryDirectory() as tmp:
    cfg = ConfigMulti(
        predict_size=6,
        use_exogenous_features=False,
        use_outlier_detection=False,
        auto_save_models=False,
        number_folds=2,
        cache_home=tmp,
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mt = MultiTask(cfg, dataframe=df)
        mt.prepare_data().detect_outliers().impute().build_exogenous_features()
        result = mt.run_task_lazy()

assert "future_pred" in result
assert result["future_pred"].shape == (6,)
print(f"Prediction horizon: {result['future_pred'].shape[0]} hours")
print(f"Result keys: {sorted(k for k in result if k != 'forecaster')}")
Prediction horizon: 6 hours
Result keys: ['future_actual', 'future_pred', 'metrics_future', 'metrics_future_one_day', 'metrics_train', 'train_actual', 'train_pred', 'validation_passed']

run_task_predict

multitask.multi.MultiTask.run_task_predict(
    show=False,
    task_name=None,
    max_age_days=None,
)

Predict-only using previously saved models.

Loads fitted models from the cache directory and produces predictions without any training. Raises RuntimeError if no saved models are found.

Parameters

Name Type Description Default
show bool If True, invoke the visualisation hooks. False
task_name Optional[str] Restrict model loading to a specific source task ("lazy", "defaults", "optuna", or "spotoptim"). None loads the most recent model regardless of source. None
max_age_days Optional[float] Maximum age in days for saved models. None accepts any age. None

Returns

Name Type Description
Dict[str, Any] Aggregated prediction package. Per-target results are stored in self.results["predict"].

Raises

Name Type Description
RuntimeError If no saved models are found.

Examples

import tempfile
import warnings
import numpy as np
import pandas as pd
from spotforecast2_safe.multitask.multi import MultiTask
from spotforecast2_safe.configurator.config_multi import ConfigMulti

rng = np.random.default_rng(0)
idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC")
df = pd.DataFrame({"a": rng.normal(100, 10, len(idx))}, index=idx)
df.index.name = "DateTime"

with tempfile.TemporaryDirectory() as tmp:
    cfg = ConfigMulti(
        predict_size=6,
        use_exogenous_features=False,
        use_outlier_detection=False,
        auto_save_models=True,
        number_folds=2,
        cache_home=tmp,
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Train and persist models first.
        train_mt = MultiTask(cfg, dataframe=df, task="lazy")
        train_mt.prepare_data().detect_outliers().impute().build_exogenous_features()
        train_mt.run_task_lazy()

        # Load saved models and predict without re-training.
        pred_mt = MultiTask(cfg, dataframe=df, task="predict")
        pred_mt.prepare_data().detect_outliers().impute().build_exogenous_features()
        result = pred_mt.run_task_predict()

assert "future_pred" in result
assert result["future_pred"].shape == (6,)
print(f"Prediction horizon: {result['future_pred'].shape[0]} hours")
Prediction horizon: 6 hours