multitask.multi.MultiTask(
    config=None,
    *,
    task='lazy',
    dataframe=None,
    data_test=None,
    cache_home=None,
    dry_run=False,
    show_progress=False,
    log_level=logging.INFO,
    **overrides,
)
Orchestrates a multi-target time-series forecasting pipeline.
Data must be provided as a pandas DataFrame via dataframe. A test dataset can optionally be provided via data_test.
The typical usage flow is:
1. Instantiate with config (or omit it to auto-construct ConfigMulti()).
2. Call prepare_data to load, resample, and validate data.
3. Call detect_outliers to apply hard bounds and IsolationForest.
4. Call impute to fill gaps.
5. Call build_exogenous_features to construct weather / calendar / day-night / holiday covariates.
6. Call run (or individual run_task_* methods) to train, predict, and aggregate.
Available tasks: "lazy", "defaults", "predict", "clean". Tasks requiring auto-tuning ("optuna", "spotoptim") raise ValueError — use the spotforecast2 sibling package for those.
Parameters
config
Optional[PipelineConfig]
A PipelineConfig-conforming object (e.g. ConfigMulti). When None, a fresh ConfigMulti() is constructed.
None
task
str
Pipeline task mode — "lazy", "defaults", "predict", or "clean". Defaults to "lazy".
'lazy'
dataframe
Optional[pd.DataFrame]
Pre-loaded input DataFrame with training data. The DataFrame must contain a datetime column matching config.index_name plus at least one numeric target column. Optional for the "clean" task, required for all others.
None
data_test
Optional[pd.DataFrame]
Pre-loaded input DataFrame with test data. Optional.
None
cache_home
Optional[Path]
Cache directory override. When not None, replaces config.cache_home for this task instance.
None
dry_run
bool
If True, do not clean cache or save models.
False
show_progress
bool
Whether to print progress messages during pipeline execution.
False
log_level
int
Logging level for the pipeline logger.
logging.INFO
**overrides
Any
Forwarded to config.set_params(**overrides) — a convenience for one-line tweaks without building a fresh config. Mutates the caller’s config object.
{}
Examples
import tempfile
import pandas as pd
import numpy as np
from spotforecast2_safe.multitask import MultiTask
from spotforecast2_safe.configurator.config_multi import ConfigMulti

rng = np.random.default_rng(0)
idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC")
df = pd.DataFrame({"a": rng.normal(100, 10, len(idx))}, index=idx)
df.index.name = "DateTime"
with tempfile.TemporaryDirectory() as tmp:
    cfg = ConfigMulti(predict_size=6, use_exogenous_features=False, cache_home=tmp)
    mt = MultiTask(cfg, dataframe=df)
    print(f"DataFrame stored: {mt._dataframe is not None}")
    print(f"Task: {mt.TASK}")
DataFrame stored: True
Task: lazy
Methods
run
multitask.multi.MultiTask.run(task=None, show=False, **kwargs)
Run the task specified by task (or self.TASK).
This dispatcher selects a task only; per-task options (for example use_tuned_params or max_age_days) must be passed to the corresponding run_task_* method directly.
Parameters
task
Optional[str]
Override the task mode. None uses self.TASK.
None
show
bool
If True, invoke the visualisation hooks.
False
Returns
Dict[str, Any]
Aggregated prediction package. Per-target results are stored on self.results[<task_key>].
Raises
TypeError
If unexpected keyword arguments are supplied (fail-safe: they would otherwise be silently ignored).
ValueError
If task is "optuna" or "spotoptim" (auto-tuning not available in this package), or if task is not one of the supported task names.
RuntimeError
If prepare_data has not been called (for training and prediction tasks).
Examples
import tempfile
import warnings
import numpy as np
import pandas as pd
from spotforecast2_safe.multitask.multi import MultiTask
from spotforecast2_safe.configurator.config_multi import ConfigMulti

rng = np.random.default_rng(0)
idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC")
df = pd.DataFrame({"a": rng.normal(100, 10, len(idx))}, index=idx)
df.index.name = "DateTime"
with tempfile.TemporaryDirectory() as tmp:
    cfg = ConfigMulti(
        predict_size=6,
        use_exogenous_features=False,
        use_outlier_detection=False,
        auto_save_models=False,
        number_folds=2,
        cache_home=tmp,
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mt = MultiTask(cfg, dataframe=df, task="lazy")
        mt.prepare_data().detect_outliers().impute().build_exogenous_features()
        result = mt.run(task="lazy")
    assert "future_pred" in result
    assert result["future_pred"].shape == (6,)
    print(f"Dispatched to 'lazy'; horizon: {result['future_pred'].shape[0]} h")

# run() rejects unknown tasks and auto-tuning tasks
with tempfile.TemporaryDirectory() as tmp:
    cfg2 = ConfigMulti(
        predict_size=6,
        use_exogenous_features=False,
        use_outlier_detection=False,
        auto_save_models=False,
        cache_home=tmp,
    )
    mt2 = MultiTask(cfg2, task="lazy")
    try:
        mt2.run(task="optuna")
    except ValueError as exc:
        print(f"ValueError: {str(exc)[:60]}")
Dispatched to 'lazy'; horizon: 6 h
ValueError: Task 'optuna' requires auto-tuning, which is not available i
run_task_clean
multitask.multi.MultiTask.run_task_clean(
    show=False,
    dry_run=False,
    cache_home=None,
)
Remove all cached data from the pipeline cache directory.
Does not require prepare_data() to be called first.
Parameters
show
bool
Accepted for API consistency. Not used by the clean task.
False
dry_run
bool
If True, report what would be deleted without actually removing anything.
False
cache_home
Optional[Path]
Override the directory to clean. None uses the cache directory configured on this instance.
None
Returns
Dict[str, Any]
Dict with keys status, cache_dir, and deleted_items.
Examples
import tempfile
from pathlib import Path
from spotforecast2_safe.multitask.multi import MultiTask
from spotforecast2_safe.configurator.config_multi import ConfigMulti

with tempfile.TemporaryDirectory() as tmp:
    cache = Path(tmp) / "sf2_cache"
    cache.mkdir()
    (cache / "models").mkdir()
    (cache / "tuning_results").mkdir()
    cfg = ConfigMulti(cache_home=cache)
    mt = MultiTask(cfg, task="clean")
    result = mt.run_task_clean(dry_run=True)
    assert result["status"] == "dry_run"
    assert "models" in result["deleted_items"]
    print(f"Status: {result['status']}")
    print(f"Would remove: {sorted(result['deleted_items'])}")
[clean] Dry run — would delete: /tmp/tmptzu1tk4f/sf2_cache
Would remove: logging
Would remove: models
Would remove: tuning_results
Status: dry_run
Would remove: ['logging', 'models', 'tuning_results']
run_task_defaults
multitask.multi.MultiTask.run_task_defaults(show=False)
Defaults fitting — no tuning, no cached params.
Distinct from run_task_lazy only in that it never consults the tuning-result cache. Use this for deterministic baselines and for ENTSO-E “Approach 2: Training without Tuning”.
Parameters
show
bool
If True, invoke the visualisation hooks.
False
Returns
Dict[str, Any]
Aggregated prediction package. Per-target results are stored in self.results["defaults"].
Examples
import tempfile
import warnings
import numpy as np
import pandas as pd
from spotforecast2_safe.multitask.multi import MultiTask
from spotforecast2_safe.configurator.config_multi import ConfigMulti

rng = np.random.default_rng(0)
idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC")
df = pd.DataFrame({"a": rng.normal(100, 10, len(idx))}, index=idx)
df.index.name = "DateTime"
with tempfile.TemporaryDirectory() as tmp:
    cfg = ConfigMulti(
        predict_size=6,
        use_exogenous_features=False,
        use_outlier_detection=False,
        auto_save_models=False,
        number_folds=2,
        cache_home=tmp,
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mt = MultiTask(cfg, dataframe=df, task="defaults")
        mt.prepare_data().detect_outliers().impute().build_exogenous_features()
        result = mt.run_task_defaults()
    assert "future_pred" in result
    assert result["future_pred"].shape == (6,)
    print(f"Prediction horizon: {result['future_pred'].shape[0]} hours")
    print(f"validation_passed: {result['validation_passed']}")
Prediction horizon: 6 hours
validation_passed: True
run_task_lazy
multitask.multi.MultiTask.run_task_lazy(show=False)
Lazy Fitting with default LightGBM parameters.
Parameters
show
bool
If True, invoke the visualisation hooks.
False
Returns
Dict[str, Any]
Aggregated prediction package. Per-target results are stored in self.results["lazy"].
Examples
import tempfile
import warnings
import numpy as np
import pandas as pd
from spotforecast2_safe.multitask.multi import MultiTask
from spotforecast2_safe.configurator.config_multi import ConfigMulti

rng = np.random.default_rng(0)
idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC")
df = pd.DataFrame({"a": rng.normal(100, 10, len(idx))}, index=idx)
df.index.name = "DateTime"
with tempfile.TemporaryDirectory() as tmp:
    cfg = ConfigMulti(
        predict_size=6,
        use_exogenous_features=False,
        use_outlier_detection=False,
        auto_save_models=False,
        number_folds=2,
        cache_home=tmp,
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        mt = MultiTask(cfg, dataframe=df)
        mt.prepare_data().detect_outliers().impute().build_exogenous_features()
        result = mt.run_task_lazy()
    assert "future_pred" in result
    assert result["future_pred"].shape == (6,)
    print(f"Prediction horizon: {result['future_pred'].shape[0]} hours")
    print(f"Result keys: {sorted(k for k in result if k != 'forecaster')}")
Prediction horizon: 6 hours
Result keys: ['future_actual', 'future_pred', 'metrics_future', 'metrics_future_one_day', 'metrics_train', 'train_actual', 'train_pred', 'validation_passed']
run_task_predict
multitask.multi.MultiTask.run_task_predict(
    show=False,
    task_name=None,
    max_age_days=None,
)
Predict-only using previously saved models.
Loads fitted models from the cache directory and produces predictions without any training. Raises RuntimeError if no saved models are found.
Parameters
show
bool
If True, invoke the visualisation hooks.
False
task_name
Optional[str]
Restrict model loading to a specific source task ("lazy", "defaults", "optuna", or "spotoptim"). None loads the most recent model regardless of source.
None
max_age_days
Optional[float]
Maximum age in days for saved models. None accepts any age.
None
Returns
Dict[str, Any]
Aggregated prediction package. Per-target results are stored in self.results["predict"].
Examples
import tempfile
import warnings
import numpy as np
import pandas as pd
from spotforecast2_safe.multitask.multi import MultiTask
from spotforecast2_safe.configurator.config_multi import ConfigMulti

rng = np.random.default_rng(0)
idx = pd.date_range("2023-01-01", periods=24 * 14, freq="h", tz="UTC")
df = pd.DataFrame({"a": rng.normal(100, 10, len(idx))}, index=idx)
df.index.name = "DateTime"
with tempfile.TemporaryDirectory() as tmp:
    cfg = ConfigMulti(
        predict_size=6,
        use_exogenous_features=False,
        use_outlier_detection=False,
        auto_save_models=True,
        number_folds=2,
        cache_home=tmp,
    )
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # Train and persist models first.
        train_mt = MultiTask(cfg, dataframe=df, task="lazy")
        train_mt.prepare_data().detect_outliers().impute().build_exogenous_features()
        train_mt.run_task_lazy()
        # Load saved models and predict without re-training.
        pred_mt = MultiTask(cfg, dataframe=df, task="predict")
        pred_mt.prepare_data().detect_outliers().impute().build_exogenous_features()
        result = pred_mt.run_task_predict()
    assert "future_pred" in result
    assert result["future_pred"].shape == (6,)
    print(f"Prediction horizon: {result['future_pred'].shape[0]} hours")
Prediction horizon: 6 hours