import numpy as np
import pandas as pd
from spotriver.evaluation.eval_oml import eval_oml_iter_progressive, plot_oml_iter_progressive
from spotriver.evaluation.eval_bml import eval_bml_horizon, eval_bml_landmark, eval_bml_window, eval_oml_horizon, plot_bml_oml_horizon_predictions, plot_bml_oml_horizon_metrics
from spotriver.utils.data_conversion import convert_to_df
from river import metrics as river_metrics, compose, feature_extraction, linear_model, preprocessing, stats
from river import stream as river_stream
from river import preprocessing as river_preprocessing
from river.datasets import synth
from river.tree import HoeffdingTreeRegressor, HoeffdingAdaptiveTreeRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.datasets import make_regression
from sklearn import preprocessing as preprocessing_sklearn
from sklearn import tree as sklearn_tree
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_absolute_error
import os
# Make sure the output directory for the figures exists before any plot is
# written to it. `exist_ok=True` replaces the separate existence check, so
# the directory is created only when it is missing.
os.makedirs('./figures', exist_ok=True)
spotriver Examples
Friedman Drift
Consider Global Recurring Abrupt Drift:
def _global_recurring_abrupt_gen(self, x, index: int):
if index < self._change_point1 or index >= self._change_point2:
# The initial concept is recurring
return (
10 * math.sin(math.pi * x[0] * x[1]) + 20 * (x[2] - 0.5) ** 2 + 10 * x[3] + 5 * x[4]
)else:
# Drift: the positions of the features are swapped
return (
10 * math.sin(math.pi * x[3] * x[5]) + 20 * (x[1] - 0.5) ** 2 + 10 * x[0] + 5 * x[2]
)
Metric and Horizon
# Evaluation metric handed to the eval_* and plot_* helpers.
metric = mean_absolute_error
# Forecast horizon: 7 * 24 = 168 steps (presumably one week of hourly data).
horizon = 7 * 24
# Scale factor for the stream length and the drift positions.
k = 10
n_total = int(k * 100_000)
p_1 = int(k * 25_000)
p_2 = int(k * 50_000)
# The two drift positions, passed on as `position` to the data generator.
position = (p_1, p_2)
# Number of leading rows used as the training set.
n_train = 1_000
# [a, b) is a 12-row window that ends at row n_train + p_1; it is used as
# the slice bounds for the prediction plot.
a = n_train + p_1 - 12
b = a + 12
Data: Friedman-Drift
# Synthetic Friedman stream with drift type 'gra' (the global recurring
# abrupt drift described above), using the configured drift positions and a
# fixed seed.
dataset = synth.FriedmanDrift(
    drift_type='gra',
    position=position,
    seed=123
)

# Collect the stream into one list per feature plus one list for the target.
# The feature names are taken from the first sample of the stream.
data_dict = {key: [] for key in list(dataset.take(1))[0][0].keys()}
data_dict["y"] = []
for x, y in dataset.take(n_total):
    for key, value in x.items():
        data_dict[key].append(value)
    data_dict["y"].append(y)
df = pd.DataFrame(data_dict)
# Add column names x1 until x10 to the first 10 columns of the dataframe and the column name y to the last column
df.columns = [f"x{i}" for i in range(1, 11)] + ["y"]

# The first n_train rows form the training set, the remaining rows the test set.
train = df[:n_train]
test = df[n_train:]
target_column = "y"
BML: Linear Regression
# Batch model: scikit-learn linear regression.
bml_lm = LinearRegression()
# Add a MinMaxScaler to the pipeline
bml_lm = make_pipeline(MinMaxScaler(), bml_lm)

# Evaluate the batch pipeline on the train/test split with the configured
# horizon and metric; the call returns an evaluation frame and a frame of
# true values (exact contents are defined by spotriver - see its docs).
df_eval_bml_lm, df_true_bml_lm = eval_bml_horizon(
    model=bml_lm,
    train=train,
    test=test,
    target_column=target_column,
    horizon=horizon,
    include_remainder=True,
    metric=metric,
)
BML: Decision Tree Regressor
# Batch model: scikit-learn decision tree with a fixed random state.
bml_tree = DecisionTreeRegressor(random_state=0)
# Add a MinMaxScaler to the pipeline
bml_tree = make_pipeline(MinMaxScaler(), bml_tree)

# Evaluate the batch pipeline on the train/test split with the configured
# horizon and metric; the call returns an evaluation frame and a frame of
# true values (exact contents are defined by spotriver - see its docs).
df_eval_bml_tree, df_true_bml_tree = eval_bml_horizon(
    model=bml_tree,
    train=train,
    test=test,
    target_column=target_column,
    horizon=horizon,
    include_remainder=True,
    metric=metric,
)
OML: Linear Regression
# Online model: river StandardScaler piped into river LinearRegression.
oml_lm = preprocessing.StandardScaler()
oml_lm |= linear_model.LinearRegression()

# Evaluate the online pipeline on the same split, horizon and metric as the
# batch models.
df_eval_oml_lm, df_true_oml_lm = eval_oml_horizon(
    model=oml_lm,
    train=train,
    test=test,
    target_column="y",
    horizon=horizon,
    metric=metric,
)
OML: HTR
# Online model: river StandardScaler piped into a Hoeffding tree regressor.
htr_model = (preprocessing.StandardScaler() | HoeffdingTreeRegressor())

# Evaluate with the shared split, horizon and metric; `oml_grace_period=100`
# is forwarded to the evaluator (its exact meaning is defined by spotriver).
df_eval_htr, df_true_htr = eval_oml_horizon(
    model=htr_model,
    train=train,
    test=test,
    target_column="y",
    horizon=horizon,
    oml_grace_period=100,
    metric=metric,
)
OML: HATR
# Online model: river StandardScaler piped into a Hoeffding adaptive tree
# regressor.
hatr_model = (preprocessing.StandardScaler() | HoeffdingAdaptiveTreeRegressor())

# Evaluate with the shared split, horizon and metric; `oml_grace_period=100`
# is forwarded to the evaluator (its exact meaning is defined by spotriver).
df_eval_hatr, df_true_hatr = eval_oml_horizon(
    model=hatr_model,
    train=train,
    test=test,
    target_column="y",
    horizon=horizon,
    oml_grace_period=100,
    metric=metric,
)
Plot
# Labels in the same order as the result frames passed to the plot helpers.
df_labels = ["bml_lm", "bml_tree", "oml_lm", "htr", "hatr"]

# Metric comparison of all five models, saved as a PDF in ./figures.
plot_bml_oml_horizon_metrics(
    df_eval=[df_eval_bml_lm, df_eval_bml_tree, df_eval_oml_lm, df_eval_htr, df_eval_hatr],
    log_y=False,
    log_x=False,
    df_labels=df_labels,
    cumulative=True,
    metric=metric,
    figsize=(10, 5),
    filename="./figures/ch09_friedman_1_000_000_metrics.pdf",
)

# Predictions of all five models, restricted to the rows a..b-1 of each
# true-values frame, saved as a PDF in ./figures.
plot_bml_oml_horizon_predictions(
    df_true=[
        df_true_bml_lm[a:b],
        df_true_bml_tree[a:b],
        df_true_oml_lm[a:b],
        df_true_htr[a:b],
        df_true_hatr[a:b],
    ],
    target_column="y",
    df_labels=df_labels,
    filename="./figures/ch09_friedman_1_000_000_predictions.pdf",
)
Further Examples
Examples can be found in the Hyperparameter Tuning Cookbook, e.g., Documentation of the Sequential Parameter Optimization.