hyperriver

HyperRiver

Hyperparameter Tuning for River.

Source code in spotRiver/fun/hyperriver.py
class HyperRiver:
    """
    Hyperparameter Tuning for River.
    """

    def __init__(self, weights=np.array([1, 0, 0]), seed=126, log_level=50) -> None:
        """Initialize the class.

        Args:
            weights (np.array):
                An array of weights for error, r_time, and memory.
                Defaults to [1, 0, 0], which considers only the error.
            seed (int):
                seed. Defaults to 126.
            log_level (int):
                The level of logging to use. 0 = no logging, 50 = print only important information.
                Defaults to 50.

        Returns:
            (NoneType): None

        Examples:
            >>> from spotRiver.fun.hyperriver import HyperRiver
            >>> import pandas as pd
            >>> hr = HyperRiver(weights=[1, 2, 3])
            >>> df_eval = pd.DataFrame( [[1, 2, 3], [3, 4, 5]], columns=['Metric', 'CompTime (s)', 'Memory (MB)'])
            >>> hr.compute_y(df_eval)
                20.0
        """
        self.seed = seed
        self.rng = default_rng(seed=self.seed)
        self.fun_control = {
            "seed": None,
            "data": None,
            "step": 10_000,
            "horizon": None,
            "grace_period": None,
            "metric_river": None,
            "metric_sklearn": mean_absolute_error,
            "weights": weights,
            "log_level": log_level,
            "var_name": [],
            "var_type": [],
            "prep_model": None,
        }
        self.log_level = self.fun_control["log_level"]
        logger.setLevel(self.log_level)
        logger.info(f"Starting the logger at level {self.log_level} for module {__name__}:")

    def compute_y(self, df_eval):
        """Compute the objective function value as a weighted sum of
            the errors, running time, and memory usage.

        Args:
            df_eval (pd.DataFrame):
                DataFrame with the evaluation results. Columns must have the following names:
                - "Metric": The evaluation metric.
                - "CompTime (s)": The running time.
                - "Memory (MB)": The memory usage.

        Returns:
            (float): objective function value. Weighted mean of the errors, running time, and memory usage.

        Examples:
            >>> from spotRiver.fun.hyperriver import HyperRiver
                hr = HyperRiver()
                # set the weights
                hr.fun_control["weights"] = [1, 1, 1]
                df_eval = pd.DataFrame( [[1, 2, 3], [3, 4, 5]], columns=['Metric', 'CompTime (s)', 'Memory (MB)'])
                hr.compute_y(df_eval)
        """
        # take the mean of the MAEs/ACCs of the predicted values and ignore the NaN values
        df_eval = df_eval.dropna()
        y_error = df_eval["Metric"].mean()
        logger.debug("y_error in compute_y: %s", y_error)
        y_r_time = df_eval["CompTime (s)"].mean()
        logger.debug("y_r_time in compute_y: %s", y_r_time)
        y_memory = df_eval["Memory (MB)"].mean()
        logger.debug("y_memory in compute_y: %s", y_memory)
        weights = self.fun_control["weights"]
        logger.debug("weights in compute_y: %s", weights)
        y = weights[0] * y_error + weights[1] * y_r_time + weights[2] * y_memory
        logger.debug("weighted res in compute_y: %s", y)
        return y

    def check_X_shape(self, X):
        """
        Check the shape of X.

        Args:
            X (np.ndarray): The input data.

        Returns:
            (NoneType): None

        Examples:
            >>> X = np.array([[1, 2, 3], [4, 5, 6]])
            >>> check_X_shape(X)
            >>> X = np.array([1, 2, 3])
            >>> check_X_shape(X)
            Traceback (most recent call last):
            ...
            Exception

        """
        try:
            X.shape[1]
        except ValueError:
            X = np.array([X])
        if X.shape[1] != len(self.fun_control["var_name"]):
            raise Exception

    def evaluate_model(self, model: object, fun_control: dict) -> Tuple[pd.DataFrame, pd.DataFrame]:
        """
        Evaluates a model using the eval_oml_horizon function from
        spotRiver.evaluation.eval_bml.

        Args:
            model (object): The model to be evaluated.
            fun_control (dict): A dictionary containing the following keys:
                - train (pd.DataFrame): The training data.
                - test (pd.DataFrame): The testing data.
                - target_column (str): The name of the target column.
                - horizon (int): The horizon value.
                - oml_grace_period (int): The oml_grace_period value.
                - metric_sklearn (str): The metric to be used for evaluation.

        Returns:
            (Tuple[pd.DataFrame, pd.DataFrame]): A tuple containing two dataframes:
                - df_eval: The evaluation dataframe.
                - df_preds: The predictions dataframe.

        Examples:
            >>> model = SomeModel()
            >>> fun_control = {
            ...     "train": train_data,
            ...     "test": test_data,
            ...     "target_column": "target",
            ...     "horizon": 5,
            ...     "oml_grace_period": 10,
            ...     "metric_sklearn": "accuracy"
            ... }
            >>> df_eval, df_preds = evaluate_model(model, fun_control)
        """
        try:
            df_eval, df_preds = eval_oml_horizon(
                model=model,
                train=fun_control["train"],
                test=fun_control["test"],
                target_column=fun_control["target_column"],
                horizon=fun_control["horizon"],
                oml_grace_period=fun_control["oml_grace_period"],
                metric=fun_control["metric_sklearn"],
            )
        except Exception as err:
            print(f"Error in fun_oml_horizon(). Call to eval_oml_horizon failed. {err=}, {type(err)=}")
        return df_eval, df_preds

    def get_river_df_eval_preds(self, model):
        """Get the evaluation and prediction dataframes for a river model.

        Args:
            model (object): The model to be evaluated.

        Returns:
            (Tuple[pd.DataFrame, pd.DataFrame]): A tuple containing two dataframes:
                - df_eval: The evaluation dataframe.
                - df_preds: The predictions dataframe.

        Examples:
            >>> model = SomeModel()
            >>> df_eval, df_preds = get_river_df_eval_preds(model)
        """
        try:
            df_eval, df_preds = self.evaluate_model(model, self.fun_control)
        except Exception as err:
            print(f"Error in get_river_df_eval_preds(). Call to evaluate_model failed. {err=}, {type(err)=}")
            print("Setting df_eval and df.preds to np.nan")
            df_eval = np.nan
            df_preds = np.nan
        return df_eval, df_preds

    def fun_oml_horizon(self, X: np.ndarray, fun_control: Optional[Dict[str, Any]] = None) -> np.ndarray:
        """
        The objective function for hyperparameter tuning. Prepares the data and calls the evaluate_model function.

        This function takes in input data and a dictionary of control parameters to compute the objective function values for hyperparameter tuning.

        Args:
            X (np.ndarray): The input data.
            fun_control (dict, optional): A dictionary containing the following keys:
                - train (pd.DataFrame): The training data.
                - test (pd.DataFrame): The testing data.
                - target_column (str): The name of the target column.
                - horizon (int): The horizon value.
                - oml_grace_period (int): The oml_grace_period value.
                - metric_sklearn (str): The metric to be used for evaluation.

        Returns:
            (np.ndarray): The objective function values.

        Examples:
            >>> fun_oml_horizon(X,
                                fun_control={'train': train_data,
                                             'test': test_data,
                                              'target_column': 'y',
                                              'horizon': 5,
                                              'oml_grace_period': 10,
                                              'metric_sklearn': 'accuracy'})
            array([0.8, 0.85, 0.9])
        """
        logger.debug("X from fun_oml_horizon: %s", X)
        logger.debug("fun_control from fun_oml_horizon: %s", fun_control)
        # List of objective function values, filled with append below
        # List is required, if several configurations are evaluated, e.g.,
        # from the initial design
        z_res = []
        self.fun_control.update(fun_control)
        self.check_X_shape(X)
        var_dict = assign_values(X, self.fun_control["var_name"])
        for config in generate_one_config_from_var_dict(var_dict, self.fun_control):
            logger.debug("config from fun_oml_horizon: %s", config)
            if self.fun_control["prep_model"] is not None:
                model = compose.Pipeline(self.fun_control["prep_model"], self.fun_control["core_model"](**config))
            else:
                model = self.fun_control["core_model"](**config)
            try:
                df_eval, _ = self.evaluate_model(model, self.fun_control)
                y = self.compute_y(df_eval)
            except Exception as err:
                y = np.nan
                print(f"Error in fun_oml_horizon(). Call to evaluate or compute_y failed. {err=}, {type(err)=}")
                print("Setting y to np.nan.")
            # Changed in v0.2.21:
            # Score is not divided by the number of samples
            # z_res.append(y / self.fun_control["n_samples"])
            z_res.append(y)
        return np.array(z_res)

__init__(weights=np.array([1, 0, 0]), seed=126, log_level=50)

Initialize the class.

Parameters:

    weights (np.array): An array of weights for error, r_time, and memory.
        Defaults to np.array([1, 0, 0]), which considers only the error.
    seed (int): Random seed. Defaults to 126.
    log_level (int): The level of logging to use. 0 = no logging, 50 = print only
        important information. Defaults to 50.

Returns:

    (NoneType): None

Examples:

>>> from spotRiver.fun.hyperriver import HyperRiver
>>> import pandas as pd
>>> hr = HyperRiver(weights=[1, 2, 3])
>>> df_eval = pd.DataFrame( [[1, 2, 3], [3, 4, 5]], columns=['Metric', 'CompTime (s)', 'Memory (MB)'])
>>> hr.compute_y(df_eval)
20.0
Source code in spotRiver/fun/hyperriver.py
def __init__(self, weights=np.array([1, 0, 0]), seed=126, log_level=50) -> None:
    """Initialize the class.

    Args:
        weights (np.array):
            An array of weights for error, r_time, and memory.
            Defaults to [1, 0, 0], which considers only the error.
        seed (int):
            seed. Defaults to 126.
        log_level (int):
            The level of logging to use. 0 = no logging, 50 = print only important information.
            Defaults to 50.

    Returns:
        (NoneType): None

    Examples:
        >>> from spotRiver.fun.hyperriver import HyperRiver
        >>> import pandas as pd
        >>> hr = HyperRiver(weights=[1, 2, 3])
        >>> df_eval = pd.DataFrame( [[1, 2, 3], [3, 4, 5]], columns=['Metric', 'CompTime (s)', 'Memory (MB)'])
        >>> hr.compute_y(df_eval)
            20.0
    """
    self.seed = seed
    self.rng = default_rng(seed=self.seed)
    self.fun_control = {
        "seed": None,
        "data": None,
        "step": 10_000,
        "horizon": None,
        "grace_period": None,
        "metric_river": None,
        "metric_sklearn": mean_absolute_error,
        "weights": weights,
        "log_level": log_level,
        "var_name": [],
        "var_type": [],
        "prep_model": None,
    }
    self.log_level = self.fun_control["log_level"]
    logger.setLevel(self.log_level)
    logger.info(f"Starting the logger at level {self.log_level} for module {__name__}:")
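
The weights determine how strongly the prediction error, the running time, and the memory consumption enter the objective value computed by compute_y. A hedged illustration of two typical settings (the trade-off weights below are made-up values, not recommendations):

    import numpy as np
    from spotRiver.fun.hyperriver import HyperRiver

    # Default behaviour: only the prediction error is minimized.
    hr_error_only = HyperRiver(weights=np.array([1, 0, 0]))

    # Illustrative multi-objective setting: also penalize runtime (s) and memory (MB).
    hr_multi = HyperRiver(weights=np.array([1, 0.1, 0.01]))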

check_X_shape(X)

Check that the number of columns of X equals the number of entries in fun_control["var_name"]; otherwise an Exception is raised.

Parameters:

    X (np.ndarray): The input data.

Returns:

    (NoneType): None

Examples:

>>> X = np.array([[1, 2, 3], [4, 5, 6]])
>>> check_X_shape(X)
>>> X = np.array([1, 2, 3])
>>> check_X_shape(X)
Traceback (most recent call last):
...
Exception
Source code in spotRiver/fun/hyperriver.py
def check_X_shape(self, X):
    """
    Check the shape of X.

    Args:
        X (np.ndarray): The input data.

    Returns:
        (NoneType): None

    Examples:
        >>> X = np.array([[1, 2, 3], [4, 5, 6]])
        >>> check_X_shape(X)
        >>> X = np.array([1, 2, 3])
        >>> check_X_shape(X)
        Traceback (most recent call last):
        ...
        Exception

    """
    try:
        X.shape[1]
    except ValueError:
        X = np.array([X])
    if X.shape[1] != len(self.fun_control["var_name"]):
        raise Exception
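
The check compares the number of columns of X with the number of entries in fun_control["var_name"]. A small, hedged illustration (the hyperparameter names are placeholders):

    import numpy as np
    from spotRiver.fun.hyperriver import HyperRiver

    hr = HyperRiver()
    hr.fun_control["var_name"] = ["alpha", "beta", "gamma"]  # three tuned hyperparameters (placeholder names)

    hr.check_X_shape(np.array([[1, 2, 3], [4, 5, 6]]))  # passes: 3 columns match 3 variable names
    try:
        hr.check_X_shape(np.array([[1, 2]]))             # 2 columns do not match 3 variable names
    except Exception:
        print("X has the wrong number of columns")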

compute_y(df_eval)

Compute the objective function value as a weighted sum of the mean error, the mean running time, and the mean memory usage.

Parameters:

    df_eval (pd.DataFrame): DataFrame with the evaluation results. Columns must have
        the following names:
        - "Metric": The evaluation metric.
        - "CompTime (s)": The running time.
        - "Memory (MB)": The memory usage.

Returns:

    (float): The objective function value, i.e., the weighted sum of the mean error,
        mean running time, and mean memory usage.

Examples:

>>> import pandas as pd
>>> from spotRiver.fun.hyperriver import HyperRiver
>>> hr = HyperRiver()
>>> # set the weights
>>> hr.fun_control["weights"] = [1, 1, 1]
>>> df_eval = pd.DataFrame([[1, 2, 3], [3, 4, 5]], columns=['Metric', 'CompTime (s)', 'Memory (MB)'])
>>> hr.compute_y(df_eval)
9.0
Source code in spotRiver/fun/hyperriver.py
def compute_y(self, df_eval):
    """Compute the objective function value as a weighted sum of
        the errors, running time, and memory usage.

    Args:
        df_eval (pd.DataFrame):
            DataFrame with the evaluation results. Columns must have the following names:
            - "Metric": The evaluation metric.
            - "CompTime (s)": The running time.
            - "Memory (MB)": The memory usage.

    Returns:
        (float): objective function value. Weighted mean of the errors, running time, and memory usage.

    Examples:
        >>> from spotRiver.fun.hyperriver import HyperRiver
            hr = HyperRiver()
            # set the weights
            hr.fun_control["weights"] = [1, 1, 1]
            df_eval = pd.DataFrame( [[1, 2, 3], [3, 4, 5]], columns=['Metric', 'CompTime (s)', 'Memory (MB)'])
            hr.compute_y(df_eval)
    """
    # take the mean of the MAEs/ACCs of the predicted values and ignore the NaN values
    df_eval = df_eval.dropna()
    y_error = df_eval["Metric"].mean()
    logger.debug("y_error in compute_y: %s", y_error)
    y_r_time = df_eval["CompTime (s)"].mean()
    logger.debug("y_r_time in compute_y: %s", y_r_time)
    y_memory = df_eval["Memory (MB)"].mean()
    logger.debug("y_memory in compute_y: %s", y_memory)
    weights = self.fun_control["weights"]
    logger.debug("weights in compute_y: %s", weights)
    y = weights[0] * y_error + weights[1] * y_r_time + weights[2] * y_memory
    logger.debug("weighted res in compute_y: %s", y)
    return y
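
To make the weighting concrete: with weights [1, 2, 3] and the same two-row data frame as in the examples, the column means are 2 (Metric), 3 (CompTime (s)), and 4 (Memory (MB)), so y = 1 * 2 + 2 * 3 + 3 * 4 = 20.0. As a runnable snippet:

    import pandas as pd
    from spotRiver.fun.hyperriver import HyperRiver

    hr = HyperRiver()
    hr.fun_control["weights"] = [1, 2, 3]
    df_eval = pd.DataFrame([[1, 2, 3], [3, 4, 5]],
                           columns=["Metric", "CompTime (s)", "Memory (MB)"])
    # column means: 2, 3, 4  ->  1*2 + 2*3 + 3*4 = 20.0
    print(hr.compute_y(df_eval))  # 20.0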

evaluate_model(model, fun_control)

Evaluates a model using the eval_oml_horizon function from spotRiver.evaluation.eval_bml.

Parameters:

    model (object): The model to be evaluated.
    fun_control (dict): A dictionary containing the following keys:
        - train (pd.DataFrame): The training data.
        - test (pd.DataFrame): The testing data.
        - target_column (str): The name of the target column.
        - horizon (int): The horizon value.
        - oml_grace_period (int): The oml_grace_period value.
        - metric_sklearn: The metric to be used for evaluation (a scikit-learn metric
          such as mean_absolute_error, the default set in the constructor).

Returns:

    (Tuple[pd.DataFrame, pd.DataFrame]): A tuple containing two dataframes:
        - df_eval: The evaluation dataframe.
        - df_preds: The predictions dataframe.

Examples:

>>> model = SomeModel()
>>> fun_control = {
...     "train": train_data,
...     "test": test_data,
...     "target_column": "target",
...     "horizon": 5,
...     "oml_grace_period": 10,
...     "metric_sklearn": "accuracy"
... }
>>> df_eval, df_preds = evaluate_model(model, fun_control)
Source code in spotRiver/fun/hyperriver.py
def evaluate_model(self, model: object, fun_control: dict) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """
    Evaluates a model using the eval_oml_horizon function from
    spotRiver.evaluation.eval_bml.

    Args:
        model (object): The model to be evaluated.
        fun_control (dict): A dictionary containing the following keys:
            - train (pd.DataFrame): The training data.
            - test (pd.DataFrame): The testing data.
            - target_column (str): The name of the target column.
            - horizon (int): The horizon value.
            - oml_grace_period (int): The oml_grace_period value.
            - metric_sklearn (str): The metric to be used for evaluation.

    Returns:
        (Tuple[pd.DataFrame, pd.DataFrame]): A tuple containing two dataframes:
            - df_eval: The evaluation dataframe.
            - df_preds: The predictions dataframe.

    Examples:
        >>> model = SomeModel()
        >>> fun_control = {
        ...     "train": train_data,
        ...     "test": test_data,
        ...     "target_column": "target",
        ...     "horizon": 5,
        ...     "oml_grace_period": 10,
        ...     "metric_sklearn": "accuracy"
        ... }
        >>> df_eval, df_preds = evaluate_model(model, fun_control)
    """
    try:
        df_eval, df_preds = eval_oml_horizon(
            model=model,
            train=fun_control["train"],
            test=fun_control["test"],
            target_column=fun_control["target_column"],
            horizon=fun_control["horizon"],
            oml_grace_period=fun_control["oml_grace_period"],
            metric=fun_control["metric_sklearn"],
        )
    except Exception as err:
        print(f"Error in fun_oml_horizon(). Call to eval_oml_horizon failed. {err=}, {type(err)=}")
    return df_eval, df_preds
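
A more concrete, hedged sketch with a river pipeline and synthetic data. The data, horizon, and grace period are illustrative placeholders; metric_sklearn is passed as a callable, matching the mean_absolute_error default set in the constructor:

    import numpy as np
    import pandas as pd
    from river import compose, linear_model, preprocessing
    from sklearn.metrics import mean_absolute_error
    from spotRiver.fun.hyperriver import HyperRiver

    # Synthetic regression data standing in for a real train/test split.
    rng = np.random.default_rng(0)
    features = rng.normal(size=(300, 2))
    df = pd.DataFrame(features, columns=["x1", "x2"])
    df["y"] = 2 * df["x1"] - df["x2"] + rng.normal(scale=0.1, size=300)

    fun_control = {
        "train": df.iloc[:200],
        "test": df.iloc[200:],
        "target_column": "y",
        "horizon": 25,
        "oml_grace_period": 25,
        "metric_sklearn": mean_absolute_error,
    }
    model = compose.Pipeline(preprocessing.StandardScaler(),
                             linear_model.LinearRegression())

    hr = HyperRiver()
    df_eval, df_preds = hr.evaluate_model(model, fun_control)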

fun_oml_horizon(X, fun_control=None)

The objective function for hyperparameter tuning. Prepares the data and calls the evaluate_model function.

This function takes in input data and a dictionary of control parameters to compute the objective function values for hyperparameter tuning.

Parameters:

    X (np.ndarray): The input data. One row per hyperparameter configuration, one
        column per entry in fun_control["var_name"].
    fun_control (dict, optional): A dictionary containing the following keys.
        Defaults to None.
        - train (pd.DataFrame): The training data.
        - test (pd.DataFrame): The testing data.
        - target_column (str): The name of the target column.
        - horizon (int): The horizon value.
        - oml_grace_period (int): The oml_grace_period value.
        - metric_sklearn: The metric to be used for evaluation.

Returns:

    (np.ndarray): The objective function values, one per configuration (row of X).

Examples:

>>> fun_oml_horizon(X,
...                 fun_control={'train': train_data,
...                              'test': test_data,
...                              'target_column': 'y',
...                              'horizon': 5,
...                              'oml_grace_period': 10,
...                              'metric_sklearn': 'accuracy'})
array([0.8, 0.85, 0.9])
Source code in spotRiver/fun/hyperriver.py
def fun_oml_horizon(self, X: np.ndarray, fun_control: Optional[Dict[str, Any]] = None) -> np.ndarray:
    """
    The objective function for hyperparameter tuning. Prepares the data and calls the evaluate_model function.

    This function takes in input data and a dictionary of control parameters to compute the objective function values for hyperparameter tuning.

    Args:
        X (np.ndarray): The input data.
        fun_control (dict, optional): A dictionary containing the following keys:
            - train (pd.DataFrame): The training data.
            - test (pd.DataFrame): The testing data.
            - target_column (str): The name of the target column.
            - horizon (int): The horizon value.
            - oml_grace_period (int): The oml_grace_period value.
            - metric_sklearn (str): The metric to be used for evaluation.

    Returns:
        (np.ndarray): The objective function values.

    Examples:
        >>> fun_oml_horizon(X,
                            fun_control={'train': train_data,
                                         'test': test_data,
                                          'target_column': 'y',
                                          'horizon': 5,
                                          'oml_grace_period': 10,
                                          'metric_sklearn': 'accuracy'})
        array([0.8, 0.85, 0.9])
    """
    logger.debug("X from fun_oml_horizon: %s", X)
    logger.debug("fun_control from fun_oml_horizon: %s", fun_control)
    # List of objective function values, filled with append below
    # List is required, if several configurations are evaluated, e.g.,
    # from the initial design
    z_res = []
    self.fun_control.update(fun_control)
    self.check_X_shape(X)
    var_dict = assign_values(X, self.fun_control["var_name"])
    for config in generate_one_config_from_var_dict(var_dict, self.fun_control):
        logger.debug("config from fun_oml_horizon: %s", config)
        if self.fun_control["prep_model"] is not None:
            model = compose.Pipeline(self.fun_control["prep_model"], self.fun_control["core_model"](**config))
        else:
            model = self.fun_control["core_model"](**config)
        try:
            df_eval, _ = self.evaluate_model(model, self.fun_control)
            y = self.compute_y(df_eval)
        except Exception as err:
            y = np.nan
            print(f"Error in fun_oml_horizon(). Call to evaluate or compute_y failed. {err=}, {type(err)=}")
            print("Setting y to np.nan.")
        # Changed in v0.2.21:
        # Score is not divided by the number of samples
        # z_res.append(y / self.fun_control["n_samples"])
        z_res.append(y)
    return np.array(z_res)

get_river_df_eval_preds(model)

Get the evaluation and prediction dataframes for a river model.

Parameters:

    model (object): The model to be evaluated.

Returns:

    (Tuple[pd.DataFrame, pd.DataFrame]): A tuple containing two dataframes:
        - df_eval: The evaluation dataframe.
        - df_preds: The predictions dataframe.

Examples:

>>> model = SomeModel()
>>> df_eval, df_preds = get_river_df_eval_preds(model)
Source code in spotRiver/fun/hyperriver.py
def get_river_df_eval_preds(self, model):
    """Get the evaluation and prediction dataframes for a river model.

    Args:
        model (object): The model to be evaluated.

    Returns:
        (Tuple[pd.DataFrame, pd.DataFrame]): A tuple containing two dataframes:
            - df_eval: The evaluation dataframe.
            - df_preds: The predictions dataframe.

    Examples:
        >>> model = SomeModel()
        >>> df_eval, df_preds = get_river_df_eval_preds(model)
    """
    try:
        df_eval, df_preds = self.evaluate_model(model, self.fun_control)
    except Exception as err:
        print(f"Error in get_river_df_eval_preds(). Call to evaluate_model failed. {err=}, {type(err)=}")
        print("Setting df_eval and df.preds to np.nan")
        df_eval = np.nan
        df_preds = np.nan
    return df_eval, df_preds