Skip to content

effects

plot_all_partial_dependence(df, df_target, model='GradientBoostingRegressor', nrows=5, ncols=6, figsize=(20, 15))

Generates Partial Dependence Plots (PDPs) for every feature in a DataFrame against a target variable, arranged in a grid.

Parameters:

Name Type Description Default
df DataFrame

DataFrame containing the features.

required
df_target Series

Series containing the target variable.

required
model str

Name of the model class to use (e.g., “GradientBoostingRegressor”). Defaults to “GradientBoostingRegressor”.

'GradientBoostingRegressor'
nrows int

Number of rows in the grid of subplots. Defaults to 5.

5
ncols int

Number of columns in the grid of subplots. Defaults to 6.

6
figsize tuple

Figure size (width, height) in inches. Defaults to (20, 15).

(20, 15)

Returns:

Type Description
None

None

Examples:

>>> from spotpython.utils.effects import plot_all_partial_dependence
>>> from sklearn.datasets import load_diabetes
>>> data = load_diabetes(as_frame=True)
>>> df = data.data
>>> df_target = data.target
>>> plot_all_partial_dependence(df, df_target, model="GradientBoostingRegressor", nrows=2, ncols=5, figsize=(20, 8))
Source code in spotpython/utils/effects.py
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
def plot_all_partial_dependence(df, df_target, model="GradientBoostingRegressor", nrows=5, ncols=6, figsize=(20, 15)) -> None:
    """
    Generates Partial Dependence Plots (PDPs) for every feature in a DataFrame against a target variable,
    arranged in a grid.

    A regressor of the requested type is fitted on the training part of an
    80/20 train/test split (``random_state=42``). One PDP per column of
    ``df`` is then drawn into its own subplot, unused subplots are removed
    and the figure is displayed with ``plt.show()``.

    Args:
        df (pd.DataFrame): DataFrame containing the features.
        df_target (pd.Series): Series containing the target variable.
        model (str, optional): Name of the model class to use. Supported values are
            "GradientBoostingRegressor", "RandomForestRegressor" and "DecisionTreeRegressor".
            Defaults to "GradientBoostingRegressor".
        nrows (int, optional): Number of rows in the grid of subplots. Defaults to 5.
        ncols (int, optional): Number of columns in the grid of subplots. Defaults to 6.
        figsize (tuple, optional): Figure size (width, height) in inches. Defaults to (20, 15).

    Returns:
        None

    Raises:
        ValueError: If ``model`` is not one of the supported names, or if ``df``
            has more columns than the ``nrows * ncols`` grid has subplots.

    Examples:
        >>> from spotpython.utils.effects import plot_all_partial_dependence
        >>> from sklearn.datasets import load_diabetes
        >>> data = load_diabetes(as_frame=True)
        >>> df = data.data
        >>> df_target = data.target
        >>> plot_all_partial_dependence(df, df_target, model="GradientBoostingRegressor", nrows=2, ncols=5, figsize=(20, 8))

    """
    # Validate the cheap arguments first so that no model is fitted in vain.
    supported_models = ("GradientBoostingRegressor", "RandomForestRegressor", "DecisionTreeRegressor")
    if model not in supported_models:
        raise ValueError(f"Unsupported model: {model}")

    features = df.columns
    n_features = len(features)
    n_axes = nrows * ncols
    if n_features > n_axes:
        raise ValueError(f"The {nrows}x{ncols} grid provides {n_axes} subplots, but df has {n_features} features.")

    # Only the training part of the split is used for fitting and plotting.
    X_train, _, y_train, _ = train_test_split(df, df_target, test_size=0.2, random_state=42)

    # Instantiate the requested estimator
    if model == "GradientBoostingRegressor":
        estimator = GradientBoostingRegressor(random_state=42)
    elif model == "RandomForestRegressor":
        from sklearn.ensemble import RandomForestRegressor

        estimator = RandomForestRegressor(random_state=42)
    else:
        from sklearn.tree import DecisionTreeRegressor

        estimator = DecisionTreeRegressor(random_state=42)

    estimator.fit(X_train, y_train)

    # squeeze=False guarantees a 2D array of axes, even for a 1x1 grid
    fig, axes = plt.subplots(nrows=nrows, ncols=ncols, figsize=figsize, squeeze=False)
    axes = axes.ravel()

    # One PDP per feature, titled with the feature name
    for ax, feature in zip(axes, features):
        PartialDependenceDisplay.from_estimator(estimator, X_train, [feature], ax=ax)
        ax.set_title(feature)

    # Remove the subplots that were not needed
    for ax in axes[n_features:]:
        fig.delaxes(ax)

    plt.tight_layout()  # Adjust subplot parameters for a tight layout
    plt.show()

randorient(k, p, xi, seed=None)

Generates a random orientation of a sampling matrix. This function creates a random sampling matrix for a given number of dimensions (k), number of levels (p), and step length (xi). The resulting matrix is used for screening designs in the context of experimental design.

Parameters:

Name Type Description Default
k int

Number of dimensions.

required
p int

Number of levels.

required
xi float

Step length.

required
seed int

Seed for the random number generator. Defaults to None.

None

Returns:

Type Description
ndarray

np.ndarray: A random sampling matrix of shape (k+1, k).

Example

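>>> B = randorient(k=2, p=3, xi=0.5, seed=42)
>>> B.shape
(3, 2)

The entries of the matrix are random; consecutive rows differ in exactly one coordinate by the step xi / (p - 1).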

Source code in spotpython/utils/effects.py
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
def randorient(k, p, xi, seed=None) -> np.ndarray:
    """Generates a random orientation of a sampling matrix.
    This function creates a random sampling matrix for a given number of
    dimensions (k), number of levels (p), and step length (xi). The
    resulting matrix is used for screening designs in the context of
    experimental design.

    Consecutive rows of the result differ in exactly one coordinate, and
    the size of that difference is ``Delta = xi / (p - 1)``.

    Args:
        k (int): Number of dimensions.
        p (int): Number of levels.
        xi (float): Step length.
        seed (int, optional): Seed for the random number generator.
            Defaults to None.

    Returns:
        np.ndarray: A random sampling matrix of shape (k+1, k).

    Raises:
        ValueError: If ``p`` is smaller than 2, or if the truncated grid of
            base values is empty (``xi`` larger than ``p - 1``).

    Example:
        >>> from spotpython.utils.effects import randorient
        >>> B = randorient(k=2, p=3, xi=0.5, seed=42)
        >>> B.shape
        (3, 2)
    """
    if p < 2:
        raise ValueError(f"The number of levels p must be at least 2, but p = {p}.")

    # default_rng(None) draws fresh entropy, so no special case is needed
    rng = np.random.default_rng(seed)

    # Step length
    Delta = xi / (p - 1)

    m = k + 1

    # A truncated p-level grid in one dimension: {0, 1/(p-1), ..., 1 - Delta}.
    # The number of levels is computed from integers (with a small tolerance)
    # instead of a float-stepped np.arange, which excludes the end point
    # 1 - Delta or keeps it depending on rounding.
    n_levels = max(int(np.floor((p - 1) - xi + 1e-9)) + 1, 0)
    xs = np.arange(n_levels) / (p - 1)
    xsl = len(xs)
    if xsl < 1:
        raise ValueError(f"The number of levels xsl is {xsl}, but it must be greater than 0. " f"(xi = {xi}, p = {p}, Delta = {Delta}, p - 1 = {p - 1})")

    # Basic sampling matrix
    B = np.vstack((np.zeros((1, k)), np.tril(np.ones((k, k)))))

    # Randomization

    # Matrix with +1s and -1s on the diagonal with equal probability
    Dstar = np.diag(2 * rng.integers(0, 2, size=k) - 1)

    # Random base value
    xstar = xs[rng.integers(0, xsl, size=k)]

    # Permutation matrix: row i has its single 1 in column rp[i]
    Pstar = np.zeros((k, k))
    rp = rng.permutation(k)
    Pstar[np.arange(k), rp] = 1

    # A random orientation of the sampling matrix
    ones_mk = np.ones((m, k))
    base = np.ones((m, 1)) @ xstar.reshape(1, -1)
    steps = (Delta / 2) * ((2 * B - ones_mk) @ Dstar + ones_mk)
    Bstar = (base + steps) @ Pstar

    return Bstar

screening_plot(X, fun, xi, p, labels, bounds=None, show=True)

Generates a plot with elementary effect screening metrics.

This function calculates the mean and standard deviation of the elementary effects for a given set of design variables and plots the results.

Parameters:

Name Type Description Default
X ndarray

The screening plan matrix, typically structured within a [0,1]^k box.

required
fun object

The objective function to evaluate at each design point in the screening plan.

required
xi float

The elementary effect step length factor.

required
p int

Number of discrete levels along each dimension.

required
labels list of str

A list of variable names corresponding to the design variables.

required
bounds ndarray

A 2xk matrix where the first row contains lower bounds and the second row contains upper bounds for each variable.

None
show bool

If True, the plot is displayed. Defaults to True.

True

Returns:

Name Type Description
None None

The function generates a plot of the results.

Examples:

>>> import numpy as np
>>> from spotpython.utils.effects import screening_plot, screeningplan
>>> from spotpython.fun.objectivefunctions import Analytical
>>> fun = Analytical()
>>> k = 10
>>> p = 10
>>> xi = 1
>>> r = 25
>>> X = screeningplan(k=k, p=p, xi=xi, r=r)  # shape (r x (k+1), k)
>>> # Provide real-world bounds from the wing weight docs (2 x 10).
>>> value_range = np.array([
...     [150, 220,   6, -10, 16, 0.5, 0.08, 2.5, 1700, 0.025],
...     [200, 300,  10,  10, 45, 1.0, 0.18, 6.0, 2500, 0.08 ],
... ])
>>> labels = [
...     "S_W", "W_fw", "A", "Lambda",
...     "q",   "lambda", "tc", "N_z",
...     "W_dg", "W_p"
... ]
>>> screening_plot(
...     X=X,
...     fun=fun.fun_wingwt,
...     bounds=value_range,
...     xi=xi,
...     p=p,
...     labels=labels,
... )
Source code in spotpython/utils/effects.py
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
def screening_plot(X, fun, xi, p, labels, bounds=None, show=True) -> None:
    """Generates a plot with elementary effect screening metrics.

    This function calculates the mean and standard deviation of the
    elementary effects for a given set of design variables and plots
    the results. Every variable is drawn as its label at the position
    (sample mean, sample standard deviation).

    Args:
        X (np.ndarray):
            The screening plan matrix, typically structured within a [0,1]^k box.
        fun (object):
            The objective function to evaluate at each design point in the screening plan.
        xi (float):
            The elementary effect step length factor.
        p (int):
            Number of discrete levels along each dimension.
        labels (list of str):
            A list of variable names corresponding to the design variables.
        bounds (np.ndarray):
            A 2xk matrix where the first row contains lower bounds and
            the second row contains upper bounds for each variable.
        show (bool):
            If True, the plot is displayed. Defaults to True.

    Returns:
        None: The function generates a plot of the results.

    Examples:
        >>> import numpy as np
        >>> from spotpython.utils.effects import screening_plot, screeningplan
        >>> from spotpython.fun.objectivefunctions import Analytical
        >>> fun = Analytical()
        >>> k = 10
        >>> p = 10
        >>> xi = 1
        >>> r = 25
        >>> X = screeningplan(k=k, p=p, xi=xi, r=r)  # shape (r x (k+1), k)
        >>> # Provide real-world bounds from the wing weight docs (2 x 10).
        >>> value_range = np.array([
        ...     [150, 220,   6, -10, 16, 0.5, 0.08, 2.5, 1700, 0.025],
        ...     [200, 300,  10,  10, 45, 1.0, 0.18, 6.0, 2500, 0.08 ],
        ... ])
        >>> labels = [
        ...     "S_W", "W_fw", "A", "Lambda",
        ...     "q",   "lambda", "tc", "N_z",
        ...     "W_dg", "W_p"
        ... ]
        >>> screening_plot(
        ...     X=X,
        ...     fun=fun.fun_wingwt,
        ...     bounds=value_range,
        ...     xi=xi,
        ...     p=p,
        ...     labels=labels,
        ... )
    """
    k = X.shape[1]
    sm, ssd = _screening(X=X, fun=fun, xi=xi, p=p, labels=labels, bounds=bounds)
    plt.figure()
    for i in range(k):
        plt.text(sm[i], ssd[i], labels[i], fontsize=10)

    # Upper limits get 10 % headroom. For a negative maximum the factor 1.1
    # would move the limit below the largest value and clip it, so 0.9 is
    # used there to keep the headroom on the correct side.
    sm_max = max(sm)
    ssd_max = max(ssd)
    sm_upper = 1.1 * sm_max if sm_max >= 0 else 0.9 * sm_max
    ssd_upper = 1.1 * ssd_max if ssd_max >= 0 else 0.9 * ssd_max
    plt.axis([min(sm), sm_upper, min(ssd), ssd_upper])

    plt.xlabel("Sample means")
    plt.ylabel("Sample standard deviations")
    plt.gca().tick_params(labelsize=10)
    plt.grid(True)
    if show:
        plt.show()

screening_print(X, fun, xi, p, labels, bounds=None)

Generates a DataFrame with elementary effect screening metrics.

This function calculates the mean and standard deviation of the elementary effects for a given set of design variables and returns the results as a Pandas DataFrame.

Parameters:

Name Type Description Default
X ndarray

The screening plan matrix, typically structured within a [0,1]^k box.

required
fun object

The objective function to evaluate at each design point in the screening plan.

required
xi float

The elementary effect step length factor.

required
p int

Number of discrete levels along each dimension.

required
labels list of str

A list of variable names corresponding to the design variables.

required
bounds ndarray

A 2xk matrix where the first row contains lower bounds and the second row contains upper bounds for each variable.

None

Returns:

Type Description
DataFrame

pd.DataFrame: A DataFrame containing three columns: - ‘varname’: The name of each variable. - ‘mean’: The mean of the elementary effects for each variable. - ‘sd’: The standard deviation of the elementary effects for each variable.

DataFrame

The rows are sorted by decreasing absolute mean. The function has no print option and always returns the DataFrame; use screening_plot to obtain a plot instead.

Examples:

>>> import numpy as np
>>> from spotpython.utils.effects import screening_print, screeningplan
>>> from spotpython.fun.objectivefunctions import Analytical
>>> fun = Analytical()
>>> k = 10
>>> p = 10
>>> xi = 1
>>> r = 25
>>> X = screeningplan(k=k, p=p, xi=xi, r=r)  # shape (r x (k+1), k)
>>> # Provide real-world bounds from the wing weight docs (2 x 10).
>>> value_range = np.array([
...     [150, 220,   6, -10, 16, 0.5, 0.08, 2.5, 1700, 0.025],
...     [200, 300,  10,  10, 45, 1.0, 0.18, 6.0, 2500, 0.08 ],
... ])
>>> labels = [
...     "S_W", "W_fw", "A", "Lambda",
...     "q",   "lambda", "tc", "N_z",
...     "W_dg", "W_p"
... ]
>>> df = screening_print(
...     X=X,
...     fun=fun.fun_wingwt,
...     bounds=value_range,
...     xi=xi,
...     p=p,
...     labels=labels,
... )
Source code in spotpython/utils/effects.py
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
def screening_print(X, fun, xi, p, labels, bounds=None) -> pd.DataFrame:
    """Generates a DataFrame with elementary effect screening metrics.

    The mean and the standard deviation of the elementary effects are
    computed for every design variable and returned as a Pandas
    DataFrame whose rows are ordered by decreasing absolute mean.

    Args:
        X (np.ndarray): The screening plan matrix, typically structured
            within a [0,1]^k box.
        fun (object): The objective function to evaluate at each
            design point in the screening plan.
        xi (float): The elementary effect step length factor.
        p (int): Number of discrete levels along each dimension.
        labels (list of str): A list of variable names corresponding to
            the design variables.
        bounds (np.ndarray): A 2xk matrix where the first row contains
            lower bounds and the second row contains upper bounds for
            each variable.

    Returns:
        pd.DataFrame: A DataFrame containing three columns:
            - 'varname': The name of each variable.
            - 'mean': The mean of the elementary effects for each variable.
            - 'sd': The standard deviation of the elementary effects for
            each variable.

    Examples:
        >>> import numpy as np
        >>> from spotpython.utils.effects import screening_print, screeningplan
        >>> from spotpython.fun.objectivefunctions import Analytical
        >>> fun = Analytical()
        >>> k = 10
        >>> p = 10
        >>> xi = 1
        >>> r = 25
        >>> X = screeningplan(k=k, p=p, xi=xi, r=r)  # shape (r x (k+1), k)
        >>> # Provide real-world bounds from the wing weight docs (2 x 10).
        >>> value_range = np.array([
        ...     [150, 220,   6, -10, 16, 0.5, 0.08, 2.5, 1700, 0.025],
        ...     [200, 300,  10,  10, 45, 1.0, 0.18, 6.0, 2500, 0.08 ],
        ... ])
        >>> labels = [
        ...     "S_W", "W_fw", "A", "Lambda",
        ...     "q",   "lambda", "tc", "N_z",
        ...     "W_dg", "W_p"
        ... ]
        >>> df = screening_print(
        ...     X=X,
        ...     fun=fun.fun_wingwt,
        ...     bounds=value_range,
        ...     xi=xi,
        ...     p=p,
        ...     labels=labels,
        ... )
    """
    effect_means, effect_sds = _screening(X=X, fun=fun, xi=xi, p=p, labels=labels, bounds=bounds)

    # Largest absolute mean first
    order = np.argsort(-np.abs(effect_means))

    return pd.DataFrame(
        {
            "varname": [labels[pos] for pos in order],
            "mean": effect_means[order],
            "sd": effect_sds[order],
        }
    )