32  Explainable AI with SpotPython and PyTorch

from spotpython.data.diabetes import Diabetes
from spotpython.hyperdict.light_hyper_dict import LightHyperDict
from spotpython.fun.hyperlight import HyperLight
from spotpython.utils.init import (fun_control_init, surrogate_control_init, design_control_init)
from spotpython.utils.eda import gen_design_table
from spotpython.spot import spot
from spotpython.utils.file import get_experiment_filename
from spotpython.hyperparameters.values import set_hyperparameter
from math import inf

PREFIX="602_12"

data_set = Diabetes()

fun_control = fun_control_init(
    save_experiment=True,
    PREFIX=PREFIX,
    fun_evals=inf,
    max_time=60,
    data_set=data_set,
    core_model_name="light.regression.NNLinearRegressor",
    hyperdict=LightHyperDict,
    _L_in=10,  # number of input features (the Diabetes data set has 10)
    _L_out=1)  # number of outputs (one regression target)

fun = HyperLight().fun


set_hyperparameter(fun_control, "optimizer", [ "Adadelta", "Adam", "Adamax"])
set_hyperparameter(fun_control, "l1", [3,7])
set_hyperparameter(fun_control, "epochs", [10,12])
set_hyperparameter(fun_control, "batch_size", [4,11])
set_hyperparameter(fun_control, "dropout_prob", [0.0, 0.025])
set_hyperparameter(fun_control, "patience", [2,9])

design_control = design_control_init(init_size=7)

spot_tuner = spot.Spot(fun=fun, fun_control=fun_control, design_control=design_control)
module_name: light
submodule_name: regression
model_name: NNLinearRegressor

32.1 Running the Hyperparameter Tuning or Loading the Existing Model

from spotpython.utils.file import get_experiment_filename, load_experiment
import os
overwrite = False
filename = get_experiment_filename(PREFIX)
if os.path.exists(filename) and not overwrite:
    (spot_tuner, fun_control, design_control,
    surrogate_control, optimizer_control) = load_experiment(filename)
else:
    print("File does not exist or overwrite is True. Starting new experiment.")
    res = spot_tuner.run()
# only needed for spotpython version < 0.16.0
fun_control.update({"_L_cond": None})
File does not exist or overwrite is True. Starting new experiment.

In fun(): config:
{'act_fn': ELU(),
 'batch_norm': False,
 'batch_size': 128,
 'dropout_prob': 0.01495680533337224,
 'epochs': 2048,
 'initialization': 'kaiming_normal',
 'l1': 16,
 'lr_mult': 0.20841207421887742,
 'optimizer': 'Adam',
 'patience': 512}
train_model result: {'val_loss': nan, 'hp_metric': nan}

In fun(): config:
{'act_fn': Tanh(),
 'batch_norm': True,
 'batch_size': 128,
 'dropout_prob': 0.022750213555036855,
 'epochs': 2048,
 'initialization': 'Default',
 'l1': 64,
 'lr_mult': 8.688381162942978,
 'optimizer': 'Adadelta',
 'patience': 16}
train_model result: {'val_loss': 4204.67822265625, 'hp_metric': 4204.67822265625}

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 32,
 'dropout_prob': 0.0035182324401420283,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 16,
 'lr_mult': 5.090825861997777,
 'optimizer': 'Adam',
 'patience': 4}
train_model result: {'val_loss': 3351.265869140625, 'hp_metric': 3351.265869140625}

In fun(): config:
{'act_fn': Tanh(),
 'batch_norm': False,
 'batch_size': 1024,
 'dropout_prob': 0.00972190967750383,
 'epochs': 1024,
 'initialization': 'kaiming_normal',
 'l1': 32,
 'lr_mult': 7.458466974783385,
 'optimizer': 'Adamax',
 'patience': 32}
train_model result: {'val_loss': 18344.134765625, 'hp_metric': 18344.134765625}

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.020311634799104406,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 128,
 'lr_mult': 4.322386097111191,
 'optimizer': 'Adam',
 'patience': 32}
train_model result: {'val_loss': 2971.34619140625, 'hp_metric': 2971.34619140625}

In fun(): config:
{'act_fn': Swish(),
 'batch_norm': True,
 'batch_size': 512,
 'dropout_prob': 0.012759647245418036,
 'epochs': 2048,
 'initialization': 'xavier_uniform',
 'l1': 8,
 'lr_mult': 1.747498108806452,
 'optimizer': 'Adadelta',
 'patience': 64}
train_model result: {'val_loss': 7397.6943359375, 'hp_metric': 7397.6943359375}

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 32,
 'dropout_prob': 0.007063445400286148,
 'epochs': 2048,
 'initialization': 'xavier_normal',
 'l1': 32,
 'lr_mult': 6.507632456733881,
 'optimizer': 'Adamax',
 'patience': 128}
train_model result: {'val_loss': 4669.767578125, 'hp_metric': 4669.767578125}

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.024910619021621488,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 128,
 'lr_mult': 4.112322886985134,
 'optimizer': 'Adam',
 'patience': 64}
train_model result: {'val_loss': 3030.671875, 'hp_metric': 3030.671875}
No spot_writer available.
spotpython tuning: 2971.34619140625 [----------] 0.08% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.005134831168514179,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 5.01142390200848,
 'optimizer': 'Adam',
 'patience': 4}
train_model result: {'val_loss': 4968.0068359375, 'hp_metric': 4968.0068359375}
No spot_writer available.
spotpython tuning: 2971.34619140625 [----------] 0.11% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.010365601790047169,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 128,
 'lr_mult': 4.788101037853595,
 'optimizer': 'Adam',
 'patience': 4}
train_model result: {'val_loss': 7068.80712890625, 'hp_metric': 7068.80712890625}
No spot_writer available.
spotpython tuning: 2971.34619140625 [----------] 0.13% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.008899331350384364,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 4.811760848527402,
 'optimizer': 'Adam',
 'patience': 32}
train_model result: {'val_loss': 2920.383544921875, 'hp_metric': 2920.383544921875}
No spot_writer available.
spotpython tuning: 2920.383544921875 [----------] 0.22% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 32,
 'dropout_prob': 0.01359972255597248,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 16,
 'lr_mult': 4.629880475521667,
 'optimizer': 'Adam',
 'patience': 16}
train_model result: {'val_loss': 17562.990234375, 'hp_metric': 17562.990234375}
No spot_writer available.
spotpython tuning: 2920.383544921875 [#---------] 7.15% 

In fun(): config:
{'act_fn': Swish(),
 'batch_norm': True,
 'batch_size': 128,
 'dropout_prob': 0.02275021330992626,
 'epochs': 2048,
 'initialization': 'Default',
 'l1': 32,
 'lr_mult': 8.688381149011716,
 'optimizer': 'Adadelta',
 'patience': 256}
train_model result: {'val_loss': 4051.220458984375, 'hp_metric': 4051.220458984375}
No spot_writer available.
spotpython tuning: 2920.383544921875 [#---------] 7.77% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.014585379954234952,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 4.512379805585112,
 'optimizer': 'Adam',
 'patience': 512}
train_model result: {'val_loss': 3962.65771484375, 'hp_metric': 3962.65771484375}
No spot_writer available.
spotpython tuning: 2920.383544921875 [#---------] 8.25% 

In fun(): config:
{'act_fn': Tanh(),
 'batch_norm': False,
 'batch_size': 1024,
 'dropout_prob': 0.012759532652183535,
 'epochs': 2048,
 'initialization': 'xavier_uniform',
 'l1': 32,
 'lr_mult': 1.7474981302463117,
 'optimizer': 'Adadelta',
 'patience': 32}
train_model result: {'val_loss': 12340.388671875, 'hp_metric': 12340.388671875}
No spot_writer available.
spotpython tuning: 2920.383544921875 [#---------] 9.93% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 32,
 'dropout_prob': 0.007034103149109228,
 'epochs': 2048,
 'initialization': 'xavier_normal',
 'l1': 32,
 'lr_mult': 6.507632373720256,
 'optimizer': 'Adamax',
 'patience': 128}
train_model result: {'val_loss': 5325.04150390625, 'hp_metric': 5325.04150390625}
No spot_writer available.
spotpython tuning: 2920.383544921875 [#---------] 11.30% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 16,
 'dropout_prob': 0.025,
 'epochs': 2048,
 'initialization': 'xavier_normal',
 'l1': 8,
 'lr_mult': 0.1,
 'optimizer': 'Adamax',
 'patience': 32}
train_model result: {'val_loss': 23938.8515625, 'hp_metric': 23938.8515625}
No spot_writer available.
spotpython tuning: 2920.383544921875 [#---------] 11.44% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 64,
 'dropout_prob': 0.025,
 'epochs': 2048,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 10.0,
 'optimizer': 'Adamax',
 'patience': 512}
train_model result: {'val_loss': 4698.60546875, 'hp_metric': 4698.60546875}
No spot_writer available.
spotpython tuning: 2920.383544921875 [#---------] 13.35% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 16,
 'dropout_prob': 0.025,
 'epochs': 2048,
 'initialization': 'xavier_uniform',
 'l1': 128,
 'lr_mult': 5.0167079396473895,
 'optimizer': 'Adamax',
 'patience': 256}
train_model result: {'val_loss': 4436.0947265625, 'hp_metric': 4436.0947265625}
No spot_writer available.
spotpython tuning: 2920.383544921875 [##--------] 15.20% 

In fun(): config:
{'act_fn': ELU(),
 'batch_norm': True,
 'batch_size': 32,
 'dropout_prob': 0.025,
 'epochs': 2048,
 'initialization': 'kaiming_uniform',
 'l1': 32,
 'lr_mult': 6.589886427269446,
 'optimizer': 'Adamax',
 'patience': 128}
train_model result: {'val_loss': 5446.32666015625, 'hp_metric': 5446.32666015625}
No spot_writer available.
spotpython tuning: 2920.383544921875 [##--------] 16.12% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 64,
 'dropout_prob': 0.025,
 'epochs': 2048,
 'initialization': 'xavier_normal',
 'l1': 32,
 'lr_mult': 9.749357956232076,
 'optimizer': 'Adamax',
 'patience': 4}
train_model result: {'val_loss': 19454.69140625, 'hp_metric': 19454.69140625}
No spot_writer available.
spotpython tuning: 2920.383544921875 [##--------] 16.32% 

In fun(): config:
{'act_fn': Tanh(),
 'batch_norm': True,
 'batch_size': 2048,
 'dropout_prob': 0.024284054571530954,
 'epochs': 2048,
 'initialization': 'kaiming_uniform',
 'l1': 128,
 'lr_mult': 4.792003730628598,
 'optimizer': 'Adadelta',
 'patience': 256}
train_model result: {'val_loss': 4396.490234375, 'hp_metric': 4396.490234375}
No spot_writer available.
spotpython tuning: 2920.383544921875 [##--------] 17.05% 

In fun(): config:
{'act_fn': Tanh(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.024895483845265028,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 128,
 'lr_mult': 9.984921100533079,
 'optimizer': 'Adadelta',
 'patience': 512}
train_model result: {'val_loss': 5408.642578125, 'hp_metric': 5408.642578125}
No spot_writer available.
spotpython tuning: 2920.383544921875 [##--------] 17.67% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 32,
 'dropout_prob': 0.02499851616313203,
 'epochs': 2048,
 'initialization': 'xavier_uniform',
 'l1': 32,
 'lr_mult': 6.509683101139973,
 'optimizer': 'Adamax',
 'patience': 128}
train_model result: {'val_loss': 3913.798095703125, 'hp_metric': 3913.798095703125}
No spot_writer available.
spotpython tuning: 2920.383544921875 [##--------] 18.82% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.0,
 'epochs': 4096,
 'initialization': 'xavier_uniform',
 'l1': 8,
 'lr_mult': 0.9797832141479108,
 'optimizer': 'Adam',
 'patience': 4}
train_model result: {'val_loss': 23758.373046875, 'hp_metric': 23758.373046875}
No spot_writer available.
spotpython tuning: 2920.383544921875 [##--------] 20.42% 

In fun(): config:
{'act_fn': Tanh(),
 'batch_norm': False,
 'batch_size': 16,
 'dropout_prob': 9.20291806222884e-05,
 'epochs': 1024,
 'initialization': 'xavier_uniform',
 'l1': 128,
 'lr_mult': 2.1834892328465867,
 'optimizer': 'Adadelta',
 'patience': 4}
train_model result: {'val_loss': 4795.78515625, 'hp_metric': 4795.78515625}
No spot_writer available.
spotpython tuning: 2920.383544921875 [##--------] 20.77% 

In fun(): config:
{'act_fn': Tanh(),
 'batch_norm': False,
 'batch_size': 256,
 'dropout_prob': 0.0,
 'epochs': 4096,
 'initialization': 'xavier_uniform',
 'l1': 128,
 'lr_mult': 3.830500287142594,
 'optimizer': 'Adadelta',
 'patience': 4}
train_model result: {'val_loss': 4785.19970703125, 'hp_metric': 4785.19970703125}
No spot_writer available.
spotpython tuning: 2920.383544921875 [##--------] 21.20% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.0,
 'epochs': 1024,
 'initialization': 'kaiming_uniform',
 'l1': 128,
 'lr_mult': 5.793612279467221,
 'optimizer': 'Adadelta',
 'patience': 512}
train_model result: {'val_loss': 4140.59130859375, 'hp_metric': 4140.59130859375}
No spot_writer available.
spotpython tuning: 2920.383544921875 [##--------] 21.91% 

In fun(): config:
{'act_fn': Tanh(),
 'batch_norm': True,
 'batch_size': 128,
 'dropout_prob': 0.024952405928690004,
 'epochs': 2048,
 'initialization': 'xavier_uniform',
 'l1': 128,
 'lr_mult': 5.9708136487904095,
 'optimizer': 'Adadelta',
 'patience': 128}
train_model result: {'val_loss': 4855.0830078125, 'hp_metric': 4855.0830078125}
No spot_writer available.
spotpython tuning: 2920.383544921875 [##--------] 22.77% 

In fun(): config:
{'act_fn': Tanh(),
 'batch_norm': True,
 'batch_size': 2048,
 'dropout_prob': 0.025,
 'epochs': 2048,
 'initialization': 'Default',
 'l1': 16,
 'lr_mult': 3.671189863172685,
 'optimizer': 'Adamax',
 'patience': 64}
train_model result: {'val_loss': 20589.814453125, 'hp_metric': 20589.814453125}
No spot_writer available.
spotpython tuning: 2920.383544921875 [###-------] 25.63% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 16,
 'dropout_prob': 0.025,
 'epochs': 2048,
 'initialization': 'Default',
 'l1': 128,
 'lr_mult': 10.0,
 'optimizer': 'Adadelta',
 'patience': 64}
train_model result: {'val_loss': 5243.59912109375, 'hp_metric': 5243.59912109375}
No spot_writer available.
spotpython tuning: 2920.383544921875 [###-------] 26.24% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 16,
 'dropout_prob': 0.0013125948605280317,
 'epochs': 2048,
 'initialization': 'kaiming_normal',
 'l1': 8,
 'lr_mult': 4.012565259441038,
 'optimizer': 'Adadelta',
 'patience': 512}
train_model result: {'val_loss': 43501.54296875, 'hp_metric': 43501.54296875}
No spot_writer available.
spotpython tuning: 2920.383544921875 [###-------] 28.38% 

In fun(): config:
{'act_fn': Swish(),
 'batch_norm': True,
 'batch_size': 2048,
 'dropout_prob': 0.014010762240222473,
 'epochs': 2048,
 'initialization': 'xavier_uniform',
 'l1': 8,
 'lr_mult': 1.0706114019027868,
 'optimizer': 'Adadelta',
 'patience': 32}
train_model result: {'val_loss': 21867.333984375, 'hp_metric': 21867.333984375}
No spot_writer available.
spotpython tuning: 2920.383544921875 [###-------] 30.31% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.015508042769497929,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 3.4497193443660668,
 'optimizer': 'Adam',
 'patience': 32}
train_model result: {'val_loss': 2895.96337890625, 'hp_metric': 2895.96337890625}
No spot_writer available.
spotpython tuning: 2895.96337890625 [###-------] 30.44% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.012939825417433172,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 3.5628253344993888,
 'optimizer': 'Adam',
 'patience': 32}
train_model result: {'val_loss': 2927.004150390625, 'hp_metric': 2927.004150390625}
No spot_writer available.
spotpython tuning: 2895.96337890625 [###-------] 30.53% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.012934884962293584,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 3.562805588531447,
 'optimizer': 'Adam',
 'patience': 32}
train_model result: {'val_loss': 2875.336181640625, 'hp_metric': 2875.336181640625}
No spot_writer available.
spotpython tuning: 2875.336181640625 [###-------] 30.64% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 64,
 'dropout_prob': 0.025,
 'epochs': 4096,
 'initialization': 'xavier_uniform',
 'l1': 32,
 'lr_mult': 0.1,
 'optimizer': 'Adadelta',
 'patience': 128}
train_model result: {'val_loss': 16162.4052734375, 'hp_metric': 16162.4052734375}
No spot_writer available.
spotpython tuning: 2875.336181640625 [####------] 35.72% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 1024,
 'dropout_prob': 0.0,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 3.9109296766715853,
 'optimizer': 'Adam',
 'patience': 32}
train_model result: {'val_loss': 3002.601318359375, 'hp_metric': 3002.601318359375}
No spot_writer available.
spotpython tuning: 2875.336181640625 [####------] 35.81% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 32,
 'dropout_prob': 0.003517913411807396,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 16,
 'lr_mult': 5.090407334732555,
 'optimizer': 'Adam',
 'patience': 4}
train_model result: {'val_loss': 2953.220703125, 'hp_metric': 2953.220703125}
No spot_writer available.
spotpython tuning: 2875.336181640625 [####------] 35.94% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 32,
 'dropout_prob': 0.0035182275331424565,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 16,
 'lr_mult': 5.090826663793708,
 'optimizer': 'Adam',
 'patience': 4}
train_model result: {'val_loss': 16998.376953125, 'hp_metric': 16998.376953125}
No spot_writer available.
spotpython tuning: 2875.336181640625 [####------] 42.93% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 32,
 'dropout_prob': 0.0035183118238594924,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 16,
 'lr_mult': 5.090335023277795,
 'optimizer': 'Adam',
 'patience': 4}
train_model result: {'val_loss': 3082.487060546875, 'hp_metric': 3082.487060546875}
No spot_writer available.
spotpython tuning: 2875.336181640625 [####------] 43.09% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 16,
 'dropout_prob': 0.0,
 'epochs': 1024,
 'initialization': 'xavier_normal',
 'l1': 32,
 'lr_mult': 10.0,
 'optimizer': 'Adamax',
 'patience': 128}
train_model result: {'val_loss': 5324.94970703125, 'hp_metric': 5324.94970703125}
No spot_writer available.
spotpython tuning: 2875.336181640625 [####------] 44.57% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 32,
 'dropout_prob': 0.003988336407773549,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 16,
 'lr_mult': 5.09040634601978,
 'optimizer': 'Adam',
 'patience': 4}
train_model result: {'val_loss': 3426.16064453125, 'hp_metric': 3426.16064453125}
No spot_writer available.
spotpython tuning: 2875.336181640625 [####------] 44.67% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 32,
 'dropout_prob': 0.003987375756233961,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 16,
 'lr_mult': 5.090406387359822,
 'optimizer': 'Adam',
 'patience': 4}
train_model result: {'val_loss': 3066.734130859375, 'hp_metric': 3066.734130859375}
No spot_writer available.
spotpython tuning: 2875.336181640625 [####------] 44.81% 

In fun(): config:
{'act_fn': ELU(),
 'batch_norm': True,
 'batch_size': 64,
 'dropout_prob': 0.008178982240931637,
 'epochs': 4096,
 'initialization': 'Default',
 'l1': 16,
 'lr_mult': 5.693214148352549,
 'optimizer': 'Adadelta',
 'patience': 256}
train_model result: {'val_loss': 4856.81689453125, 'hp_metric': 4856.81689453125}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#####-----] 45.57% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 32,
 'dropout_prob': 0.0016349337332920138,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 16,
 'lr_mult': 5.144672499409648,
 'optimizer': 'Adam',
 'patience': 4}
train_model result: {'val_loss': 16934.349609375, 'hp_metric': 16934.349609375}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#####-----] 52.52% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.025,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 3.4930484432633877,
 'optimizer': 'Adadelta',
 'patience': 32}
train_model result: {'val_loss': 3296.848388671875, 'hp_metric': 3296.848388671875}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#####-----] 52.61% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 16,
 'dropout_prob': 0.0,
 'epochs': 1024,
 'initialization': 'xavier_normal',
 'l1': 64,
 'lr_mult': 5.9780032075402465,
 'optimizer': 'Adamax',
 'patience': 128}
train_model result: {'val_loss': 5232.22900390625, 'hp_metric': 5232.22900390625}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#####-----] 53.50% 

In fun(): config:
{'act_fn': Tanh(),
 'batch_norm': True,
 'batch_size': 128,
 'dropout_prob': 0.023400133535242455,
 'epochs': 2048,
 'initialization': 'Default',
 'l1': 64,
 'lr_mult': 7.887083505617934,
 'optimizer': 'Adadelta',
 'patience': 64}
train_model result: {'val_loss': 4404.3154296875, 'hp_metric': 4404.3154296875}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#####-----] 53.96% 

In fun(): config:
{'act_fn': Tanh(),
 'batch_norm': True,
 'batch_size': 128,
 'dropout_prob': 0.02357735949115107,
 'epochs': 2048,
 'initialization': 'Default',
 'l1': 64,
 'lr_mult': 7.668922406331757,
 'optimizer': 'Adadelta',
 'patience': 128}
train_model result: {'val_loss': 4183.91162109375, 'hp_metric': 4183.91162109375}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#####-----] 54.60% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 32,
 'dropout_prob': 0.005560032034683299,
 'epochs': 2048,
 'initialization': 'xavier_normal',
 'l1': 32,
 'lr_mult': 4.625603061177006,
 'optimizer': 'Adamax',
 'patience': 512}
train_model result: {'val_loss': 4722.8671875, 'hp_metric': 4722.8671875}
No spot_writer available.
spotpython tuning: 2875.336181640625 [######----] 56.76% 

In fun(): config:
{'act_fn': Swish(),
 'batch_norm': True,
 'batch_size': 128,
 'dropout_prob': 0.022615791757535218,
 'epochs': 2048,
 'initialization': 'Default',
 'l1': 32,
 'lr_mult': 8.854596956204444,
 'optimizer': 'Adadelta',
 'patience': 128}
train_model result: {'val_loss': 3968.251708984375, 'hp_metric': 3968.251708984375}
No spot_writer available.
spotpython tuning: 2875.336181640625 [######----] 57.38% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.0,
 'epochs': 1024,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 0.1,
 'optimizer': 'Adadelta',
 'patience': 64}
train_model result: {'val_loss': 3240.228515625, 'hp_metric': 3240.228515625}
No spot_writer available.
spotpython tuning: 2875.336181640625 [######----] 57.56% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 32,
 'dropout_prob': 0.025,
 'epochs': 2048,
 'initialization': 'xavier_uniform',
 'l1': 32,
 'lr_mult': 9.781369395557494,
 'optimizer': 'Adamax',
 'patience': 16}
train_model result: {'val_loss': 5073.10693359375, 'hp_metric': 5073.10693359375}
No spot_writer available.
spotpython tuning: 2875.336181640625 [######----] 57.96% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 16,
 'dropout_prob': 0.007259412774214031,
 'epochs': 1024,
 'initialization': 'xavier_normal',
 'l1': 64,
 'lr_mult': 8.259415621922846,
 'optimizer': 'Adamax',
 'patience': 64}
train_model result: {'val_loss': 4578.72216796875, 'hp_metric': 4578.72216796875}
No spot_writer available.
spotpython tuning: 2875.336181640625 [######----] 58.68% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 16,
 'dropout_prob': 0.007248807393068826,
 'epochs': 1024,
 'initialization': 'xavier_normal',
 'l1': 64,
 'lr_mult': 8.260602527984258,
 'optimizer': 'Adamax',
 'patience': 64}
train_model result: {'val_loss': 4015.159423828125, 'hp_metric': 4015.159423828125}
No spot_writer available.
spotpython tuning: 2875.336181640625 [######----] 59.32% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 32,
 'dropout_prob': 0.025,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 16,
 'lr_mult': 0.1,
 'optimizer': 'Adam',
 'patience': 256}
train_model result: {'val_loss': 23908.478515625, 'hp_metric': 23908.478515625}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#######---] 66.24% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 32,
 'dropout_prob': 0.0,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 16,
 'lr_mult': 6.4414894226439525,
 'optimizer': 'Adam',
 'patience': 4}
train_model result: {'val_loss': 15438.375, 'hp_metric': 15438.375}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#######---] 72.53% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 32,
 'dropout_prob': 0.0,
 'epochs': 4096,
 'initialization': 'xavier_normal',
 'l1': 64,
 'lr_mult': 6.953912107731897,
 'optimizer': 'Adamax',
 'patience': 256}
train_model result: {'val_loss': 4372.67529296875, 'hp_metric': 4372.67529296875}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#######---] 74.67% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 16,
 'dropout_prob': 0.024985535195757427,
 'epochs': 1024,
 'initialization': 'xavier_uniform',
 'l1': 32,
 'lr_mult': 6.768291413835145,
 'optimizer': 'Adamax',
 'patience': 512}
train_model result: {'val_loss': 4020.39599609375, 'hp_metric': 4020.39599609375}
No spot_writer available.
spotpython tuning: 2875.336181640625 [########--] 77.33% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 16,
 'dropout_prob': 0.01714704835654727,
 'epochs': 1024,
 'initialization': 'kaiming_uniform',
 'l1': 8,
 'lr_mult': 4.314257055277487,
 'optimizer': 'Adam',
 'patience': 4}
train_model result: {'val_loss': 2931.334716796875, 'hp_metric': 2931.334716796875}
No spot_writer available.
spotpython tuning: 2875.336181640625 [########--] 77.50% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.0,
 'epochs': 2048,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 0.1,
 'optimizer': 'Adam',
 'patience': 512}
train_model result: {'val_loss': 3528.125244140625, 'hp_metric': 3528.125244140625}
No spot_writer available.
spotpython tuning: 2875.336181640625 [########--] 78.97% 

In fun(): config:
{'act_fn': Tanh(),
 'batch_norm': True,
 'batch_size': 2048,
 'dropout_prob': 0.0,
 'epochs': 4096,
 'initialization': 'Default',
 'l1': 32,
 'lr_mult': 8.122105659827985,
 'optimizer': 'Adadelta',
 'patience': 512}
train_model result: {'val_loss': 5406.77490234375, 'hp_metric': 5406.77490234375}
No spot_writer available.
spotpython tuning: 2875.336181640625 [########--] 81.09% 

In fun(): config:
{'act_fn': ELU(),
 'batch_norm': False,
 'batch_size': 64,
 'dropout_prob': 0.006533267855654985,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 16,
 'lr_mult': 5.299312269081944,
 'optimizer': 'Adam',
 'patience': 8}
train_model result: {'val_loss': 4790.345703125, 'hp_metric': 4790.345703125}
No spot_writer available.
spotpython tuning: 2875.336181640625 [########--] 81.57% 

In fun(): config:
{'act_fn': ELU(),
 'batch_norm': True,
 'batch_size': 16,
 'dropout_prob': 0.0,
 'epochs': 4096,
 'initialization': 'Default',
 'l1': 32,
 'lr_mult': 10.0,
 'optimizer': 'Adadelta',
 'patience': 512}
train_model result: {'val_loss': 3752.741455078125, 'hp_metric': 3752.741455078125}
No spot_writer available.
spotpython tuning: 2875.336181640625 [########--] 84.44% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.011864240606664373,
 'epochs': 2048,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 3.751661271132917,
 'optimizer': 'Adam',
 'patience': 128}
train_model result: {'val_loss': 4109.21923828125, 'hp_metric': 4109.21923828125}
No spot_writer available.
spotpython tuning: 2875.336181640625 [########--] 84.63% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.0116632707081355,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 1.7676856846802964,
 'optimizer': 'Adam',
 'patience': 128}
train_model result: {'val_loss': 3370.240478515625, 'hp_metric': 3370.240478515625}
No spot_writer available.
spotpython tuning: 2875.336181640625 [########--] 84.90% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 64,
 'dropout_prob': 0.0,
 'epochs': 2048,
 'initialization': 'xavier_normal',
 'l1': 32,
 'lr_mult': 10.0,
 'optimizer': 'Adamax',
 'patience': 512}
train_model result: {'val_loss': 4867.486328125, 'hp_metric': 4867.486328125}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#########-] 86.33% 

In fun(): config:
{'act_fn': Tanh(),
 'batch_norm': True,
 'batch_size': 2048,
 'dropout_prob': 0.0036426912631938144,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 0.1,
 'optimizer': 'Adadelta',
 'patience': 128}
train_model result: {'val_loss': 18527.513671875, 'hp_metric': 18527.513671875}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#########-] 88.31% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 64,
 'dropout_prob': 0.006786935269683575,
 'epochs': 2048,
 'initialization': 'Default',
 'l1': 64,
 'lr_mult': 7.679727457461166,
 'optimizer': 'Adamax',
 'patience': 256}
train_model result: {'val_loss': 4310.10888671875, 'hp_metric': 4310.10888671875}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#########-] 89.31% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 16,
 'dropout_prob': 0.02283083397147253,
 'epochs': 2048,
 'initialization': 'xavier_uniform',
 'l1': 64,
 'lr_mult': 7.188310574986988,
 'optimizer': 'Adamax',
 'patience': 128}
train_model result: {'val_loss': 4615.6923828125, 'hp_metric': 4615.6923828125}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#########-] 90.81% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.0,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 3.305746623626939,
 'optimizer': 'Adam',
 'patience': 32}
train_model result: {'val_loss': 3138.449951171875, 'hp_metric': 3138.449951171875}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#########-] 90.93% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 128,
 'dropout_prob': 0.0,
 'epochs': 4096,
 'initialization': 'xavier_normal',
 'l1': 64,
 'lr_mult': 2.3783775225780412,
 'optimizer': 'Adamax',
 'patience': 512}
train_model result: {'val_loss': 5972.1162109375, 'hp_metric': 5972.1162109375}
No spot_writer available.
spotpython tuning: 2875.336181640625 [#########-] 93.15% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 16,
 'dropout_prob': 0.025,
 'epochs': 1024,
 'initialization': 'kaiming_uniform',
 'l1': 8,
 'lr_mult': 10.0,
 'optimizer': 'Adam',
 'patience': 4}
train_model result: {'val_loss': 17197.076171875, 'hp_metric': 17197.076171875}
No spot_writer available.
spotpython tuning: 2875.336181640625 [##########] 95.33% 

In fun(): config:
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 512,
 'dropout_prob': 0.010695124774504131,
 'epochs': 2048,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 2.5545529888597662,
 'optimizer': 'Adam',
 'patience': 64}
train_model result: {'val_loss': 2967.3583984375, 'hp_metric': 2967.3583984375}
No spot_writer available.
spotpython tuning: 2875.336181640625 [##########] 95.45% 

In fun(): config:
{'act_fn': LeakyReLU(),
 'batch_norm': True,
 'batch_size': 128,
 'dropout_prob': 0.025,
 'epochs': 4096,
 'initialization': 'xavier_uniform',
 'l1': 32,
 'lr_mult': 0.1,
 'optimizer': 'Adamax',
 'patience': 512}
train_model result: {'val_loss': 23474.7421875, 'hp_metric': 23474.7421875}
No spot_writer available.
spotpython tuning: 2875.336181640625 [##########] 100.00% Done...

Experiment saved to spot_602_12_experiment.pickle

32.2 Results from the Hyperparameter Tuning Experiment

  • After the hyperparameter tuning is finished, the following information is available:
    • the spot_tuner object and
    • the associated fun_control dictionary
spot_tuner.plot_progress()

32.2.1 Getting the Best Model, i.e., the Tuned Architecture

  • The method get_tuned_architecture [DOC] returns the best model architecture found during the hyperparameter tuning.
  • It returns the transformed values, e.g., batch_size = 2^x if the hyperparameter batch_size was transformed with the transform_power_2_int function; a minimal sketch of this transform follows.
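  • The sketch below is an assumption that mirrors transform_power_2_int; it is not the library implementation itself:

def transform_power_2_int(x: int) -> int:
    # Map a tuned value on the log2 scale to the value reported above.
    return 2 ** int(x)

print(transform_power_2_int(4), transform_power_2_int(11))  # bounds [4, 11] -> 16, 2048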
from spotpython.hyperparameters.values import get_tuned_architecture
import pprint
config = get_tuned_architecture(spot_tuner, fun_control)
pprint.pprint(config)
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.012934884962293584,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 3.562805588531447,
 'optimizer': 'Adam',
 'patience': 32}
  • Note: get_tuned_architecture has the option force_minX, which has no effect in this case.
from spotpython.hyperparameters.values import get_tuned_architecture
config = get_tuned_architecture(spot_tuner, fun_control, force_minX=True)
pprint.pprint(config)
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.012934884962293584,
 'epochs': 4096,
 'initialization': 'kaiming_normal',
 'l1': 128,
 'lr_mult': 3.562805588531447,
 'optimizer': 'Adam',
 'patience': 32}

32.3 Training the Tuned Architecture on the Test Data

  • Since we are interested in the explainability of the model, we will train the tuned architecture on the test data.
  • spotpython's test_model function [DOC] is used to train the model on the test data.
  • Note: Up to this point, we have not used any information about the NN's weights and biases. Only the architecture, which is available as the config, is used.
  • spotpython uses the TensorBoard logger to save the training process in the ./runs directory. Therefore, we have to enable the TensorBoard logger in the fun_control dictionary. To get a clean start, we remove an existing runs folder.
from spotpython.light.testmodel import test_model
from spotpython.light.loadmodel import load_light_from_checkpoint
import os
# if the directory "./runs" exists, delete it
if os.path.exists("./runs"):
    os.system("rm -r ./runs")
fun_control.update({"tensorboard_log": True})
test_model(config, fun_control)
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃        Test metric               DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│         hp_metric               2866.0341796875      │
│         val_loss                2866.0341796875      │
└───────────────────────────┴───────────────────────────┘
test_model result: {'val_loss': 2866.0341796875, 'hp_metric': 2866.0341796875}
(2866.0341796875, 2866.0341796875)
model = load_light_from_checkpoint(config, fun_control)
config: {'l1': 128, 'epochs': 4096, 'batch_size': 2048, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.012934884962293584, 'lr_mult': 3.562805588531447, 'patience': 32, 'batch_norm': False, 'initialization': 'kaiming_normal'}
Loading model with 128_4096_2048_ReLU_Adam_0.0129_3.5628_32_False_kaiming_normal_TEST from runs/saved_models/128_4096_2048_ReLU_Adam_0.0129_3.5628_32_False_kaiming_normal_TEST/last.ckpt
Model: NNLinearRegressor(
  (layers): Sequential(
    (0): Linear(in_features=10, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.012934884962293584, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.012934884962293584, inplace=False)
    (6): Linear(in_features=64, out_features=64, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.012934884962293584, inplace=False)
    (9): Linear(in_features=64, out_features=32, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.012934884962293584, inplace=False)
    (12): Linear(in_features=32, out_features=32, bias=True)
    (13): ReLU()
    (14): Dropout(p=0.012934884962293584, inplace=False)
    (15): Linear(in_features=32, out_features=32, bias=True)
    (16): ReLU()
    (17): Dropout(p=0.012934884962293584, inplace=False)
    (18): Linear(in_features=32, out_features=16, bias=True)
    (19): ReLU()
    (20): Dropout(p=0.012934884962293584, inplace=False)
    (21): Linear(in_features=16, out_features=16, bias=True)
    (22): ReLU()
    (23): Dropout(p=0.012934884962293584, inplace=False)
    (24): Linear(in_features=16, out_features=16, bias=True)
    (25): ReLU()
    (26): Dropout(p=0.012934884962293584, inplace=False)
    (27): Linear(in_features=16, out_features=16, bias=True)
    (28): ReLU()
    (29): Dropout(p=0.012934884962293584, inplace=False)
    (30): Linear(in_features=16, out_features=1, bias=True)
  )
)

32.3.0.1 Details of the Training Process on the Test Data

  • The test_model method initializes the model with the tuned architecture as follows:
model = fun_control["core_model"](**config, _L_in=_L_in, _L_out=_L_out, _torchmetric=_torchmetric)
  • Then, the Lightning Trainer is initialized with the fun_control dictionary and the model as follows:

trainer = L.Trainer(
    default_root_dir=os.path.join(fun_control["CHECKPOINT_PATH"], config_id),
    max_epochs=model.hparams.epochs,
    accelerator=fun_control["accelerator"],
    devices=fun_control["devices"],
    logger=TensorBoardLogger(
        save_dir=fun_control["TENSORBOARD_PATH"],
        version=config_id,
        default_hp_metric=True,
        log_graph=fun_control["log_graph"],
    ),
    callbacks=[
        EarlyStopping(monitor="val_loss", patience=config["patience"],
                      mode="min", strict=False, verbose=False),
        ModelCheckpoint(
            dirpath=os.path.join(fun_control["CHECKPOINT_PATH"], config_id),
            save_last=True,
        ),
    ],
    enable_progress_bar=enable_progress_bar,
)
trainer.fit(model=model, datamodule=dm)
test_result = trainer.test(datamodule=dm, ckpt_path="last")
  • As shown in the code above, the last checkpoint is saved.

  • spotpython’s method load_light_from_checkpoint is used to load the last checkpoint and to get the model’s weights and biases. It requires the fun_control dictionary and the config_id as input to find the correct checkpoint.

  • Now, the model is trained and the weights and biases are available; the sketch below shows how the saved checkpoint can be inspected directly.
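  • Sketch: a Lightning checkpoint is a plain dictionary that can be opened with torch.load. The path below is copied from the log output above; depending on the PyTorch version, passing weights_only=False may be required.

import torch

ckpt = torch.load(
    "runs/saved_models/128_4096_2048_ReLU_Adam_0.0129_3.5628_32_False_kaiming_normal_TEST/last.ckpt",
    map_location="cpu",
)
print(ckpt.keys())                   # e.g., 'state_dict', 'hyper_parameters', ...
print(list(ckpt["state_dict"])[:3])  # first few parameter tensors by name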

32.4 Visualizing the Neural Network Architecture

# get the device
from spotpython.utils.device import getDevice
device = getDevice()
from spotpython.plot.xai import viz_net
viz_net(model, device=device)

(Figure: visualization of the network architecture produced by viz_net)

32.5 XAI Methods

  • spotpython provides methods to explain the model's predictions. The following neural network elements can be analyzed: weights, activations, and gradients.

32.5.1 Weights

  • Weights are the parameters of the neural network that are learned from the data during training. They connect neurons between layers and determine the strength and direction of the signal sent from one neuron to another. The network adjusts the weights during training to minimize the error between the predicted output and the actual output.
  • Interpretation of the weights: A high weight value indicates a strong influence of the input neuron on the output. Positive weights suggest a positive correlation, whereas negative weights suggest an inverse relationship between neurons. A plain-PyTorch sketch for inspecting the raw weight tensors follows.
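  • Independently of spotpython's helpers, the raw weight tensors of the loaded model can be listed with plain PyTorch; a minimal sketch:

# List every weight matrix of the loaded model with its shape and a
# magnitude summary (plain PyTorch, no spotpython helpers involved).
for name, param in model.named_parameters():
    if "weight" in name:
        w = param.detach()
        print(f"{name}: shape={tuple(w.shape)}, "
              f"mean(|w|)={w.abs().mean():.4f}, max(|w|)={w.abs().max():.4f}")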

32.5.2 Activations

  • Activations are the outputs produced by neurons after applying an activation function to the weighted sum of inputs. The activation function (e.g., ReLU, sigmoid, tanh) adds non-linearity to the model, allowing it to learn more complex relationships.
  • Interpretation of the activations: The value of activations indicates the intensity of the signal passed to the next layer. Certain activation patterns can highlight which features or parts of the data the network is focusing on. A hook-based sketch for capturing activations follows.
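  • Activations can be captured with forward hooks; a minimal plain-PyTorch sketch (independent of spotpython's get_activations), using a random batch with the 10 Diabetes input features:

import torch
import torch.nn as nn

acts = {}

def make_hook(name):
    def hook(module, inputs, output):
        acts[name] = output.detach()
    return hook

# Register a hook on every ReLU module of the loaded model.
handles = [m.register_forward_hook(make_hook(n))
           for n, m in model.named_modules() if isinstance(m, nn.ReLU)]

x = torch.randn(8, 10).to(next(model.parameters()).device)
_ = model(x)  # one forward pass fills the acts dictionary
for name, act in acts.items():
    print(name, tuple(act.shape), f"mean={act.mean():.4f}")
for h in handles:
    h.remove()  # clean up the hooks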

32.5.3 Gradients

  • Gradients are the partial derivatives of the loss function with respect to different parameters (weights) of the network. During backpropagation, gradients are used to update the weights in the direction that reduces the loss by methods like gradient descent.
  • Interpretation of the gradients: The magnitude of the gradient indicates how much a parameter should change to reduce the error. A large gradient implies a steeper slope and a bigger update, while a small gradient suggests that the parameter is near an optimal point. If gradients are too small (vanishing gradient problem), the network may learn slowly or stop learning. If they are too large (exploding gradient problem), the updates may be unstable.
  • spotpython provides the method get_gradients to get the gradients of the model; a plain-PyTorch sketch of the same idea follows.
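  • Before turning to the spotpython helpers, the sketch below computes gradients with a single backward pass on a random batch (random targets, so the numbers only illustrate the mechanics):

import torch
import torch.nn as nn

device = next(model.parameters()).device
x = torch.randn(8, 10, device=device)  # 10 input features, as in the Diabetes data
y = torch.randn(8, 1, device=device)   # random regression targets

model.zero_grad()
loss = nn.functional.mse_loss(model(x), y)
loss.backward()  # populates param.grad for every parameter

for name, param in model.named_parameters():
    if "weight" in name and param.grad is not None:
        print(f"{name}: mean(|grad|)={param.grad.abs().mean():.4e}")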
from spotpython.plot.xai import (
    get_activations, get_gradients, get_weights,
    visualize_weights, visualize_gradients, visualize_mean_activations,
    visualize_gradient_distributions, visualize_weights_distributions,
    visualize_activations_distributions,
)
batch_size = config["batch_size"]

32.5.4 Getting the Weights

from spotpython.plot.xai import sort_layers
weights, _ = get_weights(model)
# sort_layers(weights)
visualize_weights(model, absolute=True, cmap="GreenYellowRed", figsize=(6, 6))
1280 values in Layer Layer 0. Geometry: (128, 10)

8192 values in Layer Layer 3. Geometry: (64, 128)

4096 values in Layer Layer 6. Geometry: (64, 64)

2048 values in Layer Layer 9. Geometry: (32, 64)

1024 values in Layer Layer 12. Geometry: (32, 32)

1024 values in Layer Layer 15. Geometry: (32, 32)

512 values in Layer Layer 18. Geometry: (16, 32)

256 values in Layer Layer 21. Geometry: (16, 16)

256 values in Layer Layer 24. Geometry: (16, 16)

256 values in Layer Layer 27. Geometry: (16, 16)

16 values in Layer Layer 30. Geometry: (1, 16)

visualize_weights_distributions(model, color=f"C{0}", columns=4)
n:11

32.5.5 Getting the Activations

from spotpython.plot.xai import get_activations
activations, mean_activations, layer_sizes = get_activations(net=model, fun_control=fun_control, batch_size=batch_size, device=device)
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting train & val data.
train samples: 160, val samples: 106 generated for train & val data.
LightDataModule.train_dataloader(). data_train size: 160
visualize_mean_activations(mean_activations, layer_sizes=layer_sizes, absolute=True, cmap="GreenYellowRed", figsize=(6, 6))
128 values in Layer 0. Geometry: (1, 128)

64 values in Layer 3. Geometry: (1, 64)

64 values in Layer 6. Geometry: (1, 64)

32 values in Layer 9. Geometry: (1, 32)

32 values in Layer 12. Geometry: (1, 32)

32 values in Layer 15. Geometry: (1, 32)

16 values in Layer 18. Geometry: (1, 16)

16 values in Layer 21. Geometry: (1, 16)

16 values in Layer 24. Geometry: (1, 16)

16 values in Layer 27. Geometry: (1, 16)

visualize_activations_distributions(activations=activations,
                                    net=model, color="C0", columns=4)

32.5.6 Getting the Gradients

gradients, _ = get_gradients(net=model, fun_control=fun_control, batch_size=batch_size, device=device)
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting train & val data.
train samples: 160, val samples: 106 generated for train & val data.
LightDataModule.train_dataloader(). data_train size: 160
visualize_gradients(model, fun_control, batch_size, absolute=True, cmap="GreenYellowRed", figsize=(6, 6), device=device)
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting train & val data.
train samples: 160, val samples: 106 generated for train & val data.
LightDataModule.train_dataloader(). data_train size: 160
1280 values in Layer layers.0.weight. Geometry: (128, 10)

8192 values in Layer layers.3.weight. Geometry: (64, 128)

4096 values in Layer layers.6.weight. Geometry: (64, 64)

2048 values in Layer layers.9.weight. Geometry: (32, 64)

1024 values in Layer layers.12.weight. Geometry: (32, 32)

1024 values in Layer layers.15.weight. Geometry: (32, 32)

512 values in Layer layers.18.weight. Geometry: (16, 32)

256 values in Layer layers.21.weight. Geometry: (16, 16)

256 values in Layer layers.24.weight. Geometry: (16, 16)

256 values in Layer layers.27.weight. Geometry: (16, 16)

16 values in Layer layers.30.weight. Geometry: (1, 16)

visualize_gradient_distributions(model, fun_control, batch_size=batch_size, color=f"C{0}", device=device, columns=3)
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting train & val data.
train samples: 160, val samples: 106 generated for train & val data.
LightDataModule.train_dataloader(). data_train size: 160
n:11

32.6 Feature Attributions

32.6.1 Integrated Gradients

from spotpython.plot.xai import get_attributions, plot_attributions
df_att = get_attributions(spot_tuner, fun_control, attr_method="IntegratedGradients", n_rel=10)
plot_attributions(df_att, attr_method="IntegratedGradients")
train_model result: {'val_loss': 3005.481201171875, 'hp_metric': 3005.481201171875}
config: {'l1': 128, 'epochs': 4096, 'batch_size': 2048, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.012934884962293584, 'lr_mult': 3.562805588531447, 'patience': 32, 'batch_norm': False, 'initialization': 'kaiming_normal'}
Loading model with 128_4096_2048_ReLU_Adam_0.0129_3.5628_32_False_kaiming_normal_TRAIN from runs/saved_models/128_4096_2048_ReLU_Adam_0.0129_3.5628_32_False_kaiming_normal_TRAIN/last.ckpt
Model: NNLinearRegressor(
  (layers): Sequential(
    (0): Linear(in_features=10, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.012934884962293584, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.012934884962293584, inplace=False)
    (6): Linear(in_features=64, out_features=64, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.012934884962293584, inplace=False)
    (9): Linear(in_features=64, out_features=32, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.012934884962293584, inplace=False)
    (12): Linear(in_features=32, out_features=32, bias=True)
    (13): ReLU()
    (14): Dropout(p=0.012934884962293584, inplace=False)
    (15): Linear(in_features=32, out_features=32, bias=True)
    (16): ReLU()
    (17): Dropout(p=0.012934884962293584, inplace=False)
    (18): Linear(in_features=32, out_features=16, bias=True)
    (19): ReLU()
    (20): Dropout(p=0.012934884962293584, inplace=False)
    (21): Linear(in_features=16, out_features=16, bias=True)
    (22): ReLU()
    (23): Dropout(p=0.012934884962293584, inplace=False)
    (24): Linear(in_features=16, out_features=16, bias=True)
    (25): ReLU()
    (26): Dropout(p=0.012934884962293584, inplace=False)
    (27): Linear(in_features=16, out_features=16, bias=True)
    (28): ReLU()
    (29): Dropout(p=0.012934884962293584, inplace=False)
    (30): Linear(in_features=16, out_features=1, bias=True)
  )
)
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting test data.
test samples: 177 generated for test data.
LightDataModule.test_dataloader(). Test set size: 177
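  • The attribution method names used by get_attributions match those of the captum library; as a point of reference, the sketch below computes Integrated Gradients directly with captum on a stand-in model (an assumption for illustration, not spotpython's internal call):

import torch
import torch.nn as nn
from captum.attr import IntegratedGradients

model_ = nn.Sequential(nn.Linear(10, 16), nn.ReLU(), nn.Linear(16, 1))  # stand-in
model_.eval()

ig = IntegratedGradients(model_)
inputs = torch.randn(4, 10)
baseline = torch.zeros_like(inputs)  # reference point for the path integral
# target=0 selects the single output column of the regression model.
attr = ig.attribute(inputs, baselines=baseline, target=0)
print(attr.abs().mean(dim=0))        # mean absolute attribution per feature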

32.6.2 Deep Lift

df_lift = get_attributions(spot_tuner, fun_control, attr_method="DeepLift", n_rel=10)
print(df_lift)
plot_attributions(df_lift, attr_method="DeepLift")
train_model result: {'val_loss': 3079.11669921875, 'hp_metric': 3079.11669921875}
config: {'l1': 128, 'epochs': 4096, 'batch_size': 2048, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.012934884962293584, 'lr_mult': 3.562805588531447, 'patience': 32, 'batch_norm': False, 'initialization': 'kaiming_normal'}
Loading model with 128_4096_2048_ReLU_Adam_0.0129_3.5628_32_False_kaiming_normal_TRAIN from runs/saved_models/128_4096_2048_ReLU_Adam_0.0129_3.5628_32_False_kaiming_normal_TRAIN/last.ckpt
Model: NNLinearRegressor(
  (layers): Sequential(
    (0): Linear(in_features=10, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.012934884962293584, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.012934884962293584, inplace=False)
    (6): Linear(in_features=64, out_features=64, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.012934884962293584, inplace=False)
    (9): Linear(in_features=64, out_features=32, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.012934884962293584, inplace=False)
    (12): Linear(in_features=32, out_features=32, bias=True)
    (13): ReLU()
    (14): Dropout(p=0.012934884962293584, inplace=False)
    (15): Linear(in_features=32, out_features=32, bias=True)
    (16): ReLU()
    (17): Dropout(p=0.012934884962293584, inplace=False)
    (18): Linear(in_features=32, out_features=16, bias=True)
    (19): ReLU()
    (20): Dropout(p=0.012934884962293584, inplace=False)
    (21): Linear(in_features=16, out_features=16, bias=True)
    (22): ReLU()
    (23): Dropout(p=0.012934884962293584, inplace=False)
    (24): Linear(in_features=16, out_features=16, bias=True)
    (25): ReLU()
    (26): Dropout(p=0.012934884962293584, inplace=False)
    (27): Linear(in_features=16, out_features=16, bias=True)
    (28): ReLU()
    (29): Dropout(p=0.012934884962293584, inplace=False)
    (30): Linear(in_features=16, out_features=1, bias=True)
  )
)
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting test data.
test samples: 177 generated for test data.
LightDataModule.test_dataloader(). Test set size: 177
   Feature Index Feature  DeepLiftAttribution
0              3      bp           169.243088
1              2     bmi           167.737442
2              8  s5_ltg           130.366440
3              6  s3_hdl           124.848328
4              0     age           121.875526
5              9  s6_glu           116.520004
6              1     sex           108.987038
7              4   s1_tc            92.232819
8              5  s2_ldl            77.901260
9              7  s4_tch            64.186813

32.6.3 Feature Ablation

df_fl = get_attributions(spot_tuner, fun_control, attr_method="FeatureAblation", n_rel=10)
train_model result: {'val_loss': 3069.948486328125, 'hp_metric': 3069.948486328125}
config: {'l1': 128, 'epochs': 4096, 'batch_size': 2048, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.012934884962293584, 'lr_mult': 3.562805588531447, 'patience': 32, 'batch_norm': False, 'initialization': 'kaiming_normal'}
Loading model with 128_4096_2048_ReLU_Adam_0.0129_3.5628_32_False_kaiming_normal_TRAIN from runs/saved_models/128_4096_2048_ReLU_Adam_0.0129_3.5628_32_False_kaiming_normal_TRAIN/last.ckpt
Model: NNLinearRegressor(
  (layers): Sequential(
    (0): Linear(in_features=10, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.012934884962293584, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.012934884962293584, inplace=False)
    (6): Linear(in_features=64, out_features=64, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.012934884962293584, inplace=False)
    (9): Linear(in_features=64, out_features=32, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.012934884962293584, inplace=False)
    (12): Linear(in_features=32, out_features=32, bias=True)
    (13): ReLU()
    (14): Dropout(p=0.012934884962293584, inplace=False)
    (15): Linear(in_features=32, out_features=32, bias=True)
    (16): ReLU()
    (17): Dropout(p=0.012934884962293584, inplace=False)
    (18): Linear(in_features=32, out_features=16, bias=True)
    (19): ReLU()
    (20): Dropout(p=0.012934884962293584, inplace=False)
    (21): Linear(in_features=16, out_features=16, bias=True)
    (22): ReLU()
    (23): Dropout(p=0.012934884962293584, inplace=False)
    (24): Linear(in_features=16, out_features=16, bias=True)
    (25): ReLU()
    (26): Dropout(p=0.012934884962293584, inplace=False)
    (27): Linear(in_features=16, out_features=16, bias=True)
    (28): ReLU()
    (29): Dropout(p=0.012934884962293584, inplace=False)
    (30): Linear(in_features=16, out_features=1, bias=True)
  )
)
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting test data.
test samples: 177 generated for test data.
LightDataModule.test_dataloader(). Test set size: 177
print(df_fl)
plot_attributions(df_fl, attr_method="FeatureAblation")
   Feature Index Feature  FeatureAblationAttribution
0              3      bp                  109.434898
1              2     bmi                  101.225060
2              8  s5_ltg                   80.536064
3              0     age                   74.002365
4              6  s3_hdl                   68.412117
5              9  s6_glu                   58.961720
6              1     sex                   56.951256
7              4   s1_tc                   39.758644
8              5  s2_ldl                   36.430054
9              7  s4_tch                    8.257231

32.7 Conductance

from spotpython.plot.xai import plot_conductance_last_layer, get_weights_conductance_last_layer
weights_last, layer_conductance_last = get_weights_conductance_last_layer(spot_tuner, fun_control)
plot_conductance_last_layer(weights_last, layer_conductance_last, figsize=(6, 6))
train_model result: {'val_loss': 2889.78662109375, 'hp_metric': 2889.78662109375}
config: {'l1': 128, 'epochs': 4096, 'batch_size': 2048, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.012934884962293584, 'lr_mult': 3.562805588531447, 'patience': 32, 'batch_norm': False, 'initialization': 'kaiming_normal'}
Loading model with 128_4096_2048_ReLU_Adam_0.0129_3.5628_32_False_kaiming_normal_TRAIN from runs/saved_models/128_4096_2048_ReLU_Adam_0.0129_3.5628_32_False_kaiming_normal_TRAIN/last.ckpt
Model: NNLinearRegressor(
  (layers): Sequential(
    (0): Linear(in_features=10, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.012934884962293584, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.012934884962293584, inplace=False)
    (6): Linear(in_features=64, out_features=64, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.012934884962293584, inplace=False)
    (9): Linear(in_features=64, out_features=32, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.012934884962293584, inplace=False)
    (12): Linear(in_features=32, out_features=32, bias=True)
    (13): ReLU()
    (14): Dropout(p=0.012934884962293584, inplace=False)
    (15): Linear(in_features=32, out_features=32, bias=True)
    (16): ReLU()
    (17): Dropout(p=0.012934884962293584, inplace=False)
    (18): Linear(in_features=32, out_features=16, bias=True)
    (19): ReLU()
    (20): Dropout(p=0.012934884962293584, inplace=False)
    (21): Linear(in_features=16, out_features=16, bias=True)
    (22): ReLU()
    (23): Dropout(p=0.012934884962293584, inplace=False)
    (24): Linear(in_features=16, out_features=16, bias=True)
    (25): ReLU()
    (26): Dropout(p=0.012934884962293584, inplace=False)
    (27): Linear(in_features=16, out_features=16, bias=True)
    (28): ReLU()
    (29): Dropout(p=0.012934884962293584, inplace=False)
    (30): Linear(in_features=16, out_features=1, bias=True)
  )
)
train_model result: {'val_loss': 3011.62109375, 'hp_metric': 3011.62109375}
config: {'l1': 128, 'epochs': 4096, 'batch_size': 2048, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.012934884962293584, 'lr_mult': 3.562805588531447, 'patience': 32, 'batch_norm': False, 'initialization': 'kaiming_normal'}
Loading model with 128_4096_2048_ReLU_Adam_0.0129_3.5628_32_False_kaiming_normal_TRAIN from runs/saved_models/128_4096_2048_ReLU_Adam_0.0129_3.5628_32_False_kaiming_normal_TRAIN/last.ckpt
Model: NNLinearRegressor(
  (layers): Sequential(
    (0): Linear(in_features=10, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.012934884962293584, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.012934884962293584, inplace=False)
    (6): Linear(in_features=64, out_features=64, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.012934884962293584, inplace=False)
    (9): Linear(in_features=64, out_features=32, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.012934884962293584, inplace=False)
    (12): Linear(in_features=32, out_features=32, bias=True)
    (13): ReLU()
    (14): Dropout(p=0.012934884962293584, inplace=False)
    (15): Linear(in_features=32, out_features=32, bias=True)
    (16): ReLU()
    (17): Dropout(p=0.012934884962293584, inplace=False)
    (18): Linear(in_features=32, out_features=16, bias=True)
    (19): ReLU()
    (20): Dropout(p=0.012934884962293584, inplace=False)
    (21): Linear(in_features=16, out_features=16, bias=True)
    (22): ReLU()
    (23): Dropout(p=0.012934884962293584, inplace=False)
    (24): Linear(in_features=16, out_features=16, bias=True)
    (25): ReLU()
    (26): Dropout(p=0.012934884962293584, inplace=False)
    (27): Linear(in_features=16, out_features=16, bias=True)
    (28): ReLU()
    (29): Dropout(p=0.012934884962293584, inplace=False)
    (30): Linear(in_features=16, out_features=1, bias=True)
  )
)
Conductance analysis for layer:  Linear(in_features=16, out_features=1, bias=True)
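
  • Conductance quantifies how much of the attribution flows through the neurons of a given layer. As a point of reference, the sketch below computes layer conductance directly with captum on a stand-in model (an assumption for illustration, not spotpython's internal call):

import torch
import torch.nn as nn
from captum.attr import LayerConductance

model_ = nn.Sequential(nn.Linear(10, 16), nn.ReLU(), nn.Linear(16, 1))  # stand-in
model_.eval()

# Conductance of the 16 hidden neurons that feed the final Linear(16, 1) layer.
lc = LayerConductance(model_, model_[1])
inputs = torch.randn(4, 10)
cond = lc.attribute(inputs, target=0)  # shape (4, 16): per-neuron conductance
print(cond.mean(dim=0))                # average conductance per hidden neuron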