30  Explainable AI with SpotPython and PyTorch

from spotpython.data.diabetes import Diabetes
from spotpython.hyperdict.light_hyper_dict import LightHyperDict
from spotpython.fun.hyperlight import HyperLight
from spotpython.utils.init import (fun_control_init, surrogate_control_init, design_control_init)
from spotpython.utils.eda import gen_design_table
from spotpython.spot import spot
from spotpython.utils.file import get_experiment_filename
from spotpython.hyperparameters.values import set_hyperparameter
from math import inf

PREFIX="602_5"

data_set = Diabetes()

fun_control = fun_control_init(
    save_experiment=True,
    PREFIX=PREFIX,
    fun_evals=inf,
    max_time=60,
    data_set=data_set,
    core_model_name="light.regression.NNLinearRegressor",
    hyperdict=LightHyperDict,
    _L_in=10,
    _L_out=1)

fun = HyperLight().fun


set_hyperparameter(fun_control, "optimizer", [ "Adadelta", "Adam", "Adamax"])
set_hyperparameter(fun_control, "l1", [3,7])
set_hyperparameter(fun_control, "epochs", [10,12])
set_hyperparameter(fun_control, "batch_size", [4,11])
set_hyperparameter(fun_control, "dropout_prob", [0.0, 0.025])
set_hyperparameter(fun_control, "patience", [2,9])

design_control = design_control_init(init_size=7)

spot_tuner = spot.Spot(fun=fun, fun_control=fun_control, design_control=design_control)
module_name: light
submodule_name: regression
model_name: NNLinearRegressor

30.1 Running the Hyperparameter Tuning or Loading the Existing Model

from spotpython.utils.file import get_experiment_filename, load_experiment
import os
overwrite = False
filename = get_experiment_filename(PREFIX)
if os.path.exists(filename) and not overwrite:
    (spot_tuner, fun_control, design_control,
    surrogate_control, optimizer_control) = load_experiment(filename)
else:
    print("File does not exist or overwrite is True. Starting new experiment.")
    res = spot_tuner.run()
# only needed for spotpython version < 0.16.0
fun_control.update({"_L_cond": None})
File does not exist or overwrite is True. Starting new experiment.
train_model result: {'val_loss': nan, 'hp_metric': nan}
train_model result: {'val_loss': 4332.16064453125, 'hp_metric': 4332.16064453125}
train_model result: {'val_loss': 16998.376953125, 'hp_metric': 16998.376953125}
...
train_model result: {'val_loss': 2926.436279296875, 'hp_metric': 2926.436279296875}
spotpython tuning: 2926.436279296875 [----------] 0.09%
...
train_model result: {'val_loss': 2783.556640625, 'hp_metric': 2783.556640625}
spotpython tuning: 2783.556640625 [----------] 2.46%
...
train_model result: {'val_loss': 2759.4755859375, 'hp_metric': 2759.4755859375}
spotpython tuning: 2759.4755859375 [#---------] 11.52%
...
spotpython tuning: 2759.4755859375 [##########] 100.00% Done...

Experiment saved to spot_602_5_experiment.pickle

30.2 Results from the Hyperparameter Tuning Experiment

  • After the hyperparameter tuning is finished, the following information is available:
    • the spot_tuner object and
    • the associated fun_control dictionary
spot_tuner.plot_progress()
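
The progress plot shows how the best validation loss improves over the tuning run. A tabular comparison of default and tuned hyperparameter values can also be generated with gen_design_table; a minimal sketch, assuming the spot keyword accepts the tuner object as in other spotpython examples:

print(gen_design_table(fun_control=fun_control, spot=spot_tuner))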

30.2.1 Getting the Best Model, i.e., the Tuned Architecture

  • The method get_tuned_architecture [DOC] returns the best model architecture found during the hyperparameter tuning.
  • It returns the transformed values, i.e., batch_size = 2^x if the hyperparameter batch_size was transformed with the transform_power_2_int function (here, the raw value x = 11 yields the reported batch_size = 2^11 = 2048).
from spotpython.hyperparameters.values import get_tuned_architecture
import pprint
config = get_tuned_architecture(spot_tuner, fun_control)
pprint.pprint(config)
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.025,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 128,
 'lr_mult': 3.2952069054058097,
 'optimizer': 'Adam',
 'patience': 32}
  • Note: get_tuned_architecture has the option force_minX, which has no effect in this case.
from spotpython.hyperparameters.values import get_tuned_architecture
config = get_tuned_architecture(spot_tuner, fun_control, force_minX=True)
pprint.pprint(config)
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 2048,
 'dropout_prob': 0.025,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 128,
 'lr_mult': 3.2952069054058097,
 'optimizer': 'Adam',
 'patience': 32}

30.3 Training the Tuned Architecture on the Test Data

  • Since we are interested in the explainability of the model, we will train the tuned architecture on the test data.
  • spotpython's test_model function [DOC] is used to train the model on the test data.
  • Note: Until now, we have not used any information about the NN's weights and biases. Only the architecture, which is available as the config, is used.
  • spotpython uses the TensorBoard logger to save the training process in the ./runs directory. Therefore, we have to enable the TensorBoard logger in the fun_control dictionary. To get a clean start, we remove any existing runs folder.
from spotpython.light.testmodel import test_model
from spotpython.light.loadmodel import load_light_from_checkpoint
import os
# if the directory "./runs" exists, delete it
if os.path.exists("./runs"):
    os.system("rm -r ./runs")
fun_control.update({"tensorboard_log": True})
test_model(config, fun_control)
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃        Test metric               DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│         hp_metric              3148.95556640625      │
│         val_loss               3148.95556640625      │
└───────────────────────────┴───────────────────────────┘
test_model result: {'val_loss': 3148.95556640625, 'hp_metric': 3148.95556640625}
(3148.95556640625, 3148.95556640625)
model = load_light_from_checkpoint(config, fun_control)
config: {'l1': 128, 'epochs': 4096, 'batch_size': 2048, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.025, 'lr_mult': 3.2952069054058097, 'patience': 32, 'batch_norm': False, 'initialization': 'kaiming_uniform'}
Loading model with 128_4096_2048_ReLU_Adam_0.025_3.2952_32_False_kaiming_uniform_TEST from runs/saved_models/128_4096_2048_ReLU_Adam_0.025_3.2952_32_False_kaiming_uniform_TEST/last.ckpt
Model: NNLinearRegressor(
  (layers): Sequential(
    (0): Linear(in_features=10, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.025, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.025, inplace=False)
    (6): Linear(in_features=64, out_features=64, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.025, inplace=False)
    (9): Linear(in_features=64, out_features=32, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.025, inplace=False)
    (12): Linear(in_features=32, out_features=32, bias=True)
    (13): ReLU()
    (14): Dropout(p=0.025, inplace=False)
    (15): Linear(in_features=32, out_features=32, bias=True)
    (16): ReLU()
    (17): Dropout(p=0.025, inplace=False)
    (18): Linear(in_features=32, out_features=16, bias=True)
    (19): ReLU()
    (20): Dropout(p=0.025, inplace=False)
    (21): Linear(in_features=16, out_features=16, bias=True)
    (22): ReLU()
    (23): Dropout(p=0.025, inplace=False)
    (24): Linear(in_features=16, out_features=16, bias=True)
    (25): ReLU()
    (26): Dropout(p=0.025, inplace=False)
    (27): Linear(in_features=16, out_features=16, bias=True)
    (28): ReLU()
    (29): Dropout(p=0.025, inplace=False)
    (30): Linear(in_features=16, out_features=1, bias=True)
  )
)

30.3.0.1 Details of the Training Process on the Test Data

  • The test_model method initializes the model with the tuned architecture as follows:
model = fun_control["core_model"](**config, _L_in=_L_in, _L_out=_L_out, _torchmetric=_torchmetric)
  • Then, the Lightning Trainer is initialized with the fun_control dictionary and the model as follows:

    trainer = L.Trainer(
        default_root_dir=os.path.join(fun_control["CHECKPOINT_PATH"], config_id),
        max_epochs=model.hparams.epochs,
        accelerator=fun_control["accelerator"],
        devices=fun_control["devices"],
        logger=TensorBoardLogger(
            save_dir=fun_control["TENSORBOARD_PATH"],
            version=config_id,
            default_hp_metric=True,
            log_graph=fun_control["log_graph"],
        ),
        callbacks=[
            EarlyStopping(monitor="val_loss", patience=config["patience"], mode="min", strict=False, verbose=False),
            ModelCheckpoint(
                dirpath=os.path.join(fun_control["CHECKPOINT_PATH"], config_id), save_last=True
            ),
        ],
        enable_progress_bar=enable_progress_bar,
    )
    trainer.fit(model=model, datamodule=dm)
    test_result = trainer.test(datamodule=dm, ckpt_path="last")
  • As shown in the code above, the last checkpoint is saved.

  • spotpython’s method load_light_from_checkpoint is used to load the last checkpoint and to get the model’s weights and biases. It requires the fun_control dictionary and the config_id as input to find the correct checkpoint.

  • Now, the model is trained and the weights and biases are available.

from torchviz import make_dot
import torch
# create a dummy input and move it to the device the model lives on
# ("mps" on Apple Silicon in this run)
x = torch.randn(1, 10).requires_grad_(True)
x = x.to("mps")
output = model(x)
# render the computational graph of the forward pass as a PNG file
dot = make_dot(output, params=dict(model.named_parameters()), show_attrs=True, show_saved=True)
dot.render("model_architecture", format="png")
'model_architecture.png'

Model architecture graph rendered by torchviz (model_architecture.png)

30.4 XAI Methods

  • spotpython provides methods to explain the model’s predictions. The following neural network elements can be analyzed:

30.4.1 Weights

  • Weights are the parameters of the neural network that are learned from the data during training. They connect neurons between layers and determine the strength and direction of the signal sent from one neuron to another. The network adjusts the weights during training to minimize the error between the predicted output and the actual output.
  • Interpretation of the weights: A high weight value indicates a strong influence of the input neuron on the output. Positive weights suggest a positive correlation, whereas negative weights suggest an inverse relationship between neurons.
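  • For a quick look beyond the plots below, the weight matrix of the first Linear layer can be inspected directly; a minimal sketch using the model loaded above:

# inspect the weights of the first Linear layer (128 hidden neurons x 10 inputs)
w = model.layers[0].weight.detach().cpu()
print(w.shape)
print(w.abs().mean(dim=0))     # mean absolute weight per input feature
print((w > 0).float().mean())  # fraction of positive weights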

30.4.2 Activations

  • Activations are the outputs produced by neurons after applying an activation function to the weighted sum of inputs. The activation function (e.g., ReLU, sigmoid, tanh) adds non-linearity to the model, allowing it to learn more complex relationships.
  • Interpretation of the activations: The value of activations indicates the intensity of the signal passed to the next layer. Certain activation patterns can highlight which features or parts of the data the network is focusing on.
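  • Activations can be captured with PyTorch forward hooks; a minimal sketch (an illustration only; spotpython's get_activations, used below, handles the data loading and aggregation itself):

import torch

acts = {}

def make_hook(name):
    def hook(module, args, output):
        # store the layer output for later inspection
        acts[name] = output.detach().cpu()
    return hook

device = next(model.parameters()).device
model.eval()  # disable dropout for a deterministic forward pass
handles = [layer.register_forward_hook(make_hook(f"layer_{i}"))
           for i, layer in enumerate(model.layers)
           if isinstance(layer, torch.nn.Linear)]
with torch.no_grad():
    model(torch.randn(8, 10, device=device))
for h in handles:
    h.remove()
print({name: a.shape for name, a in acts.items()})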

30.4.3 Gradients

  • Gradients are the partial derivatives of the loss function with respect to different parameters (weights) of the network. During backpropagation, gradients are used to update the weights in the direction that reduces the loss by methods like gradient descent.
  • Interpretation of the gradients: The magnitude of the gradient indicates how much a parameter should change to reduce the error. A large gradient implies a steeper slope and a bigger update, while a small gradient suggests that the parameter is near an optimal point. If gradients are too small (vanishing gradient problem), the network may learn slowly or stop learning. If they are too large (exploding gradient problem), the updates may be unstable.
  • spotpython provides the method get_gradients to get the gradients of the model.
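  • A minimal sketch of one such gradient computation (an illustration only, not the exact loop inside get_gradients): forward a random batch, backpropagate the MSE loss, and inspect the gradients of the first layer's weights:

import torch

device = next(model.parameters()).device
x = torch.randn(32, 10, device=device)
y = torch.randn(32, 1, device=device)
loss = torch.nn.functional.mse_loss(model(x), y)
model.zero_grad()  # clear stale gradients
loss.backward()    # compute d(loss)/d(weights)
print(model.layers[0].weight.grad.abs().mean())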
from spotpython.plot.xai import (
    get_activations, get_gradients, get_weights,
    visualize_weights, visualize_gradients, visualize_mean_activations,
    visualize_gradient_distributions, visualize_weights_distributions,
    visualize_activations_distributions,
)
batch_size = config["batch_size"]

30.4.4 Getting the Weights

from spotpython.plot.xai import sort_layers
weights, _ = get_weights(model)
# sort_layers(weights)
visualize_weights(model, absolute=True, cmap="GreenYellowRed", figsize=(6, 6))
1280 values in Layer Layer 0. Geometry: (128, 10)

8192 values in Layer Layer 3. Geometry: (64, 128)

4096 values in Layer Layer 6. Geometry: (64, 64)

2048 values in Layer Layer 9. Geometry: (32, 64)

1024 values in Layer Layer 12. Geometry: (32, 32)

1024 values in Layer Layer 15. Geometry: (32, 32)

512 values in Layer Layer 18. Geometry: (16, 32)

256 values in Layer Layer 21. Geometry: (16, 16)

256 values in Layer Layer 24. Geometry: (16, 16)

256 values in Layer Layer 27. Geometry: (16, 16)

16 values in Layer Layer 30. Geometry: (1, 16)

visualize_weights_distributions(model, color=f"C{0}", columns=4)
n:11

30.4.5 Getting the Activations

from spotpython.plot.xai import get_activations
activations, mean_activations, layer_sizes = get_activations(net=model, fun_control=fun_control, batch_size=batch_size, device="mps")
stage: fit
full_sizefull_train_size: 0.6
full_sizeval_size: 0.24
full_sizetrain_size: 0.36
full_sizetest_size: 0.4
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.train_dataloader(). data_train size: 160
visualize_mean_activations(mean_activations, layer_sizes=layer_sizes, absolute=True, cmap="GreenYellowRed", figsize=(6, 6))
128 values in Layer 0. Geometry: (1, 128)

64 values in Layer 3. Geometry: (1, 64)

64 values in Layer 6. Geometry: (1, 64)

32 values in Layer 9. Geometry: (1, 32)

32 values in Layer 12. Geometry: (1, 32)

32 values in Layer 15. Geometry: (1, 32)

16 values in Layer 18. Geometry: (1, 16)

16 values in Layer 21. Geometry: (1, 16)

16 values in Layer 24. Geometry: (1, 16)

16 values in Layer 27. Geometry: (1, 16)

visualize_activations_distributions(activations=activations,
                                    net=model, color="C0", columns=4)

30.4.6 Getting the Gradients

gradients, _ = get_gradients(net=model, fun_control=fun_control, batch_size=batch_size, device="mps")
stage: fit
full_sizefull_train_size: 0.6
full_sizeval_size: 0.24
full_sizetrain_size: 0.36
full_sizetest_size: 0.4
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.train_dataloader(). data_train size: 160
visualize_gradients(model, fun_control, batch_size, absolute=True, cmap="GreenYellowRed", figsize=(6, 6), device="mps")
stage: fit
full_sizefull_train_size: 0.6
full_sizeval_size: 0.24
full_sizetrain_size: 0.36
full_sizetest_size: 0.4
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.train_dataloader(). data_train size: 160
1280 values in Layer layers.0.weight. Geometry: (128, 10)

8192 values in Layer layers.3.weight. Geometry: (64, 128)

4096 values in Layer layers.6.weight. Geometry: (64, 64)

2048 values in Layer layers.9.weight. Geometry: (32, 64)

1024 values in Layer layers.12.weight. Geometry: (32, 32)

1024 values in Layer layers.15.weight. Geometry: (32, 32)

512 values in Layer layers.18.weight. Geometry: (16, 32)

256 values in Layer layers.21.weight. Geometry: (16, 16)

256 values in Layer layers.24.weight. Geometry: (16, 16)

256 values in Layer layers.27.weight. Geometry: (16, 16)

16 values in Layer layers.30.weight. Geometry: (1, 16)

visualize_gradient_distributions(model, fun_control, batch_size=batch_size, color=f"C{0}", device="mps", columns=3)
stage: fit
full_sizefull_train_size: 0.6
full_sizeval_size: 0.24
full_sizetrain_size: 0.36
full_sizetest_size: 0.4
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.train_dataloader(). data_train size: 160
n:11

30.5 Feature Attributions

30.5.1 Integrated Gradients

from spotpython.plot.xai import get_attributions, plot_attributions
df_att = get_attributions(spot_tuner, fun_control, attr_method="IntegratedGradients", n_rel=10)
plot_attributions(df_att, attr_method="IntegratedGradients")
train_model result: {'val_loss': 2917.80126953125, 'hp_metric': 2917.80126953125}
config: {'l1': 128, 'epochs': 4096, 'batch_size': 2048, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.025, 'lr_mult': 3.2952069054058097, 'patience': 32, 'batch_norm': False, 'initialization': 'kaiming_uniform'}
Loading model with 128_4096_2048_ReLU_Adam_0.025_3.2952_32_False_kaiming_uniform_TRAIN from runs/saved_models/128_4096_2048_ReLU_Adam_0.025_3.2952_32_False_kaiming_uniform_TRAIN/last.ckpt
Model: NNLinearRegressor(...)
stage: test
full_sizefull_train_size: 0.6
full_sizeval_size: 0.24
full_sizetrain_size: 0.36
full_sizetest_size: 0.4
test_size: 0.4 used for test dataset.
LightDataModule.test_dataloader(). Test set size: 177
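
The attr_method names match the attribution classes in Captum. A minimal stand-alone sketch of Integrated Gradients on a random batch (an assumption for illustration; get_attributions additionally retrains the model and aggregates the attributions over the test set):

import torch
from captum.attr import IntegratedGradients

device = next(model.parameters()).device
x = torch.randn(8, 10, device=device)
ig = IntegratedGradients(model)
# target=0 selects the single column of the (N, 1) regression output
attr = ig.attribute(x, baselines=torch.zeros_like(x), target=0)
print(attr.mean(dim=0))  # average attribution per input feature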

30.5.2 Deep Lift

df_lift = get_attributions(spot_tuner, fun_control, attr_method="DeepLift", n_rel=10)
print(df_lift)
plot_attributions(df_lift, attr_method="DeepLift")
train_model result: {'val_loss': 2792.701171875, 'hp_metric': 2792.701171875}
config: {'l1': 128, 'epochs': 4096, 'batch_size': 2048, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.025, 'lr_mult': 3.2952069054058097, 'patience': 32, 'batch_norm': False, 'initialization': 'kaiming_uniform'}
Loading model with 128_4096_2048_ReLU_Adam_0.025_3.2952_32_False_kaiming_uniform_TRAIN from runs/saved_models/128_4096_2048_ReLU_Adam_0.025_3.2952_32_False_kaiming_uniform_TRAIN/last.ckpt
Model: NNLinearRegressor(...)
stage: test
full_sizefull_train_size: 0.6
full_sizeval_size: 0.24
full_sizetrain_size: 0.36
full_sizetest_size: 0.4
test_size: 0.4 used for test dataset.
LightDataModule.test_dataloader(). Test set size: 177
   Feature Index Feature  DeepLiftAttribution
0              3      bp           167.202744
1              2     bmi           166.569290
2              6  s3_hdl           153.134659
3              8  s5_ltg           143.385300
4              1     sex           139.759445
5              0     age           133.720673
6              4   s1_tc            99.584084
7              9  s6_glu            96.482117
8              7  s4_tch            55.382660
9              5  s2_ldl            42.765591

30.5.3 Feature Ablation

df_fl = get_attributions(spot_tuner, fun_control, attr_method="FeatureAblation", n_rel=10)
train_model result: {'val_loss': 2833.344970703125, 'hp_metric': 2833.344970703125}
config: {'l1': 128, 'epochs': 4096, 'batch_size': 2048, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.025, 'lr_mult': 3.2952069054058097, 'patience': 32, 'batch_norm': False, 'initialization': 'kaiming_uniform'}
Loading model with 128_4096_2048_ReLU_Adam_0.025_3.2952_32_False_kaiming_uniform_TRAIN from runs/saved_models/128_4096_2048_ReLU_Adam_0.025_3.2952_32_False_kaiming_uniform_TRAIN/last.ckpt
Model: NNLinearRegressor(...)
stage: test
full_sizefull_train_size: 0.6
full_sizeval_size: 0.24
full_sizetrain_size: 0.36
full_sizetest_size: 0.4
test_size: 0.4 used for test dataset.
LightDataModule.test_dataloader(). Test set size: 177
print(df_fl)
plot_attributions(df_fl, attr_method="FeatureAblation")
   Feature Index Feature  FeatureAblationAttribution
0              3      bp                  101.518921
1              6  s3_hdl                   89.019524
2              2     bmi                   85.355545
3              8  s5_ltg                   83.932846
4              0     age                   73.484924
5              1     sex                   67.639557
6              9  s6_glu                   45.687206
7              4   s1_tc                   40.080967
8              7  s4_tch                   14.273449
9              5  s2_ldl                   -4.359542

30.6 Conductance

from spotpython.plot.xai import plot_conductance_last_layer, get_weights_conductance_last_layer
weights_last, layer_conductance_last = get_weights_conductance_last_layer(spot_tuner, fun_control)
plot_conductance_last_layer(weights_last, layer_conductance_last, figsize=(6, 6))
train_model result: {'val_loss': 2804.455078125, 'hp_metric': 2804.455078125}
config: {'l1': 128, 'epochs': 4096, 'batch_size': 2048, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.025, 'lr_mult': 3.2952069054058097, 'patience': 32, 'batch_norm': False, 'initialization': 'kaiming_uniform'}
Loading model with 128_4096_2048_ReLU_Adam_0.025_3.2952_32_False_kaiming_uniform_TRAIN from runs/saved_models/128_4096_2048_ReLU_Adam_0.025_3.2952_32_False_kaiming_uniform_TRAIN/last.ckpt
Model: NNLinearRegressor(...)
train_model result: {'val_loss': 2968.392578125, 'hp_metric': 2968.392578125}
config: {'l1': 128, 'epochs': 4096, 'batch_size': 2048, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.025, 'lr_mult': 3.2952069054058097, 'patience': 32, 'batch_norm': False, 'initialization': 'kaiming_uniform'}
Loading model with 128_4096_2048_ReLU_Adam_0.025_3.2952_32_False_kaiming_uniform_TRAIN from runs/saved_models/128_4096_2048_ReLU_Adam_0.025_3.2952_32_False_kaiming_uniform_TRAIN/last.ckpt
Model: NNLinearRegressor(...)
Conductance analysis for layer:  Linear(in_features=16, out_features=1, bias=True)
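
Layer conductance combines gradients and activations to quantify how much of the attribution flows through the neurons of a given layer. A minimal sketch with Captum's LayerConductance for the last layer (an assumption for illustration; get_weights_conductance_last_layer wraps this for the tuned model):

import torch
from captum.attr import LayerConductance

device = next(model.parameters()).device
x = torch.randn(8, 10, device=device)
# conductance for the last Linear layer (index 30 in model.layers);
# attribute_to_layer_input=True yields one value per incoming neuron,
# matching the 16 weights plotted above
lc = LayerConductance(model, model.layers[30])
cond = lc.attribute(x, target=0, attribute_to_layer_input=True)
print(cond.mean(dim=0))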