51 Explainable AI with SpotPython and PyTorch

Note that the divergence_threshold is set to 3,000; this value is based on preliminary experiments with the data set used here.

from spotpython.data.diabetes import Diabetes
from spotpython.hyperdict.light_hyper_dict import LightHyperDict
from spotpython.fun.hyperlight import HyperLight
from spotpython.utils.init import (fun_control_init, surrogate_control_init, design_control_init)
from spotpython.spot import Spot
from spotpython.utils.file import get_experiment_filename
from spotpython.hyperparameters.values import set_hyperparameter
from math import inf

PREFIX = "602_12_1"

data_set = Diabetes()

fun_control = fun_control_init(
    save_experiment=True,        # save the experiment to <PREFIX>_exp.pkl
    PREFIX=PREFIX,
    fun_evals=inf,               # no limit on the number of evaluations;
    max_time=1,                  # stop after the time budget (in minutes) instead
    data_set=data_set,
    core_model_name="light.regression.NNLinearRegressor",
    hyperdict=LightHyperDict,
    divergence_threshold=3_000,  # divergence threshold (see the note above)
    _L_in=10,                    # Diabetes has 10 input features
    _L_out=1)                    # and one regression target

fun = HyperLight().fun

# Restrict the search space: a list of levels for the categorical hyperparameter,
# [lower, upper] bounds for the numerical ones
set_hyperparameter(fun_control, "optimizer", ["Adadelta", "Adam", "Adamax"])
set_hyperparameter(fun_control, "l1", [3, 7])
set_hyperparameter(fun_control, "epochs", [10, 12])
set_hyperparameter(fun_control, "batch_size", [4, 11])
set_hyperparameter(fun_control, "dropout_prob", [0.0, 0.025])
set_hyperparameter(fun_control, "patience", [2, 9])

design_control = design_control_init(init_size=7)  # 7 points in the initial design

S = Spot(fun=fun, fun_control=fun_control, design_control=design_control)
module_name: light
submodule_name: regression
model_name: NNLinearRegressor
Experiment saved to 602_12_1_exp.pkl
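
Before the tuner is started, the resulting search space can be inspected. The following is a minimal sketch, assuming that the installed spotpython version provides the gen_design_table helper in spotpython.utils.eda:

from spotpython.utils.eda import gen_design_table

# Print name, type, default, bounds, and transform of each hyperparameter
print(gen_design_table(fun_control))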

51.1 Running the Hyperparameter Tuning or Loading the Existing Model

S.run()
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode    FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  202 K │ train │ 51.6 M │ [128, 10]   [128, 1] │
└───┴────────┴────────────┴────────┴───────┴────────┴───────────┴───────────┘
Trainable params: 202 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 202 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 51.6 M                                                                                                
train_model result: {'val_loss': 24008.5703125, 'hp_metric': 24008.5703125}
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  3.2 M │ train │ 816 M │ [128, 10]   [128, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴───────────┴───────────┘
Trainable params: 3.2 M                                                                                            
Non-trainable params: 0                                                                                            
Total params: 3.2 M                                                                                                
Total estimated model params size (MB): 12                                                                         
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 816 M                                                                                                 
train_model result: {'val_loss': 25638.1328125, 'hp_metric': 25638.1328125}
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode    FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  202 K │ train │ 12.9 M │ [32, 10]    [32, 1] │
└───┴────────┴────────────┴────────┴───────┴────────┴──────────┴───────────┘
Trainable params: 202 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 202 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 12.9 M                                                                                                
train_model result: {'val_loss': 4298.646484375, 'hp_metric': 4298.646484375}
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs    In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  802 K │ train │ 1.6 B │ [1024, 10]  [1024, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴────────────┴───────────┘
Trainable params: 802 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 802 K                                                                                                
Total estimated model params size (MB): 3                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 1.6 B                                                                                                 
train_model result: {'val_loss': 23521.087890625, 'hp_metric': 23521.087890625}
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode    FLOPs    In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │ 12.7 M │ train │ 52.1 B │ [2048, 10]  [2048, 1] │
└───┴────────┴────────────┴────────┴───────┴────────┴────────────┴───────────┘
Trainable params: 12.7 M                                                                                           
Non-trainable params: 0                                                                                            
Total params: 12.7 M                                                                                               
Total estimated model params size (MB): 50                                                                         
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 52.1 B                                                                                                
train_model result: {'val_loss': 234333.578125, 'hp_metric': 234333.578125}
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode    FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │ 53.2 K │ train │ 52.5 M │ [512, 10]   [512, 1] │
└───┴────────┴────────────┴────────┴───────┴────────┴───────────┴───────────┘
Trainable params: 53.2 K                                                                                           
Non-trainable params: 0                                                                                            
Total params: 53.2 K                                                                                               
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 52.5 M                                                                                                
train_model result: {'val_loss': 24054.228515625, 'hp_metric': 24054.228515625}
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode    FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  807 K │ train │ 51.2 M │ [32, 10]    [32, 1] │
└───┴────────┴────────────┴────────┴───────┴────────┴──────────┴───────────┘
Trainable params: 807 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 807 K                                                                                                
Total estimated model params size (MB): 3                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 51.2 M                                                                                                
train_model result: {'val_loss': 24212.8515625, 'hp_metric': 24212.8515625}
Anisotropic model: n_theta set to 10
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  205 K │ train │ 6.5 M │ [16, 10]    [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 205 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 205 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 6.5 M                                                                                                 
train_model result: {'val_loss': 22380.19140625, 'hp_metric': 22380.19140625}
Anisotropic model: n_theta set to 10
spotpython tuning: 4298.646484375 [----------] 2.07% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  202 K │ train │ 6.5 M │ [16, 10]    [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 202 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 202 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 6.5 M                                                                                                 
train_model result: {'val_loss': 24050.72265625, 'hp_metric': 24050.72265625}
Anisotropic model: n_theta set to 10
spotpython tuning: 4298.646484375 [----------] 3.72% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs    In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  202 K │ train │ 826 M │ [2048, 10]  [2048, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴────────────┴───────────┘
Trainable params: 202 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 202 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 826 M                                                                                                 
train_model result: {'val_loss': 23782.5078125, 'hp_metric': 23782.5078125}
Anisotropic model: n_theta set to 10
spotpython tuning: 4298.646484375 [----------] 4.73% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  202 K │ train │ 6.5 M │ [16, 10]    [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 202 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 202 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 6.5 M                                                                                                 
train_model result: {'val_loss': nan, 'hp_metric': nan}
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │ 53.2 K │ train │ 6.6 M │ [64, 10]    [64, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 53.2 K                                                                                           
Non-trainable params: 0                                                                                            
Total params: 53.2 K                                                                                               
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 6.6 M                                                                                                 
train_model result: {'val_loss': 23811.4921875, 'hp_metric': 23811.4921875}
Anisotropic model: n_theta set to 10
spotpython tuning: 4298.646484375 [#---------] 6.93% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  202 K │ train │ 6.5 M │ [16, 10]    [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 202 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 202 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 6.5 M                                                                                                 
train_model result: {'val_loss': 4016.7470703125, 'hp_metric': 4016.7470703125}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [#---------] 7.81% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  205 K │ train │ 6.5 M │ [16, 10]    [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 205 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 205 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 6.5 M                                                                                                 
train_model result: {'val_loss': 23819.04296875, 'hp_metric': 23819.04296875}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [#---------] 9.17% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │ 53.2 K │ train │ 1.6 M │ [16, 10]    [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 53.2 K                                                                                           
Non-trainable params: 0                                                                                            
Total params: 53.2 K                                                                                               
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 1.6 M                                                                                                 
train_model result: {'val_loss': 24074.947265625, 'hp_metric': 24074.947265625}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [#---------] 10.48% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode    FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  205 K │ train │ 51.6 M │ [128, 10]   [128, 1] │
└───┴────────┴────────────┴────────┴───────┴────────┴───────────┴───────────┘
Trainable params: 205 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 205 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 51.6 M                                                                                                
train_model result: {'val_loss': 23498.798828125, 'hp_metric': 23498.798828125}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [#---------] 11.33% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  205 K │ train │ 6.5 M │ [16, 10]    [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 205 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 205 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 6.5 M                                                                                                 
train_model result: {'val_loss': 23848.24609375, 'hp_metric': 23848.24609375}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [#---------] 12.87% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs    In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │ 53.2 K │ train │ 209 M │ [2048, 10]  [2048, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴────────────┴───────────┘
Trainable params: 53.2 K                                                                                           
Non-trainable params: 0                                                                                            
Total params: 53.2 K                                                                                               
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 209 M                                                                                                 
train_model result: {'val_loss': 24046.033203125, 'hp_metric': 24046.033203125}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [#---------] 14.07% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode    FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  205 K │ train │ 51.6 M │ [128, 10]   [128, 1] │
└───┴────────┴────────────┴────────┴───────┴────────┴───────────┴───────────┘
Trainable params: 205 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 205 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 51.6 M                                                                                                
train_model result: {'val_loss': 24020.1953125, 'hp_metric': 24020.1953125}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [##--------] 16.02% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  3.2 M │ train │ 816 M │ [128, 10]   [128, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴───────────┴───────────┘
Trainable params: 3.2 M                                                                                            
Non-trainable params: 0                                                                                            
Total params: 3.2 M                                                                                                
Total estimated model params size (MB): 12                                                                         
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 816 M                                                                                                 
train_model result: {'val_loss': 23291.81640625, 'hp_metric': 23291.81640625}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [##--------] 17.96% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  3.2 M │ train │ 816 M │ [128, 10]   [128, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴───────────┴───────────┘
Trainable params: 3.2 M                                                                                            
Non-trainable params: 0                                                                                            
Total params: 3.2 M                                                                                                
Total estimated model params size (MB): 12                                                                         
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 816 M                                                                                                 
train_model result: {'val_loss': 24612.265625, 'hp_metric': 24612.265625}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [##--------] 19.95% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  3.2 M │ train │ 816 M │ [128, 10]   [128, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴───────────┴───────────┘
Trainable params: 3.2 M                                                                                            
Non-trainable params: 0                                                                                            
Total params: 3.2 M                                                                                                
Total estimated model params size (MB): 12                                                                         
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 816 M                                                                                                 
train_model result: {'val_loss': 24032.283203125, 'hp_metric': 24032.283203125}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [##--------] 21.92% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode    FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │ 53.2 K │ train │ 26.2 M │ [256, 10]   [256, 1] │
└───┴────────┴────────────┴────────┴───────┴────────┴───────────┴───────────┘
Trainable params: 53.2 K                                                                                           
Non-trainable params: 0                                                                                            
Total params: 53.2 K                                                                                               
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 26.2 M                                                                                                
train_model result: {'val_loss': 24049.4453125, 'hp_metric': 24049.4453125}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [##--------] 23.77% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  3.2 M │ train │ 816 M │ [128, 10]   [128, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴───────────┴───────────┘
Trainable params: 3.2 M                                                                                            
Non-trainable params: 0                                                                                            
Total params: 3.2 M                                                                                                
Total estimated model params size (MB): 12                                                                         
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 816 M                                                                                                 
train_model result: {'val_loss': 23554.34375, 'hp_metric': 23554.34375}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [###-------] 26.18% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  3.2 M │ train │ 816 M │ [128, 10]   [128, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴───────────┴───────────┘
Trainable params: 3.2 M                                                                                            
Non-trainable params: 0                                                                                            
Total params: 3.2 M                                                                                                
Total estimated model params size (MB): 12                                                                         
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 816 M                                                                                                 
train_model result: {'val_loss': 23657.224609375, 'hp_metric': 23657.224609375}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [###-------] 27.86% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode    FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  802 K │ train │ 51.2 M │ [32, 10]    [32, 1] │
└───┴────────┴────────────┴────────┴───────┴────────┴──────────┴───────────┘
Trainable params: 802 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 802 K                                                                                                
Total estimated model params size (MB): 3                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 51.2 M                                                                                                
train_model result: {'val_loss': 23784.43359375, 'hp_metric': 23784.43359375}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [###-------] 29.39% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  3.2 M │ train │ 816 M │ [128, 10]   [128, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴───────────┴───────────┘
Trainable params: 3.2 M                                                                                            
Non-trainable params: 0                                                                                            
Total params: 3.2 M                                                                                                
Total estimated model params size (MB): 12                                                                         
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 816 M                                                                                                 
train_model result: {'val_loss': 22549.009765625, 'hp_metric': 22549.009765625}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [###-------] 31.44% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  3.2 M │ train │ 816 M │ [128, 10]   [128, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴───────────┴───────────┘
Trainable params: 3.2 M                                                                                            
Non-trainable params: 0                                                                                            
Total params: 3.2 M                                                                                                
Total estimated model params size (MB): 12                                                                         
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 816 M                                                                                                 
train_model result: {'val_loss': 23017.775390625, 'hp_metric': 23017.775390625}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [###-------] 33.36% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode    FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  202 K │ train │ 12.9 M │ [32, 10]    [32, 1] │
└───┴────────┴────────────┴────────┴───────┴────────┴──────────┴───────────┘
Trainable params: 202 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 202 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 12.9 M                                                                                                
train_model result: {'val_loss': 13878.26171875, 'hp_metric': 13878.26171875}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [####------] 35.08% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode    FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  202 K │ train │ 12.9 M │ [32, 10]    [32, 1] │
└───┴────────┴────────────┴────────┴───────┴────────┴──────────┴───────────┘
Trainable params: 202 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 202 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 12.9 M                                                                                                
train_model result: {'val_loss': 37925.36328125, 'hp_metric': 37925.36328125}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [####------] 37.78% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │ 51.9 K │ train │ 6.6 M │ [64, 10]    [64, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 51.9 K                                                                                           
Non-trainable params: 0                                                                                            
Total params: 51.9 K                                                                                               
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 6.6 M                                                                                                 
train_model result: {'val_loss': 23333.94140625, 'hp_metric': 23333.94140625}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [####------] 41.65% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  202 K │ train │ 6.5 M │ [16, 10]    [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 202 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 202 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 6.5 M                                                                                                 
train_model result: {'val_loss': 24858.09375, 'hp_metric': 24858.09375}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [####------] 44.43% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode    FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  205 K │ train │ 12.9 M │ [32, 10]    [32, 1] │
└───┴────────┴────────────┴────────┴───────┴────────┴──────────┴───────────┘
Trainable params: 205 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 205 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 12.9 M                                                                                                
train_model result: {'val_loss': 23383.541015625, 'hp_metric': 23383.541015625}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [#####-----] 46.49% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │ 53.2 K │ train │ 1.6 M │ [16, 10]    [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 53.2 K                                                                                           
Non-trainable params: 0                                                                                            
Total params: 53.2 K                                                                                               
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 1.6 M                                                                                                 
train_model result: {'val_loss': 23122.578125, 'hp_metric': 23122.578125}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [#####-----] 49.20% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  3.2 M │ train │ 102 M │ [16, 10]    [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 3.2 M                                                                                            
Non-trainable params: 0                                                                                            
Total params: 3.2 M                                                                                                
Total estimated model params size (MB): 12                                                                         
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 102 M                                                                                                 
train_model result: {'val_loss': 20397.021484375, 'hp_metric': 20397.021484375}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [#####-----] 52.11% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  3.2 M │ train │ 102 M │ [16, 10]    [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 3.2 M                                                                                            
Non-trainable params: 0                                                                                            
Total params: 3.2 M                                                                                                
Total estimated model params size (MB): 12                                                                         
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 102 M                                                                                                 
train_model result: {'val_loss': 18613.212890625, 'hp_metric': 18613.212890625}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [#####-----] 54.75% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  3.2 M │ train │ 102 M │ [16, 10]    [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 3.2 M                                                                                            
Non-trainable params: 0                                                                                            
Total params: 3.2 M                                                                                                
Total estimated model params size (MB): 12                                                                         
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 102 M                                                                                                 
train_model result: {'val_loss': 19348.921875, 'hp_metric': 19348.921875}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [######----] 56.95% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━━┳━━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode    FLOPs   In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━━╇━━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │ 53.2 K │ train │ 13.1 M │ [128, 10]   [128, 1] │
└───┴────────┴────────────┴────────┴───────┴────────┴───────────┴───────────┘
Trainable params: 53.2 K                                                                                           
Non-trainable params: 0                                                                                            
Total params: 53.2 K                                                                                               
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 24                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 13.1 M                                                                                                
train_model result: {'val_loss': 24098.962890625, 'hp_metric': 24098.962890625}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [######----] 59.10% 
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃    Name    Type        Params  Mode   FLOPs  In sizes  Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  202 K │ train │ 6.5 M │ [16, 10]    [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 202 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 202 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 6.5 M                                                                                                 
train_model result: {'val_loss': 7353.9287109375, 'hp_metric': 7353.9287109375}
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [######----] 63.13% 
[Per-evaluation Lightning model summaries omitted: the tuner alternates between candidate networks with 202 K, 807 K, and 3.2 M parameters. The validation losses of the remaining evaluations are 5154.142578125, 5813.7734375, 20272.11328125, 23581.716796875, 17836.59765625, 20769.736328125, 19012.083984375, 21169.55859375, 17560.904296875, 17508.0703125, 22455.43359375, and 17651.37890625; none improves on the best value of 4016.7470703125.]
Anisotropic model: n_theta set to 10
spotpython tuning: 4016.7470703125 [##########] 100.00% Done...

Experiment saved to 602_12_1_res.pkl
<spotpython.spot.spot.Spot at 0x13594b0e0>

51.2 Results from the Hyperparameter Tuning Experiment

  • After the hyperparameter tuning is finished, the following information is available:
    • the S object and
    • the associated fun_control dictionary
S.print_results(print_screen=True)
min y: 4016.7470703125
l1: 4.0
epochs: 12.0
batch_size: 4.0
act_fn: 2.0
optimizer: 1.0
dropout_prob: 0.0003746131641613862
lr_mult: 5.932218819111239
patience: 2.0
batch_norm: 0.0
initialization: 1.0
S.plot_progress()

51.2.1 Getting the Best Model, i.e., the Tuned Architecture

  • The method get_tuned_architecture [DOC] returns the best model architecture found during the hyperparameter tuning.
  • It returns the transformed values, i.e., batch_size = 2^x if the hyperparameter batch_size was transformed with the transform_power_2_int function.
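  • For illustration, the mapping from the raw values reported by print_results to the transformed values returned by get_tuned_architecture can be reproduced by hand (a small sketch, not spotpython code; the raw values are taken from the print_results output above):
raw = {"l1": 4, "epochs": 12, "batch_size": 4, "patience": 2}  # from S.print_results()
print({k: 2**v for k, v in raw.items()})  # transform_power_2_int maps x to 2**x
# {'l1': 16, 'epochs': 4096, 'batch_size': 16, 'patience': 4}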
from spotpython.hyperparameters.values import get_tuned_architecture
import pprint
config = get_tuned_architecture(S)
pprint.pprint(config)
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 16,
 'dropout_prob': 0.0003746131641613862,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 16,
 'lr_mult': 5.932218819111239,
 'optimizer': 'Adam',
 'patience': 4}
  • Note: get_tuned_architecture has the option force_minX, which has no effect in this case (the result is identical, as shown below).
from spotpython.hyperparameters.values import get_tuned_architecture
config = get_tuned_architecture(S, force_minX=True)
pprint.pprint(config)
{'act_fn': ReLU(),
 'batch_norm': False,
 'batch_size': 16,
 'dropout_prob': 0.0003746131641613862,
 'epochs': 4096,
 'initialization': 'kaiming_uniform',
 'l1': 16,
 'lr_mult': 5.932218819111239,
 'optimizer': 'Adam',
 'patience': 4}

51.3 Training the Tuned Architecture on the Test Data

  • Since we are interested in the explainability of the model, we will train the tuned architecture on the test data.
  • spotpython’s test_model function [DOC] is used to train the model on the test data.
  • Note: Up to this point, we have not used any information about the NN’s weights and biases. Only the architecture, which is available as the config, is used.
  • spotpython uses the TensorBoard logger to save the training process in the ./runs directory. Therefore, we have to enable the TensorBoard logger in the fun_control dictionary. To get a clean start, we remove an existing runs folder, as sketched below.
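  • A minimal sketch of such a clean-up step (assumed; the code cell below does not show the removal itself):
import shutil
# Hypothetical clean-up: delete a stale ./runs directory so logging starts fresh.
shutil.rmtree("./runs", ignore_errors=True)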
from spotpython.light.testmodel import test_model
from spotpython.light.loadmodel import load_light_from_checkpoint
fun_control.update({"tensorboard_log": True})
test_model(config, fun_control)
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃   ┃ Name   ┃ Type       ┃ Params ┃ Mode  ┃ FLOPs ┃ In sizes ┃ Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  202 K │ train │ 6.5 M │ [16, 10] │   [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 202 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 202 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 6.5 M                                                                                                 
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃        Test metric        ┃       DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│         hp_metric         │     2695.215576171875     │
│         val_loss          │     2695.215576171875     │
└───────────────────────────┴───────────────────────────┘
test_model result: {'val_loss': 2695.215576171875, 'hp_metric': 2695.215576171875}
(2695.215576171875, 2695.215576171875)
model = load_light_from_checkpoint(config, fun_control)
config: {'l1': 16, 'epochs': 4096, 'batch_size': 16, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.0003746131641613862, 'lr_mult': 5.932218819111239, 'patience': 4, 'batch_norm': False, 'initialization': 'kaiming_uniform'}
Loading model with 16_4096_16_ReLU_Adam_0.0004_5.9322_4_False_kaiming_uniform_TEST from runs/saved_models/16_4096_16_ReLU_Adam_0.0004_5.9322_4_False_kaiming_uniform_TEST/last.ckpt
Model: NNLinearRegressor(
  (layers): Sequential(
    (0): Linear(in_features=10, out_features=320, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.0003746131641613862, inplace=False)
    (3): Linear(in_features=320, out_features=160, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.0003746131641613862, inplace=False)
    (6): Linear(in_features=160, out_features=320, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.0003746131641613862, inplace=False)
    (9): Linear(in_features=320, out_features=160, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.0003746131641613862, inplace=False)
    (12): Linear(in_features=160, out_features=160, bias=True)
    (13): ReLU()
    (14): Dropout(p=0.0003746131641613862, inplace=False)
    (15): Linear(in_features=160, out_features=80, bias=True)
    (16): ReLU()
    (17): Dropout(p=0.0003746131641613862, inplace=False)
    (18): Linear(in_features=80, out_features=80, bias=True)
    (19): ReLU()
    (20): Dropout(p=0.0003746131641613862, inplace=False)
    (21): Linear(in_features=80, out_features=1, bias=True)
  )
)

51.3.0.1 Details of the Training Process on the Test Data

  • The test_model method initializes the model with the tuned architecture as follows:
model = fun_control["core_model"](**config, _L_in=_L_in, _L_out=_L_out, _torchmetric=_torchmetric)
  • Then, the Lightning Trainer is initialized with the fun_control dictionary and the model as follows:

trainer = L.Trainer(
    default_root_dir=os.path.join(fun_control["CHECKPOINT_PATH"], config_id),
    max_epochs=model.hparams.epochs,
    accelerator=fun_control["accelerator"],
    devices=fun_control["devices"],
    logger=TensorBoardLogger(
        save_dir=fun_control["TENSORBOARD_PATH"],
        version=config_id,
        default_hp_metric=True,
        log_graph=fun_control["log_graph"],
    ),
    callbacks=[
        EarlyStopping(monitor="val_loss", patience=config["patience"], mode="min", strict=False, verbose=False),
        ModelCheckpoint(
            dirpath=os.path.join(fun_control["CHECKPOINT_PATH"], config_id), save_last=True
        ),
    ],
    enable_progress_bar=enable_progress_bar,
)
trainer.fit(model=model, datamodule=dm)
test_result = trainer.test(datamodule=dm, ckpt_path="last")
  • As shown in the code above, the last checkpoint is saved.

  • spotpython’s method load_light_from_checkpoint is used to load the last checkpoint and to get the model’s weights and biases. It requires the fun_control dictionary and the config_id as input to find the correct checkpoint.

  • Now, the model is trained and the weights and biases are available.

51.4 Visualizing the Neural Network Architecture

# get the device
from spotpython.utils.device import getDevice
device = getDevice()
from spotpython.plot.xai import viz_net
viz_net(model, device=device)

(Figure: visualization of the network architecture produced by viz_net.)

51.5 XAI Methods

  • spotpython provides methods to explain the model’s predictions. The following neural network elements can be analyzed:

51.5.1 Weights

  • Weights are the parameters of the neural network that are learned from the data during training. They connect neurons between layers and determine the strength and direction of the signal sent from one neuron to another. The network adjusts the weights during training to minimize the error between the predicted output and the actual output.
  • Interpretation of the weights: A high weight value indicates a strong influence of the input neuron on the output. Positive weights suggest a positive correlation, whereas negative weights suggest an inverse relationship between neurons.
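  • As a small illustration (plain PyTorch, not a spotpython call; model is the network loaded in Section 51.3), the sign and magnitude of the first-layer weights can be inspected directly:
w = model.layers[0].weight.detach()  # shape (320, 10): 320 neurons, 10 input features
print(w[0])  # weights feeding neuron 0: positive entries amplify an input, negative entries invert it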

51.5.2 Activations

  • Activations are the outputs produced by neurons after applying an activation function to the weighted sum of inputs. The activation function (e.g., ReLU, sigmoid, tanh) adds non-linearity to the model, allowing it to learn more complex relationships.
  • Interpretation of the activations: The value of activations indicates the intensity of the signal passed to the next layer. Certain activation patterns can highlight which features or parts of the data the network is focusing on.
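  • The mechanism behind activation extraction can be sketched with PyTorch forward hooks (illustrative only; spotpython’s get_activations, used below, handles this internally, and the random batch is an assumption):
import torch

captured = {}
def save_activation(name):
    def hook(module, args, output):
        captured[name] = output.detach()  # store the layer output at forward time
    return hook

for idx, layer in enumerate(model.layers):
    if isinstance(layer, torch.nn.Linear):
        layer.register_forward_hook(save_activation(f"Layer {idx}"))

dev = next(model.parameters()).device
_ = model(torch.randn(16, 10, device=dev))  # random batch with the Diabetes input width
print({name: tuple(a.shape) for name, a in captured.items()})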

51.5.3 Gradients

  • Gradients are the partial derivatives of the loss function with respect to different parameters (weights) of the network. During backpropagation, gradients are used to update the weights in the direction that reduces the loss by methods like gradient descent.
  • Interpretation of the gradients: The magnitude of the gradient indicates how much a parameter should change to reduce the error. A large gradient implies a steeper slope and a bigger update, while a small gradient suggests that the parameter is near an optimal point. If gradients are too small (vanishing gradient problem), the network may learn slowly or stop learning. If they are too large (exploding gradient problem), the updates may be unstable.
  • spotpython provides the method get_gradients to get the gradients of the model.
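  • The underlying mechanics can be sketched in plain PyTorch (a random batch is used here for illustration, whereas get_gradients draws from the train dataloader):
import torch

dev = next(model.parameters()).device  # reuse the model's device
x, y = torch.randn(16, 10, device=dev), torch.randn(16, 1, device=dev)  # placeholder batch
model.zero_grad()
loss = torch.nn.functional.mse_loss(model(x), y)
loss.backward()
print(model.layers[0].weight.grad.abs().mean())  # mean |gradient| of the first layer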
from spotpython.plot.xai import (
    get_activations,
    get_gradients,
    get_weights,
    visualize_weights,
    visualize_gradients,
    visualize_mean_activations,
    visualize_gradient_distributions,
    visualize_weights_distributions,
    visualize_activations_distributions,
)
batch_size = config["batch_size"]

51.5.4 Getting the Weights

from spotpython.plot.xai import sort_layers
weights, _ = get_weights(model)
# sort_layers(weights)
visualize_weights(model, absolute=True, cmap="GreenYellowRed", figsize=(6, 6))
3200 values in Layer Layer 0. Geometry: (320, 10)

51200 values in Layer Layer 3. Geometry: (160, 320)

51200 values in Layer Layer 6. Geometry: (320, 160)

51200 values in Layer Layer 9. Geometry: (160, 320)

25600 values in Layer Layer 12. Geometry: (160, 160)

12800 values in Layer Layer 15. Geometry: (80, 160)

6400 values in Layer Layer 18. Geometry: (80, 80)

80 values in Layer Layer 21. Geometry: (1, 80)
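  • The geometries printed above can be cross-checked with plain PyTorch, independently of spotpython:
for name, param in model.named_parameters():
    if name.endswith("weight"):
        print(name, tuple(param.shape))
# layers.0.weight (320, 10), layers.3.weight (160, 320), ..., layers.21.weight (1, 80)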

visualize_weights_distributions(model, color=f"C{0}", columns=4)
n:8

51.5.5 Getting the Activations

from spotpython.plot.xai import get_activations
activations, mean_activations, layer_sizes = get_activations(net=model, fun_control=fun_control, batch_size=batch_size, device=device)
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting train & val data.
train samples: 160, val samples: 106 generated for train & val data.
LightDataModule.train_dataloader(). data_train size: 160
visualize_mean_activations(mean_activations, layer_sizes=layer_sizes, absolute=True, cmap="GreenYellowRed", figsize=(6, 6))
320 values in Layer 0. Geometry: (1, 320)

160 values in Layer 3. Geometry: (1, 160)

320 values in Layer 6. Geometry: (1, 320)

160 values in Layer 9. Geometry: (1, 160)

160 values in Layer 12. Geometry: (1, 160)

80 values in Layer 15. Geometry: (1, 80)

80 values in Layer 18. Geometry: (1, 80)

visualize_activations_distributions(activations=activations,
                                    net=model, color="C0", columns=4)

51.5.6 Getting the Gradients

gradients, _ = get_gradients(net=model, fun_control=fun_control, batch_size=batch_size, device=device)
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting train & val data.
train samples: 160, val samples: 106 generated for train & val data.
LightDataModule.train_dataloader(). data_train size: 160
visualize_gradients(model, fun_control, batch_size, absolute=True, cmap="GreenYellowRed", figsize=(6, 6), device=device)
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting train & val data.
train samples: 160, val samples: 106 generated for train & val data.
LightDataModule.train_dataloader(). data_train size: 160
3200 values in Layer layers.0.weight. Geometry: (320, 10)

51200 values in Layer layers.3.weight. Geometry: (160, 320)

51200 values in Layer layers.6.weight. Geometry: (320, 160)

51200 values in Layer layers.9.weight. Geometry: (160, 320)

25600 values in Layer layers.12.weight. Geometry: (160, 160)

12800 values in Layer layers.15.weight. Geometry: (80, 160)

6400 values in Layer layers.18.weight. Geometry: (80, 80)

80 values in Layer layers.21.weight. Geometry: (1, 80)

visualize_gradient_distributions(model, fun_control, batch_size=batch_size, color=f"C{0}", device=device, columns=3)
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting train & val data.
train samples: 160, val samples: 106 generated for train & val data.
LightDataModule.train_dataloader(). data_train size: 160
n:8

51.6 Feature Attributions

51.6.1 Integrated Gradients

from spotpython.plot.xai import get_attributions, plot_attributions
df_att = get_attributions(S, fun_control, attr_method="IntegratedGradients", n_rel=10)
plot_attributions(df_att, attr_method="IntegratedGradients")
┏━━━┳━━━━━━━━┳━━━━━━━━━━━━┳━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━┳━━━━━━━━━━━┓
┃   ┃ Name   ┃ Type       ┃ Params ┃ Mode  ┃ FLOPs ┃ In sizes ┃ Out sizes ┃
┡━━━╇━━━━━━━━╇━━━━━━━━━━━━╇━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━╇━━━━━━━━━━━┩
│ 0 │ layers │ Sequential │  202 K │ train │ 6.5 M │ [16, 10] │   [16, 1] │
└───┴────────┴────────────┴────────┴───────┴───────┴──────────┴───────────┘
Trainable params: 202 K                                                                                            
Non-trainable params: 0                                                                                            
Total params: 202 K                                                                                                
Total estimated model params size (MB): 0                                                                          
Modules in train mode: 17                                                                                          
Modules in eval mode: 0                                                                                            
Total FLOPs: 6.5 M                                                                                                 
train_model result: {'val_loss': 10326.0830078125, 'hp_metric': 10326.0830078125}
config: {'l1': 16, 'epochs': 4096, 'batch_size': 16, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.0003746131641613862, 'lr_mult': 5.932218819111239, 'patience': 4, 'batch_norm': False, 'initialization': 'kaiming_uniform'}
Loading model with 16_4096_16_ReLU_Adam_0.0004_5.9322_4_False_kaiming_uniform_TRAIN from runs/saved_models/16_4096_16_ReLU_Adam_0.0004_5.9322_4_False_kaiming_uniform_TRAIN/last.ckpt
Model: NNLinearRegressor(...)  [architecture printout identical to the one in Section 51.3 omitted]
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting test data.
test samples: 177 generated for test data.
LightDataModule.test_dataloader(). Test set size: 177
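  • The attribution methods follow Captum’s naming, so the computation can also be sketched directly with Captum (assumptions: captum is installed and x_test stands in for a real test batch; this is not the spotpython code path):
import torch
from captum.attr import IntegratedGradients

model.eval()                              # disable dropout for deterministic attributions
dev = next(model.parameters()).device
x_test = torch.randn(16, 10, device=dev)  # placeholder for a real test batch
ig = IntegratedGradients(model)
attributions = ig.attribute(x_test, baselines=torch.zeros_like(x_test), n_steps=50)
print(attributions.mean(dim=0))           # mean attribution per input feature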

51.6.2 Deep Lift

df_lift = get_attributions(S, fun_control, attr_method="DeepLift",n_rel=10)
print(df_lift)
plot_attributions(df_lift,  attr_method="DeepLift")
[Lightning model summary (202 K params, 6.5 M FLOPs) identical to the one in Section 51.6.1 omitted]
train_model result: {'val_loss': 10574.48828125, 'hp_metric': 10574.48828125}
config: {'l1': 16, 'epochs': 4096, 'batch_size': 16, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.0003746131641613862, 'lr_mult': 5.932218819111239, 'patience': 4, 'batch_norm': False, 'initialization': 'kaiming_uniform'}
Loading model with 16_4096_16_ReLU_Adam_0.0004_5.9322_4_False_kaiming_uniform_TRAIN from runs/saved_models/16_4096_16_ReLU_Adam_0.0004_5.9322_4_False_kaiming_uniform_TRAIN/last.ckpt
Model: NNLinearRegressor(...)  [architecture printout identical to the one in Section 51.3 omitted]
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting test data.
test samples: 177 generated for test data.
LightDataModule.test_dataloader(). Test set size: 177
   Feature Index Feature  DeepLiftAttribution
0              1     sex          1199.707031
1              9  s6_glu          1109.091919
2              3      bp          1041.445312
3              2     bmi           994.195007
4              0     age           916.567139
5              6  s3_hdl           910.443237
6              8  s5_ltg           908.410095
7              5  s2_ldl           852.544678
8              7  s4_tch           808.548218
9              4   s1_tc           782.347595

51.6.3 Feature Ablation

df_fl = get_attributions(S, fun_control, attr_method="FeatureAblation",n_rel=10)
[Lightning model summary (202 K params, 6.5 M FLOPs) identical to the one in Section 51.6.1 omitted]
train_model result: {'val_loss': 7053.767578125, 'hp_metric': 7053.767578125}
config: {'l1': 16, 'epochs': 4096, 'batch_size': 16, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.0003746131641613862, 'lr_mult': 5.932218819111239, 'patience': 4, 'batch_norm': False, 'initialization': 'kaiming_uniform'}
Loading model with 16_4096_16_ReLU_Adam_0.0004_5.9322_4_False_kaiming_uniform_TRAIN from runs/saved_models/16_4096_16_ReLU_Adam_0.0004_5.9322_4_False_kaiming_uniform_TRAIN/last.ckpt
Model: NNLinearRegressor(...)  [architecture printout identical to the one in Section 51.3 omitted]
train_size: 0.36, val_size: 0.24, test_sie: 0.4 for splitting test data.
test samples: 177 generated for test data.
LightDataModule.test_dataloader(). Test set size: 177
print(df_fl)
plot_attributions(df_fl, attr_method="FeatureAblation")
   Feature Index Feature  FeatureAblationAttribution
0              1     sex                  652.003418
1              9  s6_glu                  594.168579
2              3      bp                  590.895203
3              2     bmi                  554.761719
4              8  s5_ltg                  511.712616
5              0     age                  480.831482
6              6  s3_hdl                  474.671814
7              5  s2_ldl                  459.251709
8              7  s4_tch                  430.860962
9              4   s1_tc                  415.566467
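  • Since df_lift and df_fl share the Feature Index and Feature columns (names as printed above), the two rankings can be compared directly with a small pandas sketch:
comparison = df_lift.merge(df_fl, on=["Feature Index", "Feature"])
print(comparison.sort_values("FeatureAblationAttribution", ascending=False))
# Both methods agree on sex, s6_glu, bp, and bmi as the four most relevant features.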

51.7 Conductance

from spotpython.plot.xai import plot_conductance_last_layer, get_weights_conductance_last_layer
weights_last, layer_conductance_last = get_weights_conductance_last_layer(S, fun_control)
plot_conductance_last_layer(weights_last, layer_conductance_last, figsize=(6, 6))
[Lightning model summary (202 K params, 6.5 M FLOPs) identical to the one in Section 51.6.1 omitted]
train_model result: {'val_loss': 3613.687744140625, 'hp_metric': 3613.687744140625}
config: {'l1': 16, 'epochs': 4096, 'batch_size': 16, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.0003746131641613862, 'lr_mult': 5.932218819111239, 'patience': 4, 'batch_norm': False, 'initialization': 'kaiming_uniform'}
Loading model with 16_4096_16_ReLU_Adam_0.0004_5.9322_4_False_kaiming_uniform_TRAIN from runs/saved_models/16_4096_16_ReLU_Adam_0.0004_5.9322_4_False_kaiming_uniform_TRAIN/last.ckpt
Model: NNLinearRegressor(...)  [architecture printout identical to the one in Section 51.3 omitted]
[Lightning model summary (202 K params, 6.5 M FLOPs) identical to the one in Section 51.6.1 omitted]
train_model result: {'val_loss': 4638.98779296875, 'hp_metric': 4638.98779296875}
config: {'l1': 16, 'epochs': 4096, 'batch_size': 16, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.0003746131641613862, 'lr_mult': 5.932218819111239, 'patience': 4, 'batch_norm': False, 'initialization': 'kaiming_uniform'}
Loading model with 16_4096_16_ReLU_Adam_0.0004_5.9322_4_False_kaiming_uniform_TRAIN from runs/saved_models/16_4096_16_ReLU_Adam_0.0004_5.9322_4_False_kaiming_uniform_TRAIN/last.ckpt
Model: NNLinearRegressor(...)  [architecture printout identical to the one in Section 51.3 omitted]
Conductance analysis for layer:  Linear(in_features=80, out_features=1, bias=True)
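  • With Captum, the conductance analysis of the last layer corresponds roughly to the following sketch (assumptions: captum is installed, x_test stands in for a real test batch, and attribute_to_layer_input=True yields one value per input neuron of the layer; this is not the exact spotpython implementation):
import torch
from captum.attr import LayerConductance

model.eval()                                    # disable dropout for deterministic attributions
dev = next(model.parameters()).device
x_test = torch.randn(16, 10, device=dev)        # placeholder for a real test batch
lc = LayerConductance(model, model.layers[21])  # Linear(80, 1), as reported above
cond = lc.attribute(x_test, attribute_to_layer_input=True)  # one value per input neuron
print(cond.mean(dim=0))                         # mean conductance for each of the 80 inputs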