import sys

# Put the directory that holds the user-defined model files on the import
# path *before* importing from it; otherwise the two imports below fail.
sys.path.insert(0, './userModel')
import my_resnet
import my_hyper_dict
40 Hyperparameter Tuning with spotpython and PyTorch Lightning for the Diabetes Data Set Using a User Specified ResNet Model
After importing the necessary libraries, the `fun_control` dictionary is set up via the `fun_control_init` function. The `fun_control` dictionary contains:
- `PREFIX`: a unique identifier for the experiment.
- `fun_evals`: the number of function evaluations.
- `max_time`: the maximum run time in minutes.
- `data_set`: the data set. Here we use the `Diabetes` data set that is provided by `spotpython`.
- `core_model_name`: the class name of the neural network model. This neural network model is provided by `spotpython`.
- `hyperdict`: the hyperparameter dictionary. This dictionary is used to define the hyperparameters of the neural network model. It is also provided by `spotpython`.
- `_L_in`: the number of input features. Since the `Diabetes` data set has 10 features, `_L_in` is set to 10.
- `_L_out`: the number of output features. Since we want to predict a single value, `_L_out` is set to 1.
The `HyperLight` class is used to define the objective function `fun`. It connects the `PyTorch` and the `spotpython` methods and is provided by `spotpython`.
To access the user specified ResNet model, the path to the user model must be added to the Python path:
In the following code, we do not specify the ResNet model in the fun_control
dictionary. It will be added in a second step as the user specified model.
from spotpython.data.diabetes import Diabetes
from spotpython.hyperdict.light_hyper_dict import LightHyperDict
from spotpython.fun.hyperlight import HyperLight
from spotpython.utils.init import (fun_control_init, surrogate_control_init, design_control_init)
from spotpython.utils.eda import gen_design_table
from spotpython.spot import spot
from spotpython.utils.file import get_experiment_filename
from math import inf  # `inf` is used for fun_evals below

PREFIX = "606-user-resnet"  # unique identifier for the experiment
data_set = Diabetes()

# The core model is deliberately not specified here; it is added to
# fun_control in a second step as the user specified model.
fun_control = fun_control_init(
    PREFIX=PREFIX,
    fun_evals=inf,      # number of function evaluations (unbounded here)
    max_time=1,         # maximum run time in minutes
    data_set=data_set,
    _L_in=10,           # the Diabetes data set has 10 input features
    _L_out=1)           # a single value is predicted

# Objective function that connects the PyTorch and the spotpython methods.
fun = HyperLight().fun
In a second step, we can add the user specified ResNet model to the fun_control
dictionary:
from spotpython.hyperparameters.values import add_core_model_to_fun_control

# Register the user specified network class and its hyperparameter
# dictionary in the existing fun_control dictionary.
add_core_model_to_fun_control(fun_control=fun_control,
                              core_model=my_resnet.MyResNet,
                              hyper_dict=my_hyper_dict.MyHyperDict)
The method set_hyperparameter
allows the user to modify default hyperparameter settings. Here we modify some hyperparameters to keep the model small and to decrease the tuning time.
from spotpython.hyperparameters.values import set_hyperparameter

# Override default hyperparameter settings to keep the model small and
# to decrease the tuning time.
set_hyperparameter(fun_control, "optimizer", ["Adadelta", "Adam", "Adamax"])
set_hyperparameter(fun_control, "l1", [3, 4])
set_hyperparameter(fun_control, "epochs", [3, 7])
set_hyperparameter(fun_control, "batch_size", [4, 11])
set_hyperparameter(fun_control, "dropout_prob", [0.0, 0.025])
set_hyperparameter(fun_control, "patience", [2, 3])
set_hyperparameter(fun_control, "lr_mult", [0.1, 20.0])

design_control = design_control_init(init_size=10)
print(gen_design_table(fun_control))
| name | type | default | lower | upper | transform |
|----------------|--------|-----------|---------|---------|-----------------------|
| l1 | int | 3 | 3 | 4 | transform_power_2_int |
| epochs | int | 4 | 3 | 7 | transform_power_2_int |
| batch_size | int | 4 | 4 | 11 | transform_power_2_int |
| act_fn | factor | ReLU | 0 | 5 | None |
| optimizer | factor | SGD | 0 | 2 | None |
| dropout_prob | float | 0.01 | 0 | 0.025 | None |
| lr_mult | float | 1.0 | 0.1 | 20 | None |
| patience | int | 2 | 2 | 3 | transform_power_2_int |
| initialization | factor | Default | 0 | 4 | None |
Finally, a Spot
object is created. Calling the method run()
starts the hyperparameter tuning process.
# Create the Spot object and start the hyperparameter tuning process.
spot_tuner = spot.Spot(fun=fun, fun_control=fun_control, design_control=design_control)
res = spot_tuner.run()
In fun(): config:
{'act_fn': Tanh(),
'batch_size': 128,
'dropout_prob': np.float64(0.0004697637333605659),
'epochs': 64,
'initialization': 'kaiming_normal',
'l1': 8,
'lr_mult': np.float64(12.192543453926158),
'optimizer': 'Adamax',
'patience': 4}
Milestones: [16, 32, 48]
train_model result: {'val_loss': 23404.1953125, 'hp_metric': 23404.1953125}
In fun(): config:
{'act_fn': LeakyReLU(),
'batch_size': 128,
'dropout_prob': np.float64(0.010646332369413225),
'epochs': 8,
'initialization': 'kaiming_normal',
'l1': 16,
'lr_mult': np.float64(4.816418992866534),
'optimizer': 'Adam',
'patience': 8}
Milestones: [2, 4, 6]
train_model result: {'val_loss': 22284.63671875, 'hp_metric': 22284.63671875}
In fun(): config:
{'act_fn': ELU(),
'batch_size': 512,
'dropout_prob': np.float64(0.018543188398461703),
'epochs': 64,
'initialization': 'xavier_uniform',
'l1': 16,
'lr_mult': np.float64(1.7231674050173253),
'optimizer': 'Adam',
'patience': 4}
Milestones: [16, 32, 48]
train_model result: {'val_loss': 23765.34375, 'hp_metric': 23765.34375}
In fun(): config:
{'act_fn': Sigmoid(),
'batch_size': 16,
'dropout_prob': np.float64(0.01445464281687503),
'epochs': 8,
'initialization': 'Default',
'l1': 16,
'lr_mult': np.float64(18.276406412830898),
'optimizer': 'Adam',
'patience': 4}
Milestones: [2, 4, 6]
train_model result: {'val_loss': 23489.228515625, 'hp_metric': 23489.228515625}
In fun(): config:
{'act_fn': ReLU(),
'batch_size': 1024,
'dropout_prob': np.float64(0.006041903974622355),
'epochs': 128,
'initialization': 'xavier_uniform',
'l1': 16,
'lr_mult': np.float64(11.56656626473706),
'optimizer': 'Adam',
'patience': 8}
Milestones: [32, 64, 96]
train_model result: {'val_loss': 22797.18359375, 'hp_metric': 22797.18359375}
In fun(): config:
{'act_fn': ReLU(),
'batch_size': 2048,
'dropout_prob': np.float64(0.017180774192768807),
'epochs': 128,
'initialization': 'kaiming_uniform',
'l1': 8,
'lr_mult': np.float64(17.497450615230047),
'optimizer': 'Adadelta',
'patience': 8}
Milestones: [32, 64, 96]
train_model result: {'val_loss': 6766.41552734375, 'hp_metric': 6766.41552734375}
In fun(): config:
{'act_fn': Tanh(),
'batch_size': 256,
'dropout_prob': np.float64(0.020472444721704793),
'epochs': 16,
'initialization': 'xavier_normal',
'l1': 8,
'lr_mult': np.float64(9.056021366896996),
'optimizer': 'Adam',
'patience': 4}
Milestones: [4, 8, 12]
train_model result: {'val_loss': 24115.458984375, 'hp_metric': 24115.458984375}
In fun(): config:
{'act_fn': Swish(),
'batch_size': 512,
'dropout_prob': np.float64(0.022634716447473148),
'epochs': 16,
'initialization': 'kaiming_uniform',
'l1': 8,
'lr_mult': np.float64(3.2137011004706815),
'optimizer': 'Adadelta',
'patience': 4}
Milestones: [4, 8, 12]
train_model result: {'val_loss': 23931.376953125, 'hp_metric': 23931.376953125}
In fun(): config:
{'act_fn': LeakyReLU(),
'batch_size': 32,
'dropout_prob': np.float64(0.0031403022317008592),
'epochs': 32,
'initialization': 'xavier_uniform',
'l1': 8,
'lr_mult': np.float64(15.164637662335846),
'optimizer': 'Adam',
'patience': 8}
Milestones: [8, 16, 24]
train_model result: {'val_loss': 21796.736328125, 'hp_metric': 21796.736328125}
In fun(): config:
{'act_fn': ELU(),
'batch_size': 64,
'dropout_prob': np.float64(0.007623810344374791),
'epochs': 32,
'initialization': 'kaiming_normal',
'l1': 16,
'lr_mult': np.float64(7.425439588318577),
'optimizer': 'Adamax',
'patience': 8}
Milestones: [8, 16, 24]
train_model result: {'val_loss': 23608.984375, 'hp_metric': 23608.984375}
In fun(): config:
{'act_fn': ReLU(),
'batch_size': 2048,
'dropout_prob': np.float64(0.01552995414446641),
'epochs': 128,
'initialization': 'kaiming_uniform',
'l1': 8,
'lr_mult': np.float64(20.0),
'optimizer': 'Adadelta',
'patience': 8}
Milestones: [32, 64, 96]
train_model result: {'val_loss': 4165.15234375, 'hp_metric': 4165.15234375}
spotpython tuning: 4165.15234375 [#---------] 5.50%
In fun(): config:
{'act_fn': ELU(),
'batch_size': 2048,
'dropout_prob': np.float64(0.025),
'epochs': 128,
'initialization': 'kaiming_uniform',
'l1': 8,
'lr_mult': np.float64(20.0),
'optimizer': 'Adadelta',
'patience': 8}
Milestones: [32, 64, 96]
train_model result: {'val_loss': 21898.88671875, 'hp_metric': 21898.88671875}
spotpython tuning: 4165.15234375 [##--------] 24.48%
In fun(): config:
{'act_fn': ReLU(),
'batch_size': 2048,
'dropout_prob': np.float64(0.013321149691097397),
'epochs': 128,
'initialization': 'kaiming_uniform',
'l1': 8,
'lr_mult': np.float64(20.0),
'optimizer': 'Adadelta',
'patience': 8}
Milestones: [32, 64, 96]
train_model result: {'val_loss': 23360.689453125, 'hp_metric': 23360.689453125}
spotpython tuning: 4165.15234375 [####------] 44.37%
In fun(): config:
{'act_fn': ReLU(),
'batch_size': 16,
'dropout_prob': np.float64(0.025),
'epochs': 128,
'initialization': 'kaiming_normal',
'l1': 8,
'lr_mult': np.float64(20.0),
'optimizer': 'Adamax',
'patience': 8}
Milestones: [32, 64, 96]
train_model result: {'val_loss': 20871.68359375, 'hp_metric': 20871.68359375}
spotpython tuning: 4165.15234375 [##########] 100.00% Done...
40.1 Looking at the Results
40.1.1 Tuning Progress
After the hyperparameter tuning run is finished, the progress of the hyperparameter tuning can be visualized with `spotpython`'s method `plot_progress`. The black points represent the performance values (score or metric) of hyperparameter configurations from the initial design, whereas the red points represent the hyperparameter configurations found by the surrogate-model-based optimization.
spot_tuner.plot_progress()
40.1.2 Tuned Hyperparameters and Their Importance
Results can be printed in tabular form.
from spotpython.utils.eda import gen_design_table
print(gen_design_table(fun_control=fun_control, spot=spot_tuner))
| name | type | default | lower | upper | tuned | transform | importance | stars |
|----------------|--------|-----------|---------|---------|---------------------|-----------------------|--------------|---------|
| l1 | int | 3 | 3.0 | 4.0 | 3.0 | transform_power_2_int | 0.01 | |
| epochs | int | 4 | 3.0 | 7.0 | 7.0 | transform_power_2_int | 0.00 | |
| batch_size | int | 4 | 4.0 | 11.0 | 11.0 | transform_power_2_int | 0.03 | |
| act_fn | factor | ReLU | 0.0 | 5.0 | ReLU | None | 0.08 | |
| optimizer | factor | SGD | 0.0 | 2.0 | Adadelta | None | 0.00 | |
| dropout_prob | float | 0.01 | 0.0 | 0.025 | 0.01552995414446641 | None | 100.00 | *** |
| lr_mult | float | 1.0 | 0.1 | 20.0 | 20.0 | None | 0.00 | |
| patience | int | 2 | 2.0 | 3.0 | 3.0 | transform_power_2_int | 0.00 | |
| initialization | factor | Default | 0.0 | 4.0 | kaiming_uniform | None | 0.00 | |
A histogram can be used to visualize the most important hyperparameters.
spot_tuner.plot_importance(threshold=1.0)
spot_tuner.plot_important_hyperparameter_contour(max_imp=3)
l1: 0.01062745364702381
epochs: 0.001
batch_size: 0.02978691665200304
act_fn: 0.07734913702242881
optimizer: 0.001
dropout_prob: 100.0
lr_mult: 0.001
patience: 0.001
initialization: 0.001
40.1.3 Get the Tuned Architecture
import pprint
from spotpython.hyperparameters.values import get_tuned_architecture

# Fetch the tuned hyperparameter configuration and pretty-print it.
config = get_tuned_architecture(spot_tuner, fun_control)
pprint.pprint(config)
{'act_fn': ReLU(),
'batch_size': 2048,
'dropout_prob': np.float64(0.01552995414446641),
'epochs': 128,
'initialization': 'kaiming_uniform',
'l1': 8,
'lr_mult': np.float64(20.0),
'optimizer': 'Adadelta',
'patience': 8}
40.2 Details of the User-Specified ResNet Model
The specification of a user model requires three files:
- `my_resnet.py`: the Python file containing the user specified ResNet model
- `my_hyperdict.py`: the Python file for loading the hyperparameter dictionary `my_hyperdict.json` for the user specified ResNet model
- `my_hyperdict.json`: the JSON file containing the hyperparameter dictionary for the user specified ResNet model
40.2.1 my_resnet.py
import lightning as L
import torch
from torch import nn
from spotpython.hyperparameters.optimizer import optimizer_handler
import torchmetrics.functional.regression
import torch.optim as optim
class ResidualBlock(nn.Module):
    """Two-layer fully connected block with a residual (skip) connection.

    Each of the two stages applies ``Linear -> BatchNorm1d -> LayerNorm``.
    The activation and dropout are applied after the first stage; the
    activation is applied again after the skip connection has been added.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output features of both linear layers.
        act_fn: Activation module, applied after the first stage and after
            the residual addition.
        dropout_prob: Dropout probability used after the first activation.
    """

    def __init__(self, input_dim, output_dim, act_fn, dropout_prob):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, output_dim)
        self.bn1 = nn.BatchNorm1d(output_dim)
        self.ln1 = nn.LayerNorm(output_dim)
        self.fc2 = nn.Linear(output_dim, output_dim)
        self.bn2 = nn.BatchNorm1d(output_dim)
        self.ln2 = nn.LayerNorm(output_dim)
        self.act_fn = act_fn
        self.dropout = nn.Dropout(dropout_prob)

        # An empty Sequential is the identity. A projection is only needed
        # when the skip path must change the feature dimension.
        self.shortcut = nn.Sequential()
        if input_dim != output_dim:
            self.shortcut = nn.Sequential(
                nn.Linear(input_dim, output_dim),
                nn.BatchNorm1d(output_dim),
            )

    def forward(self, x):
        """Return ``act_fn(stage2(stage1(x)) + shortcut(x))``."""
        identity = self.shortcut(x)

        out = self.fc1(x)
        out = self.bn1(out)
        out = self.ln1(out)
        out = self.act_fn(out)
        out = self.dropout(out)

        out = self.fc2(out)
        out = self.bn2(out)
        out = self.ln2(out)

        out += identity  # Residual connection
        out = self.act_fn(out)
        return out
class MyResNet(L.LightningModule):
    """User specified ResNet-style regression network as a LightningModule.

    The network is a stack of ``ResidualBlock`` modules whose widths come
    from ``_get_hidden_sizes``, followed by a final ``nn.Linear`` that maps
    to ``_L_out`` outputs.

    Args:
        l1: Width of the first hidden layer; must be at least 4.
        epochs: Number of epochs; used here to place the scheduler milestones.
        batch_size: Batch size; used here to size ``example_input_array``.
        initialization: Weight initialization for linear layers. One of
            "xavier_uniform", "xavier_normal", "kaiming_uniform",
            "kaiming_normal"; any other value selects ``nn.init.uniform_``.
        act_fn: Activation module passed to every residual block.
        optimizer: Optimizer name handed to ``optimizer_handler``.
        dropout_prob: Dropout probability passed to every residual block.
        lr_mult: Learning rate multiplier handed to ``optimizer_handler``.
        patience: Stored as a hyperparameter; not used inside this class.
        _L_in: Number of input features.
        _L_out: Number of output features.
        _torchmetric: Name of a function in
            ``torchmetrics.functional.regression``; ``None`` selects
            "mean_squared_error".

    Raises:
        ValueError: If ``l1`` is smaller than 4.
    """

    def __init__(
        self,
        l1: int,
        epochs: int,
        batch_size: int,
        initialization: str,
        act_fn: nn.Module,
        optimizer: str,
        dropout_prob: float,
        lr_mult: float,
        patience: int,
        _L_in: int,
        _L_out: int,
        _torchmetric: str,
    ):
        super().__init__()
        self._L_in = _L_in
        self._L_out = _L_out
        if _torchmetric is None:
            _torchmetric = "mean_squared_error"
        self._torchmetric = _torchmetric
        self.metric = getattr(torchmetrics.functional.regression, _torchmetric)
        # The underscore arguments are kept as plain attributes and excluded
        # from the saved hyperparameters.
        self.save_hyperparameters(ignore=["_L_in", "_L_out", "_torchmetric"])
        self.example_input_array = torch.zeros((batch_size, self._L_in))
        if self.hparams.l1 < 4:
            raise ValueError("l1 must be at least 4")

        # Get hidden sizes
        hidden_sizes = self._get_hidden_sizes()
        layer_sizes = [self._L_in] + hidden_sizes

        # Construct the layers with Residual Blocks and Linear Layer at the end
        layers = []
        for i in range(len(layer_sizes) - 1):
            layers.append(
                ResidualBlock(
                    layer_sizes[i],
                    layer_sizes[i + 1],
                    self.hparams.act_fn,
                    self.hparams.dropout_prob,
                )
            )
        layers.append(nn.Linear(layer_sizes[-1], self._L_out))

        self.layers = nn.Sequential(*layers)

        # Initialization (Xavier, Kaiming, or Default)
        self.apply(self._init_weights)

    def _init_weights(self, module):
        """Initialize an ``nn.Linear`` module; other modules are left as is."""
        if isinstance(module, nn.Linear):
            if self.hparams.initialization == "xavier_uniform":
                nn.init.xavier_uniform_(module.weight)
            elif self.hparams.initialization == "xavier_normal":
                nn.init.xavier_normal_(module.weight)
            elif self.hparams.initialization == "kaiming_uniform":
                nn.init.kaiming_uniform_(module.weight)
            elif self.hparams.initialization == "kaiming_normal":
                nn.init.kaiming_normal_(module.weight)
            else:  # "Default"
                nn.init.uniform_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

    def _generate_div2_list(self, n, n_min) -> list:
        """Return widths obtained by repeatedly halving ``n`` down to ``n_min``.

        Starting at ``n``, the current value is appended and then halved with
        integer division for as long as it is ``>= n_min``. The k-th value
        (1-based) is appended ``min(k, 4)`` times.
        """
        result = []
        current = n
        repeats = 1
        max_repeats = 4
        while current >= n_min:
            result.extend([current] * min(repeats, max_repeats))
            current = current // 2
            repeats = repeats + 1
        return result

    def _get_hidden_sizes(self):
        """Return the list of hidden layer widths derived from ``l1`` and ``_L_in``."""
        n_low = max(2, int(self._L_in / 4))  # Ensure minimum reasonable size
        n_high = max(self.hparams.l1, 2 * n_low)
        hidden_sizes = self._generate_div2_list(n_high, n_low)
        return hidden_sizes

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Pass ``x`` through the residual stack and the final linear layer."""
        x = self.layers(x)
        return x

    def _calculate_loss(self, batch):
        """Return ``self.metric(prediction, target)`` for an ``(x, y)`` batch."""
        x, y = batch
        y = y.view(len(y), 1)  # targets as a column, one row per sample
        y_hat = self(x)
        loss = self.metric(y_hat, y)
        return loss

    def training_step(self, batch: tuple) -> torch.Tensor:
        """Return the loss for one training batch."""
        val_loss = self._calculate_loss(batch)
        return val_loss

    def validation_step(self, batch: tuple, batch_idx: int, prog_bar: bool = False) -> torch.Tensor:
        """Compute the loss for one validation batch and log it twice.

        The value is logged as "val_loss" and as "hp_metric".
        """
        val_loss = self._calculate_loss(batch)
        self.log("val_loss", val_loss, prog_bar=prog_bar)
        self.log("hp_metric", val_loss, prog_bar=prog_bar)
        return val_loss

    def test_step(self, batch: tuple, batch_idx: int, prog_bar: bool = False) -> torch.Tensor:
        """Compute the loss for one test batch and log it twice.

        The value is logged as "val_loss" and as "hp_metric".
        """
        val_loss = self._calculate_loss(batch)
        self.log("val_loss", val_loss, prog_bar=prog_bar)
        self.log("hp_metric", val_loss, prog_bar=prog_bar)
        return val_loss

    def predict_step(self, batch: tuple, batch_idx: int, prog_bar: bool = False) -> torch.Tensor:
        """Return ``(x, y, yhat)`` with ``y`` and ``yhat`` reshaped to columns."""
        x, y = batch
        yhat = self(x)
        y = y.view(len(y), 1)
        yhat = yhat.view(len(yhat), 1)
        return (x, y, yhat)

    def configure_optimizers(self):
        """Create the optimizer and a ``MultiStepLR`` learning rate scheduler.

        Returns:
            dict: ``{"optimizer": ..., "lr_scheduler": ...}`` where the
            scheduler configuration steps once per epoch.
        """
        optimizer = optimizer_handler(
            optimizer_name=self.hparams.optimizer,
            params=self.parameters(),
            lr_mult=self.hparams.lr_mult,
        )

        # Dynamic creation of milestones based on the number of epochs.
        num_milestones = 3  # Number of milestones to divide the epochs
        milestones = [int(self.hparams.epochs / (num_milestones + 1) * (i + 1)) for i in range(num_milestones)]

        # Print milestones for debug purposes
        print(f"Milestones: {milestones}")

        # Create MultiStepLR scheduler with dynamic milestones and learning rate multiplier.
        scheduler = optim.lr_scheduler.MultiStepLR(
            optimizer,
            milestones=milestones,
            gamma=0.1,  # Decay factor
        )

        # Learning rate scheduler configuration
        lr_scheduler_config = {
            "scheduler": scheduler,
            "interval": "epoch",  # Adjust learning rate per epoch
            "frequency": 1,  # Apply the scheduler at every epoch
        }

        return {"optimizer": optimizer, "lr_scheduler": lr_scheduler_config}
40.2.2 my_hyperdict.py
import json
from spotpython.data import base
import pathlib
class MyHyperDict(base.FileConfig):
    """User specified hyperparameter dictionary.

    This class extends the FileConfig class to provide a dictionary for storing hyperparameters.

    Attributes:
        filename (str):
            The name of the file where the hyperparameters are stored.
        directory:
            Directory that contains the file. When falsy, the file is looked
            up next to this Python module.
        hyper_dict (dict):
            The hyperparameters loaded from the JSON file at construction.
    """

    def __init__(
        self,
        filename: str = "my_hyper_dict.json",
        directory: None = None,
    ) -> None:
        super().__init__(filename=filename, directory=directory)
        self.filename = filename
        self.directory = directory
        # The file is read immediately, so a missing or invalid file fails
        # at construction time.
        self.hyper_dict = self.load()

    @property
    def path(self):
        """Return the full path of the hyperparameter file."""
        if self.directory:
            return pathlib.Path(self.directory).joinpath(self.filename)
        return pathlib.Path(__file__).parent.joinpath(self.filename)

    def load(self) -> dict:
        """Load the hyperparameters from the file.

        Returns:
            dict: A dictionary containing the hyperparameters.

        Examples:
            # Assume the user specified file `my_hyper_dict.json` is in the `./hyperdict/` directory.
            >>> user_lhd = MyHyperDict(filename='my_hyper_dict.json', directory='./hyperdict/')
        """
        # Read as UTF-8 explicitly so the result does not depend on the
        # platform's default encoding.
        with open(self.path, "r", encoding="utf-8") as f:
            d = json.load(f)
        return d
40.2.3 my_hyperdict.json
{
    "MyResNet": {
        "l1": {
            "type": "int",
            "default": 3,
            "transform": "transform_power_2_int",
            "lower": 3,
            "upper": 10
        },
        "epochs": {
            "type": "int",
            "default": 4,
            "transform": "transform_power_2_int",
            "lower": 4,
            "upper": 9
        },
        "batch_size": {
            "type": "int",
            "default": 4,
            "transform": "transform_power_2_int",
            "lower": 1,
            "upper": 6
        },
        "act_fn": {
            "levels": [
                "Sigmoid",
                "Tanh",
                "ReLU",
                "LeakyReLU",
                "ELU",
                "Swish"
            ],
            "type": "factor",
            "default": "ReLU",
            "transform": "None",
            "class_name": "spotpython.torch.activation",
            "core_model_parameter_type": "instance()",
            "lower": 0,
            "upper": 5
        },
        "optimizer": {
            "levels": [
                "Adadelta",
                "Adagrad",
                "Adam",
                "AdamW",
                "SparseAdam",
                "Adamax",
                "ASGD",
                "NAdam",
                "RAdam",
                "RMSprop",
                "Rprop",
                "SGD"
            ],
            "type": "factor",
            "default": "SGD",
            "transform": "None",
            "class_name": "torch.optim",
            "core_model_parameter_type": "str",
            "lower": 0,
            "upper": 11
        },
        "dropout_prob": {
            "type": "float",
            "default": 0.01,
            "transform": "None",
            "lower": 0.0,
            "upper": 0.25
        },
        "lr_mult": {
            "type": "float",
            "default": 1.0,
            "transform": "None",
            "lower": 0.1,
            "upper": 10.0
        },
        "patience": {
            "type": "int",
            "default": 2,
            "transform": "transform_power_2_int",
            "lower": 2,
            "upper": 6
        },
        "initialization": {
            "levels": [
                "Default",
                "kaiming_uniform",
                "kaiming_normal",
                "xavier_uniform",
                "xavier_normal"
            ],
            "type": "factor",
            "default": "Default",
            "transform": "None",
            "core_model_parameter_type": "str",
            "lower": 0,
            "upper": 4
        }
    }
}
40.3 Summary
This section presented an introduction to the basic setup of hyperparameter tuning with spotpython
and PyTorch
Lightning using a ResNet model for the Diabetes data set.