28  Details of the Lightning Module Integration in spotpython

28.1 Introduction

Based on the Diabetes Data set and the NNLinearRegressor model, we will provide details on the integration of the Lightning module in spotpython.

  • Section 28.2: The Hyperlight class provides the fun method, which takes X and fun_control as arguments. It calls the train_model method.
  • Section 28.3: The train_model method trains the model and returns the loss.
  • Section 28.4: The Trainer class is used to train the model and validate it. It also uses the LightDataModule class to load the data.

28.2 1. spotpython.fun.hyperlight.HyperLight.fun()

The class Hyperlight provides the method fun, which takes X (np.ndarray) and fun_control (dict) as arguments. It calls the

from math import inf
import numpy as np
from spotpython.data.diabetes import Diabetes
from spotpython.hyperdict.light_hyper_dict import LightHyperDict
from spotpython.fun.hyperlight import HyperLight
from spotpython.utils.init import fun_control_init
from spotpython.utils.eda import print_exp_table
from spotpython.spot import Spot
from spotpython.hyperparameters.values import get_default_hyperparameters_as_array

PREFIX="000"

data_set = Diabetes()

fun_control = fun_control_init(
    PREFIX=PREFIX,
    save_experiment=True,
    fun_evals=inf,
    max_time=1,
    data_set = data_set,
    core_model_name="light.regression.NNLinearRegressor",
    hyperdict=LightHyperDict,
    _L_in=10,
    _L_out=1,
    TENSORBOARD_CLEAN=True,
    tensorboard_log=True,
    seed=42,)

print_exp_table(fun_control)

X = get_default_hyperparameters_as_array(fun_control)
# set epochs to 2^8:
# X[0, 1] = 8
# set patience to 2^10:
# X[0, 7] = 10

print(f"X: {X}")
# combine X and X to a np.array with shape (2, n_hyperparams)
# so that two values are returned
X = np.vstack((X, X, X))
print(f"X: {X}")
hyper_light = HyperLight(seed=125, log_level=50)
hyper_light.fun(X, fun_control)
  • Using the same seed:
hyper_light = HyperLight(seed=125, log_level=50)
hyper_light.fun(X, fun_control)
  • Using a different seed:
hyper_light = HyperLight(seed=123, log_level=50)
hyper_light.fun(X, fun_control)

28.3 2. spotpython.light.trainmodel.train_model()

from math import inf
import numpy as np
from spotpython.data.diabetes import Diabetes
from spotpython.hyperdict.light_hyper_dict import LightHyperDict
from spotpython.utils.init import fun_control_init
from spotpython.utils.eda import print_exp_table
from spotpython.hyperparameters.values import get_default_hyperparameters_as_array
from spotpython.hyperparameters.values import assign_values, generate_one_config_from_var_dict, get_var_name
from spotpython.light.trainmodel import train_model
import pprint

PREFIX="000"

data_set = Diabetes()

fun_control = fun_control_init(
    PREFIX=PREFIX,
    save_experiment=True,
    fun_evals=inf,
    max_time=1,
    data_set = data_set,
    core_model_name="light.regression.NNLinearRegressor",
    hyperdict=LightHyperDict,
    _L_in=10,
    _L_out=1,
    TENSORBOARD_CLEAN=True,
    tensorboard_log=True,
    seed=42,)

print_exp_table(fun_control)

X = get_default_hyperparameters_as_array(fun_control)
# set epochs to 2^8:
# X[0, 1] = 8
# set patience to 2^10:
# X[0, 7] = 10

print(f"X: {X}")
# combine X and X to a np.array with shape (2, n_hyperparams)
# so that two values are returned
X = np.vstack((X, X))
var_dict = assign_values(X, get_var_name(fun_control))
for config in generate_one_config_from_var_dict(var_dict, fun_control):
    pprint.pprint(config)
    y = train_model(config, fun_control)

28.4 3. Trainer: fit and validate

  • Generate the config dictionary:
from math import inf
import numpy as np
from spotpython.data.diabetes import Diabetes
from spotpython.hyperdict.light_hyper_dict import LightHyperDict
from spotpython.utils.init import fun_control_init
from spotpython.utils.eda import print_exp_table
from spotpython.hyperparameters.values import get_default_hyperparameters_as_array
from spotpython.hyperparameters.values import assign_values, generate_one_config_from_var_dict, get_var_name
from spotpython.light.trainmodel import train_model

PREFIX="000"

data_set = Diabetes()

fun_control = fun_control_init(
    PREFIX=PREFIX,
    save_experiment=True,
    fun_evals=inf,
    max_time=1,
    data_set = data_set,
    core_model_name="light.regression.NNLinearRegressor",
    hyperdict=LightHyperDict,
    _L_in=10,
    _L_out=1,
    TENSORBOARD_CLEAN=True,
    tensorboard_log=True,
    seed=42,)
print_exp_table(fun_control)
X = get_default_hyperparameters_as_array(fun_control)
# set epochs to 2^8:
X[0, 1] = 10
# set patience to 2^10:
X[0, 7] = 10
print(f"X: {X}")
var_dict = assign_values(X, get_var_name(fun_control))
config = list(generate_one_config_from_var_dict(var_dict, fun_control))[0]
config
_L_in = 10
_L_out = 1
_L_cond = 0
_torchmetric = "mean_squared_error"

28.4.1 Commented: Using the fun_control dictionary

# model = fun_control["core_model"](**config, _L_in=_L_in, _L_out=_L_out, _L_cond=_L_cond, _torchmetric=_torchmetric)
# model

28.4.2 Using the source code:

import lightning as L
import torch
from torch import nn
from spotpython.hyperparameters.optimizer import optimizer_handler
import torchmetrics.functional.regression
import torch.optim as optim
from spotpython.hyperparameters.architecture import get_hidden_sizes


class NNLinearRegressor(L.LightningModule):
    def __init__(
        self,
        l1: int,
        epochs: int,
        batch_size: int,
        initialization: str,
        act_fn: nn.Module,
        optimizer: str,
        dropout_prob: float,
        lr_mult: float,
        patience: int,
        batch_norm: bool,
        _L_in: int,
        _L_out: int,
        _torchmetric: str,
        *args,
        **kwargs,
    ):
        super().__init__()
        # Attribute 'act_fn' is an instance of `nn.Module` and is already saved during
        # checkpointing. It is recommended to ignore them
        # using `self.save_hyperparameters(ignore=['act_fn'])`
        # self.save_hyperparameters(ignore=["act_fn"])
        #
        self._L_in = _L_in
        self._L_out = _L_out
        if _torchmetric is None:
            _torchmetric = "mean_squared_error"
        self._torchmetric = _torchmetric
        self.metric = getattr(torchmetrics.functional.regression, _torchmetric)
        # _L_in and _L_out are not hyperparameters, but are needed to create the network
        # _torchmetric is not a hyperparameter, but is needed to calculate the loss
        self.save_hyperparameters(ignore=["_L_in", "_L_out", "_torchmetric"])
        # set dummy input array for Tensorboard Graphs
        # set log_graph=True in Trainer to see the graph (in traintest.py)
        self.example_input_array = torch.zeros((batch_size, self._L_in))
        if self.hparams.l1 < 4:
            raise ValueError("l1 must be at least 4")
        hidden_sizes = get_hidden_sizes(_L_in=self._L_in, l1=l1, n=10)

        if batch_norm:
            # Add batch normalization layers
            layers = []
            layer_sizes = [self._L_in] + hidden_sizes
            for i in range(len(layer_sizes) - 1):
                current_layer_size = layer_sizes[i]
                next_layer_size = layer_sizes[i + 1]
                layers += [
                    nn.Linear(current_layer_size, next_layer_size),
                    nn.BatchNorm1d(next_layer_size),
                    self.hparams.act_fn,
                    nn.Dropout(self.hparams.dropout_prob),
                ]
            layers += [nn.Linear(layer_sizes[-1], self._L_out)]
        else:
            layers = []
            layer_sizes = [self._L_in] + hidden_sizes
            for i in range(len(layer_sizes) - 1):
                current_layer_size = layer_sizes[i]
                next_layer_size = layer_sizes[i + 1]
                layers += [
                    nn.Linear(current_layer_size, next_layer_size),
                    self.hparams.act_fn,
                    nn.Dropout(self.hparams.dropout_prob),
                ]
            layers += [nn.Linear(layer_sizes[-1], self._L_out)]

        # Wrap the layers into a sequential container
        self.layers = nn.Sequential(*layers)

        # Initialization (Xavier, Kaiming, or Default)
        self.apply(self._init_weights)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            if self.hparams.initialization == "xavier_uniform":
                nn.init.xavier_uniform_(module.weight)
            elif self.hparams.initialization == "xavier_normal":
                nn.init.xavier_normal_(module.weight)
            elif self.hparams.initialization == "kaiming_uniform":
                nn.init.kaiming_uniform_(module.weight)
            elif self.hparams.initialization == "kaiming_normal":
                nn.init.kaiming_normal_(module.weight)
            else:  # "Default"
                nn.init.uniform_(module.weight)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Performs a forward pass through the model.

        Args:
            x (torch.Tensor): A tensor containing a batch of input data.

        Returns:
            torch.Tensor: A tensor containing the output of the model.

        """
        x = self.layers(x)
        return x

    def _calculate_loss(self, batch):
        """
        Calculate the loss for the given batch.

        Args:
            batch (tuple): A tuple containing a batch of input data and labels.
            mode (str, optional): The mode of the model. Defaults to "train".

        Returns:
            torch.Tensor: A tensor containing the loss for this batch.

        """
        x, y = batch
        y = y.view(len(y), 1)
        y_hat = self(x)
        loss = self.metric(y_hat, y)
        return loss

    def training_step(self, batch: tuple) -> torch.Tensor:
        """
        Performs a single training step.

        Args:
            batch (tuple): A tuple containing a batch of input data and labels.

        Returns:
            torch.Tensor: A tensor containing the loss for this batch.

        """
        loss = self._calculate_loss(batch)
        self.log("train_loss", loss, on_step=False, on_epoch=True, prog_bar=False)
        return loss

    def validation_step(self, batch: tuple, batch_idx: int, prog_bar: bool = False) -> torch.Tensor:
        """
        Performs a single validation step.

        Args:
            batch (tuple): A tuple containing a batch of input data and labels.
            batch_idx (int): The index of the current batch.
            prog_bar (bool, optional): Whether to display the progress bar. Defaults to False.

        Returns:
            torch.Tensor: A tensor containing the loss for this batch.

        """
        loss = self._calculate_loss(batch)
        self.log("val_loss", loss, on_step=False, on_epoch=True, prog_bar=prog_bar)
        self.log("hp_metric", loss, on_step=False, on_epoch=True, prog_bar=prog_bar)
        return loss

    def test_step(self, batch: tuple, batch_idx: int, prog_bar: bool = False) -> torch.Tensor:
        """
        Performs a single test step.

        Args:
            batch (tuple): A tuple containing a batch of input data and labels.
            batch_idx (int): The index of the current batch.
            prog_bar (bool, optional): Whether to display the progress bar. Defaults to False.

        Returns:
            torch.Tensor: A tensor containing the loss for this batch.
        """
        loss = self._calculate_loss(batch)
        self.log("val_loss", loss, on_step=False, on_epoch=True, prog_bar=prog_bar)
        self.log("hp_metric", loss, on_step=False, on_epoch=True, prog_bar=prog_bar)
        return loss

    def predict_step(self, batch: tuple, batch_idx: int, prog_bar: bool = False) -> torch.Tensor:
        """
        Performs a single prediction step.

        Args:
            batch (tuple): A tuple containing a batch of input data and labels.
            batch_idx (int): The index of the current batch.
            prog_bar (bool, optional): Whether to display the progress bar. Defaults to False.

        Returns:
            torch.Tensor: A tensor containing the prediction for this batch.
        """
        x, y = batch
        yhat = self(x)
        y = y.view(len(y), 1)
        yhat = yhat.view(len(yhat), 1)
        print(f"Predict step x: {x}")
        print(f"Predict step y: {y}")
        print(f"Predict step y_hat: {yhat}")
        # pred_loss = F.mse_loss(y_hat, y)
        # pred loss not registered
        # self.log("pred_loss", pred_loss, prog_bar=prog_bar)
        # self.log("hp_metric", pred_loss, prog_bar=prog_bar)
        # MisconfigurationException: You are trying to `self.log()`
        # but the loop's result collection is not registered yet.
        # This is most likely because you are trying to log in a `predict` hook, but it doesn't support logging.
        # If you want to manually log, please consider using `self.log_dict({'pred_loss': pred_loss})` instead.
        return (x, y, yhat)

    def configure_optimizers(self) -> torch.optim.Optimizer:
        """
        Configures the optimizer for the model.

        Notes:
            The default Lightning way is to define an optimizer as
            `optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)`.
            spotpython uses an optimizer handler to create the optimizer, which
            adapts the learning rate according to the lr_mult hyperparameter as
            well as other hyperparameters. See `spotpython.hyperparameters.optimizer.py` for details.

        Returns:
            torch.optim.Optimizer: The optimizer to use during training.

        """
        # optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        optimizer = optimizer_handler(optimizer_name=self.hparams.optimizer, params=self.parameters(), lr_mult=self.hparams.lr_mult)

        num_milestones = 3  # Number of milestones to divide the epochs
        milestones = [int(self.hparams.epochs / (num_milestones + 1) * (i + 1)) for i in range(num_milestones)]
        scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=milestones, gamma=0.1)  # Decay factor

        lr_scheduler_config = {
            "scheduler": scheduler,
            "interval": "epoch",
            "frequency": 1,
        }

        return {"optimizer": optimizer, "lr_scheduler": lr_scheduler_config}
model = NNLinearRegressor(**config, _L_in=_L_in, _L_out=_L_out, _L_cond=_L_cond, _torchmetric=_torchmetric)
model
from spotpython.data.lightdatamodule import LightDataModule
from spotpython.data.diabetes import Diabetes

data_set = Diabetes()
dm = LightDataModule(
    dataset=data_set,
    batch_size=config["batch_size"],
    test_size=fun_control["test_size"],
    test_seed=fun_control["test_seed"],
    scaler=None,
)
  • Using callbacks for early stopping:
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
callbacks = [EarlyStopping(monitor="val_loss", patience=config["patience"], mode="min", strict=False, verbose=False)]
timestamp = True

from lightning.pytorch.callbacks import ModelCheckpoint
if not timestamp:
    # add ModelCheckpoint only if timestamp is False
    callbacks.append(ModelCheckpoint(dirpath=os.path.join(fun_control["CHECKPOINT_PATH"], config_id), save_last=True))  # Save the last checkpoint
from spotpython.utils.eda import generate_config_id
if timestamp:
    # config id is unique. Since the model is not loaded from a checkpoint,
    # the config id is generated here with a timestamp.
    config_id = generate_config_id(config, timestamp=True)
else:
    # config id is not time-dependent and therefore unique,
    # so that the model can be loaded from a checkpoint,
    # the config id is generated here without a timestamp.
    config_id = generate_config_id(config, timestamp=False) + "_TRAIN"
from pytorch_lightning.loggers import TensorBoardLogger
import lightning as L
import os
trainer = L.Trainer(
    # Where to save models
    default_root_dir=os.path.join(fun_control["CHECKPOINT_PATH"], config_id),
    max_epochs=model.hparams.epochs,
    accelerator=fun_control["accelerator"],
    devices=fun_control["devices"],
    strategy=fun_control["strategy"],
    num_nodes=fun_control["num_nodes"],
    precision=fun_control["precision"],
    logger=TensorBoardLogger(save_dir=fun_control["TENSORBOARD_PATH"], version=config_id, default_hp_metric=True, log_graph=fun_control["log_graph"], name=""),
    callbacks=callbacks,
    enable_progress_bar=False,
    num_sanity_val_steps=fun_control["num_sanity_val_steps"],
    log_every_n_steps=fun_control["log_every_n_steps"],
    gradient_clip_val=None,
    gradient_clip_algorithm="norm",
)
trainer.fit(model=model, datamodule=dm, ckpt_path=None)
trainer.validate(model=model, datamodule=dm, verbose=True, ckpt_path=None)

28.4.3 DataModule

from spotpython.data.lightdatamodule import LightDataModule
from spotpython.data.csvdataset import CSVDataset
import torch
dataset = CSVDataset(csv_file='data.csv', target_column='prognosis', feature_type=torch.long)
data_module = LightDataModule(dataset=dataset, batch_size=5, test_size=0.5)
data_module.setup()
print(f"Training set size: {len(data_module.data_train)}")
  • Generate the config dictionary:
from math import inf
import lightning as L
import numpy as np
import os
from spotpython.data.diabetes import Diabetes
from spotpython.hyperdict.light_hyper_dict import LightHyperDict
from spotpython.utils.init import fun_control_init
from spotpython.utils.eda import print_exp_table
from spotpython.hyperparameters.values import get_default_hyperparameters_as_array
from spotpython.hyperparameters.values import assign_values, generate_one_config_from_var_dict, get_var_name
from spotpython.light.trainmodel import train_model, generate_config_id_with_timestamp
from pytorch_lightning.loggers import TensorBoardLogger
from lightning.pytorch.callbacks.early_stopping import EarlyStopping
from spotpython.data.lightdatamodule import LightDataModule
PREFIX="000"
data_set = Diabetes()
fun_control = fun_control_init(
    PREFIX=PREFIX,
    save_experiment=True,
    fun_evals=inf,
    max_time=1,
    data_set = data_set,
    core_model_name="light.regression.NNLinearRegressor",
    hyperdict=LightHyperDict,
    _L_in=10,
    _L_out=1,
    TENSORBOARD_CLEAN=True,
    tensorboard_log=True,
    seed=42,)
print_exp_table(fun_control)
X = get_default_hyperparameters_as_array(fun_control)
# set epochs to 2^8:
X[0, 1] = 10
# set patience to 2^10:
X[0, 7] = 10
print(f"X: {X}")
var_dict = assign_values(X, get_var_name(fun_control))
config = list(generate_one_config_from_var_dict(var_dict, fun_control))[0]
_L_in = fun_control["_L_in"]
_L_out = fun_control["_L_out"]
_L_cond = fun_control["_L_cond"]
_torchmetric = fun_control["_torchmetric"]
model = fun_control["core_model"](**config, _L_in=_L_in, _L_out=_L_out, _L_cond=_L_cond, _torchmetric=_torchmetric)
dm = LightDataModule(
    dataset=fun_control["data_set"],
    batch_size=config["batch_size"],
    num_workers=fun_control["num_workers"],
    test_size=fun_control["test_size"],
    test_seed=fun_control["test_seed"],
    scaler=fun_control["scaler"],
)
config_id = generate_config_id_with_timestamp(config, timestamp=True)
callbacks = [EarlyStopping(monitor="val_loss", patience=config["patience"], mode="min", strict=False, verbose=False)]
trainer = L.Trainer(
    default_root_dir=os.path.join(fun_control["CHECKPOINT_PATH"], config_id),
    max_epochs=model.hparams.epochs,
    accelerator=fun_control["accelerator"],
    devices=fun_control["devices"],
    strategy=fun_control["strategy"],
    num_nodes=fun_control["num_nodes"],
    precision=fun_control["precision"],
    logger=TensorBoardLogger(save_dir=fun_control["TENSORBOARD_PATH"], version=config_id, default_hp_metric=True, log_graph=fun_control["log_graph"], name=""),
    callbacks=callbacks,
    enable_progress_bar=False,
    num_sanity_val_steps=fun_control["num_sanity_val_steps"],
    log_every_n_steps=fun_control["log_every_n_steps"],
    gradient_clip_val=None,
    gradient_clip_algorithm="norm",
)
trainer.fit(model=model, datamodule=dm, ckpt_path=None)
from math import inf
import lightning as L
import numpy as np
from spotpython.data.diabetes import Diabetes
from spotpython.hyperdict.light_hyper_dict import LightHyperDict
from spotpython.utils.init import fun_control_init
from spotpython.hyperparameters.values import assign_values, generate_one_config_from_var_dict, get_var_name
from spotpython.data.lightdatamodule import LightDataModule
from spotpython.utils.scaler import TorchStandardScaler
PREFIX="000"
data_set = Diabetes()
fun_control = fun_control_init(
    PREFIX=PREFIX,
    fun_evals=inf,
    max_time=1,
    data_set = data_set,
    core_model_name="light.regression.NNLinearRegressor",
    hyperdict=LightHyperDict,
    _L_in=10,
    _L_out=1)
X = np.array([[3.0e+00, 5.0, 4.0e+00, 2.0e+00, 1.1e+01, 1.0e-02, 1.0e+00, 1.0e+01, 0.0e+00,
  0.0e+00]])
var_dict = assign_values(X, get_var_name(fun_control))
config = list(generate_one_config_from_var_dict(var_dict, fun_control))[0]
_torchmetric = "mean_squared_error"
model = fun_control["core_model"](**config, _L_in=10, _L_out=1, _L_cond=None, _torchmetric=_torchmetric)
dm = LightDataModule(
    dataset=data_set,
    batch_size=16,
    test_size=0.6,
    scaler=TorchStandardScaler())
trainer = L.Trainer(
    max_epochs=32,
    enable_progress_bar=False,
)
trainer.fit(model=model, datamodule=dm, ckpt_path=None)
trainer.validate(model=model, datamodule=dm, ckpt_path=None)