29  Saving and Loading

This tutorial shows how to save and load objects in spotPython. It is split into the following parts:
  • Section 29.1 shows how to save and load objects in spotPython if spotPython is used as an optimizer.
  • Section 29.2 shows how to save and load hyperparameter tuning experiments.
  • Section 29.3 shows how to save and load PyTorch Lightning models.
  • Section 29.4 shows how to convert a PyTorch Lightning model to a plain PyTorch model.

29.1 spotPython: Saving and Loading Optimization Experiments

In this section, we show how results from spotPython can be saved and reloaded. Here, spotPython is used as a pure optimizer, i.e., without a hyperparameter dictionary.

29.1.1 spotPython as an Optimizer

If spotPython is used as an optimizer, no dictionary of hyperparameters has to be specified. The fun_control dictionary is sufficient.

import os
import pprint
from spotPython.utils.file import load_experiment
from spotPython.utils.file import get_experiment_filename
import numpy as np
from math import inf
from spotPython.spot import spot
from spotPython.utils.init import (
    fun_control_init,
    design_control_init,
    surrogate_control_init,
    optimizer_control_init)
from spotPython.fun.objectivefunctions import analytical
# The two-dimensional Branin function serves as the objective function.
fun = analytical().fun_branin
fun_control = fun_control_init(
            PREFIX="branin",
            SUMMARY_WRITER=False,
            lower = np.array([0, 0]),
            upper = np.array([10, 10]),
            fun_evals=8,
            fun_repeats=1,
            max_time=inf,
            noise=False,
            tolerance_x=0,
            ocba_delta=0,
            var_type=["num", "num"],
            infill_criterion="ei",
            n_points=1,
            seed=123,
            log_level=20,
            show_models=False,
            show_progress=True)
design_control = design_control_init(
            init_size=5,
            repeats=1)
surrogate_control = surrogate_control_init(
            model_fun_evals=10000,
            min_theta=-3,
            max_theta=3,
            n_theta=2,
            theta_init_zero=True,
            n_p=1,
            optim_p=False,
            var_type=["num", "num"],
            seed=124)
optimizer_control = optimizer_control_init(
            max_iter=1000,
            seed=125)
spot_tuner = spot.Spot(fun=fun,
            fun_control=fun_control,
            design_control=design_control,
            surrogate_control=surrogate_control,
            optimizer_control=optimizer_control)
spot_tuner.run()
# Save the experiment; the file name is derived from the PREFIX.
PREFIX = fun_control["PREFIX"]
filename = get_experiment_filename(PREFIX)
spot_tuner.save_experiment(filename=filename)
print(f"filename: {filename}")
spotPython tuning: 4.7932399644479124 [########--] 75.00% 
spotPython tuning: 2.0379795645847087 [#########-] 87.50% 
spotPython tuning: 1.986328241945829 [##########] 100.00% Done...

{'CHECKPOINT_PATH': 'runs/saved_models/',
 'DATASET_PATH': 'data/',
 'PREFIX': 'branin',
 'RESULTS_PATH': 'results/',
 'TENSORBOARD_PATH': 'runs/',
 '_L_in': None,
 '_L_out': None,
 '_torchmetric': None,
 'accelerator': 'auto',
 'converters': None,
 'core_model': None,
 'core_model_name': None,
 'counter': 8,
 'data': None,
 'data_dir': './data',
 'data_module': None,
 'data_set': None,
 'data_set_name': None,
 'db_dict_name': None,
 'design': None,
 'device': None,
 'devices': 1,
 'enable_progress_bar': False,
 'eval': None,
 'fun_evals': 8,
 'fun_repeats': 1,
 'horizon': None,
 'infill_criterion': 'ei',
 'k_folds': 3,
 'log_graph': False,
 'log_level': 20,
 'loss_function': None,
 'lower': array([0, 0]),
 'max_surrogate_points': 30,
 'max_time': inf,
 'metric_params': {},
 'metric_river': None,
 'metric_sklearn': None,
 'metric_sklearn_name': None,
 'metric_torch': None,
 'model_dict': {},
 'n_points': 1,
 'n_samples': None,
 'n_total': None,
 'noise': False,
 'num_workers': 0,
 'ocba_delta': 0,
 'oml_grace_period': None,
 'optimizer': None,
 'path': None,
 'prep_model': None,
 'prep_model_name': None,
 'progress_file': None,
 'save_model': False,
 'scenario': None,
 'seed': 123,
 'show_batch_interval': 1000000,
 'show_models': False,
 'show_progress': True,
 'shuffle': None,
 'sigma': 0.0,
 'spot_tensorboard_path': None,
 'spot_writer': None,
 'target_column': None,
 'target_type': None,
 'task': None,
 'test': None,
 'test_seed': 1234,
 'test_size': 0.4,
 'tolerance_x': 0,
 'train': None,
 'upper': array([10, 10]),
 'var_name': None,
 'var_type': ['num', 'num'],
 'verbosity': 0,
 'weight_coeff': 0.0,
 'weights': 1.0,
 'weights_entry': None}
filename: spot_branin_experiment.pickle
(spot_tuner_1, fun_control_1, design_control_1,
    surrogate_control_1, optimizer_control_1) = load_experiment(filename)

The progress of the original experiment is shown in Figure 29.1 and the reloaded experiment in Figure 29.2.

spot_tuner.plot_progress(log_y=True)
Figure 29.1: Progress of the original experiment
spot_tuner_1.plot_progress(log_y=True)
Figure 29.2: Progress of the reloaded experiment

The results from the original experiment are shown in Table 29.1 and the reloaded experiment in Table 29.2.

spot_tuner.print_results()
min y: 1.986328241945829
x0: 10.0
x1: 3.2107728198306598
Table 29.1
[['x0', 10.0], ['x1', 3.2107728198306598]]
spot_tuner_1.print_results()
min y: 1.986328241945829
x0: 10.0
x1: 3.2107728198306598
Table 29.2
[['x0', 10.0], ['x1', 3.2107728198306598]]
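
The equality of the two objects can also be checked programmatically. A minimal sketch, assuming the Spot class exposes the best observed value and its location as the attributes min_y and min_X (these names may differ across spotPython versions):

import numpy as np

# Compare the best value and its location in the original and the
# reloaded experiment (min_y/min_X are assumed attribute names).
assert spot_tuner.min_y == spot_tuner_1.min_y
assert np.allclose(spot_tuner.min_X, spot_tuner_1.min_X)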

29.1.1.1 Getting the Tuned Hyperparameters

The tuned hyperparameters can be obtained as a dictionary with the following code.

from spotPython.hyperparameters.values import get_tuned_hyperparameters
get_tuned_hyperparameters(spot_tuner=spot_tuner)
{'x0': 10.0, 'x1': 3.2107728198306598}
Summary: Saving and Loading Optimization Experiments
  • If spotPython is used as an optimizer (without a hyperparameter dictionary), experiments can be saved and reloaded with the save_experiment and load_experiment functions.
  • The tuned hyperparameters can be obtained with the get_tuned_hyperparameters function.

29.2 spotPython as a Hyperparameter Tuner

If spotPython is used as a hyperparameter tuner, a core_model has to be specified in addition to the fun_control dictionary. This is explained in Section 29.2.2.

Furthermore, a data set has to be selected and added to the fun_control dictionary. Here, we will use the Diabetes data set.

29.2.1 The Diabetes Data Set

The hyperparameter tuning of a PyTorch Lightning network on the Diabetes data set is used as an example. The Diabetes data set is a PyTorch Dataset for regression, which originates from the scikit-learn package, see https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_diabetes.html#sklearn.datasets.load_diabetes.

Ten baseline variables (age, sex, body mass index, average blood pressure, and six blood serum measurements) were obtained for each of n = 442 diabetes patients, together with the response of interest, a quantitative measure of disease progression one year after baseline. The Diabetes data set is described in Table 29.3.

Table 29.3: The Diabetes data set

Description     Value
Samples total   442
Dimensionality  10
Features        real, -0.2 < x < 0.2
Targets         integer 25 - 346
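
Before tuning, the data set can be inspected directly. A minimal sketch, assuming the Diabetes class is a torch Dataset that returns (features, target) tensor pairs:

from spotPython.data.diabetes import Diabetes

# Inspect the size and shapes of the Diabetes data set.
dataset = Diabetes()
print(len(dataset))  # 442 samples
X0, y0 = dataset[0]
print(X0.shape)      # torch.Size([10]): ten features per sample
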
from spotPython.utils.device import getDevice
from math import inf
from spotPython.utils.init import fun_control_init
import numpy as np
from spotPython.data.diabetes import Diabetes

MAX_TIME = 1
FUN_EVALS = 8
INIT_SIZE = 5
WORKERS = 0
PREFIX="037"
DEVICE = getDevice()
DEVICES = 1
TEST_SIZE = 0.4
TORCH_METRIC = "mean_squared_error"
dataset = Diabetes()

fun_control = fun_control_init(
    _L_in=10,
    _L_out=1,
    _torchmetric=TORCH_METRIC,
    PREFIX=PREFIX,
    TENSORBOARD_CLEAN=True,
    data_set=dataset,
    device=DEVICE,
    enable_progress_bar=False,
    fun_evals=FUN_EVALS,
    log_level=50,
    max_time=MAX_TIME,
    num_workers=WORKERS,
    show_progress=True,
    test_size=TEST_SIZE,
    tolerance_x=np.sqrt(np.spacing(1)),
    )
Moving TENSORBOARD_PATH: runs/ to TENSORBOARD_PATH_OLD: runs_OLD/runs_2024_04_22_02_15_20
Created spot_tensorboard_path: runs/spot_logs/037_maans14_2024-04-22_02-15-20 for SummaryWriter()

29.2.2 Adding a core_model to the fun_control Dictionary

spotPython includes the NetLightRegression class [SOURCE] for configurable neural networks. The class is imported here. It inherits from LightningModule, the base class for all models in Lightning. LightningModule is a subclass of torch.nn.Module and provides additional functionality for training and testing neural networks; it is described in the Lightning documentation.
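
For orientation, the following generic sketch shows the minimal shape of a LightningModule. It is not spotPython's NetLightRegression, only an illustration of the methods the base class expects:

import torch
import lightning as L

class MinimalRegressor(L.LightningModule):
    # A generic LightningModule sketch (not spotPython's implementation).
    def __init__(self, lr: float = 1e-3):
        super().__init__()
        self.net = torch.nn.Linear(10, 1)
        self.lr = lr

    def forward(self, x):
        return self.net(x)

    def training_step(self, batch, batch_idx):
        x, y = batch
        loss = torch.nn.functional.mse_loss(self(x).squeeze(-1), y)
        self.log("train_loss", loss)
        return loss

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.lr)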

The hyperparameters of the model are specified in the core_model_hyper_dict dictionary [SOURCE].

The core_model dictionary contains the hyperparameters of the model to be tuned. These hyperparameters can be specified and modified as shown in the following code.

from spotPython.light.regression.netlightregression import NetLightRegression
from spotPython.hyperdict.light_hyper_dict import LightHyperDict
from spotPython.hyperparameters.values import add_core_model_to_fun_control
add_core_model_to_fun_control(fun_control=fun_control,
                              core_model=NetLightRegression,
                              hyper_dict=LightHyperDict)
from spotPython.hyperparameters.values import set_control_hyperparameter_value

set_control_hyperparameter_value(fun_control, "epochs", [4, 5])
set_control_hyperparameter_value(fun_control, "batch_size", [4, 5])
set_control_hyperparameter_value(fun_control, "optimizer", [
                "Adam",
                "RAdam",
            ])
set_control_hyperparameter_value(fun_control, "dropout_prob", [0.01, 0.1])
set_control_hyperparameter_value(fun_control, "lr_mult", [0.05, 1.0])
set_control_hyperparameter_value(fun_control, "patience", [2, 3])
set_control_hyperparameter_value(fun_control, "act_fn",[
                "ReLU",
                "LeakyReLU"
            ] )
Setting hyperparameter epochs to value [4, 5].
Variable type is int.
Core type is None.
Calling modify_hyper_parameter_bounds().
Setting hyperparameter batch_size to value [4, 5].
Variable type is int.
Core type is None.
Calling modify_hyper_parameter_bounds().
Setting hyperparameter optimizer to value ['Adam', 'RAdam'].
Variable type is factor.
Core type is str.
Calling modify_hyper_parameter_levels().
Setting hyperparameter dropout_prob to value [0.01, 0.1].
Variable type is float.
Core type is None.
Calling modify_hyper_parameter_bounds().
Setting hyperparameter lr_mult to value [0.05, 1.0].
Variable type is float.
Core type is None.
Calling modify_hyper_parameter_bounds().
Setting hyperparameter patience to value [2, 3].
Variable type is int.
Core type is None.
Calling modify_hyper_parameter_bounds().
Setting hyperparameter act_fn to value ['ReLU', 'LeakyReLU'].
Variable type is factor.
Core type is instance().
Calling modify_hyper_parameter_levels().
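
The modified search space can be reviewed before tuning. A short sketch, assuming the helper gen_design_table from spotPython.utils.eda is available in the installed version:

from spotPython.utils.eda import gen_design_table

# Print name, type, default, bounds, and transform of each hyperparameter.
print(gen_design_table(fun_control))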

29.2.3 design_control, surrogate_control Dictionaries and the Objective Function

After specifying the design_control and surrogate_control dictionaries, the objective function fun from the class HyperLight [SOURCE] is selected. It provides the interface between PyTorch Lightning's training, validation, and testing methods and spotPython.

Then, the hyperparameter tuning can be started.

from spotPython.utils.init import design_control_init, surrogate_control_init
design_control = design_control_init(init_size=INIT_SIZE)

surrogate_control = surrogate_control_init(noise=True,
                                            n_theta=2)
from spotPython.fun.hyperlight import HyperLight
fun = HyperLight(log_level=50).fun
from spotPython.spot import spot
spot_tuner = spot.Spot(fun=fun,
                       fun_control=fun_control,
                       design_control=design_control,
                       surrogate_control=surrogate_control)
spot_tuner.run()
LightDataModule.setup(): stage: TrainerFn.FITTING
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.val_dataloader(). Val. set size: 106
LightDataModule.train_dataloader(). data_train size: 160
LightDataModule.setup(): stage: TrainerFn.VALIDATING
LightDataModule.val_dataloader(). Val. set size: 106
train_model result: {'val_loss': 10240.3544921875, 'hp_metric': 10240.3544921875}
LightDataModule.setup(): stage: TrainerFn.FITTING
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.val_dataloader(). Val. set size: 106
LightDataModule.train_dataloader(). data_train size: 160
LightDataModule.setup(): stage: TrainerFn.VALIDATING
LightDataModule.val_dataloader(). Val. set size: 106
train_model result: {'val_loss': nan, 'hp_metric': nan}
LightDataModule.setup(): stage: TrainerFn.FITTING
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.val_dataloader(). Val. set size: 106
LightDataModule.train_dataloader(). data_train size: 160
LightDataModule.setup(): stage: TrainerFn.VALIDATING
LightDataModule.val_dataloader(). Val. set size: 106
train_model result: {'val_loss': nan, 'hp_metric': nan}
LightDataModule.setup(): stage: TrainerFn.FITTING
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.val_dataloader(). Val. set size: 106
LightDataModule.train_dataloader(). data_train size: 160
LightDataModule.setup(): stage: TrainerFn.VALIDATING
LightDataModule.val_dataloader(). Val. set size: 106
train_model result: {'val_loss': 23857.75, 'hp_metric': 23857.75}
LightDataModule.setup(): stage: TrainerFn.FITTING
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.val_dataloader(). Val. set size: 106
LightDataModule.train_dataloader(). data_train size: 160
LightDataModule.setup(): stage: TrainerFn.VALIDATING
LightDataModule.val_dataloader(). Val. set size: 106
train_model result: {'val_loss': nan, 'hp_metric': nan}
LightDataModule.setup(): stage: TrainerFn.FITTING
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.val_dataloader(). Val. set size: 106
LightDataModule.train_dataloader(). data_train size: 160
LightDataModule.setup(): stage: TrainerFn.VALIDATING
LightDataModule.val_dataloader(). Val. set size: 106
train_model result: {'val_loss': 6874.3818359375, 'hp_metric': 6874.3818359375}
spotPython tuning: 6874.3818359375 [####------] 37.50% 
LightDataModule.setup(): stage: TrainerFn.FITTING
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.val_dataloader(). Val. set size: 106
LightDataModule.train_dataloader(). data_train size: 160
LightDataModule.setup(): stage: TrainerFn.VALIDATING
LightDataModule.val_dataloader(). Val. set size: 106
train_model result: {'val_loss': 7526.0234375, 'hp_metric': 7526.0234375}
spotPython tuning: 6874.3818359375 [#####-----] 50.00% 
LightDataModule.setup(): stage: TrainerFn.FITTING
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.val_dataloader(). Val. set size: 106
LightDataModule.train_dataloader(). data_train size: 160
LightDataModule.setup(): stage: TrainerFn.VALIDATING
LightDataModule.val_dataloader(). Val. set size: 106
train_model result: {'val_loss': 9060.6591796875, 'hp_metric': 9060.6591796875}
spotPython tuning: 6874.3818359375 [######----] 62.50% 
LightDataModule.setup(): stage: TrainerFn.FITTING
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.val_dataloader(). Val. set size: 106
LightDataModule.train_dataloader(). data_train size: 160
LightDataModule.setup(): stage: TrainerFn.VALIDATING
LightDataModule.val_dataloader(). Val. set size: 106
train_model result: {'val_loss': 9169.982421875, 'hp_metric': 9169.982421875}
spotPython tuning: 6874.3818359375 [########--] 75.00% 
LightDataModule.setup(): stage: TrainerFn.FITTING
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.val_dataloader(). Val. set size: 106
LightDataModule.train_dataloader(). data_train size: 160
LightDataModule.setup(): stage: TrainerFn.VALIDATING
LightDataModule.val_dataloader(). Val. set size: 106
train_model result: {'val_loss': 5747.65966796875, 'hp_metric': 5747.65966796875}
spotPython tuning: 5747.65966796875 [#########-] 87.50% 
LightDataModule.setup(): stage: TrainerFn.FITTING
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.val_dataloader(). Val. set size: 106
LightDataModule.train_dataloader(). data_train size: 160
LightDataModule.setup(): stage: TrainerFn.VALIDATING
LightDataModule.val_dataloader(). Val. set size: 106
train_model result: {'val_loss': 4623.73388671875, 'hp_metric': 4623.73388671875}
spotPython tuning: 4623.73388671875 [##########] 100.00% Done...

{'CHECKPOINT_PATH': 'runs/saved_models/',
 'DATASET_PATH': 'data/',
 'PREFIX': '037',
 'RESULTS_PATH': 'results/',
 'TENSORBOARD_PATH': 'runs/',
 '_L_in': 10,
 '_L_out': 1,
 '_torchmetric': 'mean_squared_error',
 'accelerator': 'auto',
 'converters': None,
 'core_model': <class 'spotPython.light.regression.netlightregression.NetLightRegression'>,
 'core_model_hyper_dict': {'act_fn': {'class_name': 'spotPython.torch.activation',
                                      'core_model_parameter_type': 'instance()',
                                      'default': 'ReLU',
                                      'levels': ['ReLU', 'LeakyReLU'],
                                      'lower': 0,
                                      'transform': 'None',
                                      'type': 'factor',
                                      'upper': 1},
                           'batch_size': {'default': 4,
                                          'lower': 4,
                                          'transform': 'transform_power_2_int',
                                          'type': 'int',
                                          'upper': 5},
                           'dropout_prob': {'default': 0.01,
                                            'lower': 0.01,
                                            'transform': 'None',
                                            'type': 'float',
                                            'upper': 0.1},
                           'epochs': {'default': 4,
                                      'lower': 4,
                                      'transform': 'transform_power_2_int',
                                      'type': 'int',
                                      'upper': 5},
                           'initialization': {'core_model_parameter_type': 'str',
                                              'default': 'Default',
                                              'levels': ['Default',
                                                         'Kaiming',
                                                         'Xavier'],
                                              'lower': 0,
                                              'transform': 'None',
                                              'type': 'factor',
                                              'upper': 2},
                           'l1': {'default': 3,
                                  'lower': 3,
                                  'transform': 'transform_power_2_int',
                                  'type': 'int',
                                  'upper': 8},
                           'lr_mult': {'default': 1.0,
                                       'lower': 0.05,
                                       'transform': 'None',
                                       'type': 'float',
                                       'upper': 1.0},
                           'optimizer': {'class_name': 'torch.optim',
                                         'core_model_parameter_type': 'str',
                                         'default': 'SGD',
                                         'levels': ['Adam', 'RAdam'],
                                         'lower': 0,
                                         'transform': 'None',
                                         'type': 'factor',
                                         'upper': 1},
                           'patience': {'default': 2,
                                        'lower': 2,
                                        'transform': 'transform_power_2_int',
                                        'type': 'int',
                                        'upper': 3}},
 'core_model_hyper_dict_default': {'act_fn': {'class_name': 'spotPython.torch.activation',
                                              'core_model_parameter_type': 'instance()',
                                              'default': 'ReLU',
                                              'levels': ['Sigmoid',
                                                         'Tanh',
                                                         'ReLU',
                                                         'LeakyReLU',
                                                         'ELU',
                                                         'Swish'],
                                              'lower': 0,
                                              'transform': 'None',
                                              'type': 'factor',
                                              'upper': 5},
                                   'batch_size': {'default': 4,
                                                  'lower': 1,
                                                  'transform': 'transform_power_2_int',
                                                  'type': 'int',
                                                  'upper': 4},
                                   'dropout_prob': {'default': 0.01,
                                                    'lower': 0.0,
                                                    'transform': 'None',
                                                    'type': 'float',
                                                    'upper': 0.25},
                                   'epochs': {'default': 4,
                                              'lower': 4,
                                              'transform': 'transform_power_2_int',
                                              'type': 'int',
                                              'upper': 9},
                                   'initialization': {'core_model_parameter_type': 'str',
                                                      'default': 'Default',
                                                      'levels': ['Default',
                                                                 'Kaiming',
                                                                 'Xavier'],
                                                      'lower': 0,
                                                      'transform': 'None',
                                                      'type': 'factor',
                                                      'upper': 2},
                                   'l1': {'default': 3,
                                          'lower': 3,
                                          'transform': 'transform_power_2_int',
                                          'type': 'int',
                                          'upper': 8},
                                   'lr_mult': {'default': 1.0,
                                               'lower': 0.1,
                                               'transform': 'None',
                                               'type': 'float',
                                               'upper': 10.0},
                                   'optimizer': {'class_name': 'torch.optim',
                                                 'core_model_parameter_type': 'str',
                                                 'default': 'SGD',
                                                 'levels': ['Adadelta',
                                                            'Adagrad',
                                                            'Adam',
                                                            'AdamW',
                                                            'SparseAdam',
                                                            'Adamax',
                                                            'ASGD',
                                                            'NAdam',
                                                            'RAdam',
                                                            'RMSprop',
                                                            'Rprop',
                                                            'SGD'],
                                                 'lower': 0,
                                                 'transform': 'None',
                                                 'type': 'factor',
                                                 'upper': 11},
                                   'patience': {'default': 2,
                                                'lower': 2,
                                                'transform': 'transform_power_2_int',
                                                'type': 'int',
                                                'upper': 6}},
 'core_model_name': None,
 'counter': 8,
 'data': None,
 'data_dir': './data',
 'data_module': None,
 'data_set': <spotPython.data.diabetes.Diabetes object at 0x3c9a98f90>,
 'data_set_name': None,
 'db_dict_name': None,
 'design': None,
 'device': 'mps',
 'devices': 1,
 'enable_progress_bar': False,
 'eval': None,
 'fun_evals': 8,
 'fun_repeats': 1,
 'horizon': None,
 'infill_criterion': 'y',
 'k_folds': 3,
 'log_graph': False,
 'log_level': 50,
 'loss_function': None,
 'lower': array([3. , 4. , 1. , 0. , 0. , 0. , 0.1, 2. , 0. ]),
 'max_surrogate_points': 30,
 'max_time': 1,
 'metric_params': {},
 'metric_river': None,
 'metric_sklearn': None,
 'metric_sklearn_name': None,
 'metric_torch': None,
 'model_dict': {},
 'n_points': 1,
 'n_samples': None,
 'n_total': None,
 'noise': False,
 'num_workers': 0,
 'ocba_delta': 0,
 'oml_grace_period': None,
 'optimizer': None,
 'path': None,
 'prep_model': None,
 'prep_model_name': None,
 'progress_file': None,
 'save_model': False,
 'scenario': None,
 'seed': 123,
 'show_batch_interval': 1000000,
 'show_models': False,
 'show_progress': True,
 'shuffle': None,
 'sigma': 0.0,
 'spot_tensorboard_path': 'runs/spot_logs/037_maans14_2024-04-22_02-15-20',
 'spot_writer': <torch.utils.tensorboard.writer.SummaryWriter object at 0x3c9937a90>,
 'target_column': None,
 'target_type': None,
 'task': None,
 'test': None,
 'test_seed': 1234,
 'test_size': 0.4,
 'tolerance_x': 1.4901161193847656e-08,
 'train': None,
 'upper': array([ 8.  ,  9.  ,  4.  ,  5.  , 11.  ,  0.25, 10.  ,  6.  ,  2.  ]),
 'var_name': ['l1',
              'epochs',
              'batch_size',
              'act_fn',
              'optimizer',
              'dropout_prob',
              'lr_mult',
              'patience',
              'initialization'],
 'var_type': ['int',
              'int',
              'int',
              'factor',
              'factor',
              'float',
              'float',
              'int',
              'factor'],
 'verbosity': 0,
 'weight_coeff': 0.0,
 'weights': 1.0,
 'weights_entry': None}
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃      Validate metric      ┃       DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
β”‚         hp_metric         β”‚     10240.3544921875      β”‚
β”‚         val_loss          β”‚     10240.3544921875      β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃      Validate metric      ┃       DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
β”‚         hp_metric         β”‚            nan            β”‚
β”‚         val_loss          β”‚            nan            β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃      Validate metric      ┃       DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
β”‚         hp_metric         β”‚            nan            β”‚
β”‚         val_loss          β”‚            nan            β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃      Validate metric      ┃       DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
β”‚         hp_metric         β”‚         23857.75          β”‚
β”‚         val_loss          β”‚         23857.75          β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃      Validate metric      ┃       DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
β”‚         hp_metric         β”‚            nan            β”‚
β”‚         val_loss          β”‚            nan            β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃      Validate metric      ┃       DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
β”‚         hp_metric         β”‚      6874.3818359375      β”‚
β”‚         val_loss          β”‚      6874.3818359375      β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃      Validate metric      ┃       DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
β”‚         hp_metric         β”‚       7526.0234375        β”‚
β”‚         val_loss          β”‚       7526.0234375        β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃      Validate metric      ┃       DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
β”‚         hp_metric         β”‚      9060.6591796875      β”‚
β”‚         val_loss          β”‚      9060.6591796875      β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃      Validate metric      ┃       DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
β”‚         hp_metric         β”‚      9169.982421875       β”‚
β”‚         val_loss          β”‚      9169.982421875       β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃      Validate metric      ┃       DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
β”‚         hp_metric         β”‚     5747.65966796875      β”‚
β”‚         val_loss          β”‚     5747.65966796875      β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃      Validate metric      ┃       DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
β”‚         hp_metric         β”‚     4623.73388671875      β”‚
β”‚         val_loss          β”‚     4623.73388671875      β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
<spotPython.spot.spot.Spot at 0x3c9aa1210>

The tuned hyperparameters can be obtained as a dictionary with the following code.

from spotPython.hyperparameters.values import get_tuned_hyperparameters
get_tuned_hyperparameters(spot_tuner)
{'l1': 7.0,
 'epochs': 4.0,
 'batch_size': 4.0,
 'act_fn': 0.0,
 'optimizer': 0.0,
 'dropout_prob': 0.06369557694687934,
 'lr_mult': 0.4447783604968382,
 'patience': 3.0,
 'initialization': 0.0}

Here, the numerical levels of the hyperparameters are used as keys in the dictionary. If the fun_control dictionary is passed in addition, the names of the hyperparameters are used as keys.

get_tuned_hyperparameters(spot_tuner, fun_control)
{'l1': 7.0,
 'epochs': 4.0,
 'batch_size': 4.0,
 'act_fn': 'ReLU',
 'optimizer': 'Adam',
 'dropout_prob': 0.06369557694687934,
 'lr_mult': 0.4447783604968382,
 'patience': 3.0,
 'initialization': 'Default'}
PREFIX = fun_control["PREFIX"]
filename = get_experiment_filename(PREFIX)
spot_tuner.save_experiment(filename=filename)
print(f"filename: {filename}")
filename: spot_037_experiment.pickle

The results from the experiment are stored in the pickle file spot_037_experiment.pickle. The experiment can be reloaded with the following code.

(spot_tuner_1, fun_control_1, design_control_1,
    surrogate_control_1, optimizer_control_1) = load_experiment(filename)

The progress plots of the original and the reloaded experiment are identical.

spot_tuner.plot_progress(log_y=True)
spot_tuner_1.plot_progress(log_y=True)

Finally, the tuned hyperparameters can be obtained as a dictionary from the reloaded experiment with the following code.

get_tuned_hyperparameters(spot_tuner_1, fun_control_1)
{'l1': 7.0,
 'epochs': 4.0,
 'batch_size': 4.0,
 'act_fn': 'ReLU',
 'optimizer': 'Adam',
 'dropout_prob': 0.06369557694687934,
 'lr_mult': 0.4447783604968382,
 'patience': 3.0,
 'initialization': 'Default'}
Summary: Saving and Loading Hyperparameter-Tuning Experiments
  • If spotPython is used as a hyperparameter tuner (with a hyperparameter dictionary), experiments can be saved and reloaded with the save_experiment and load_experiment functions.
  • The tuned hyperparameters can be obtained with the get_tuned_hyperparameters function.

29.3 Saving and Loading PyTorch Lightning Models

Section 29.1 and Section 29.2 explained how to save and load optimization and hyperparameter tuning experiments and how to get the tuned hyperparameters as a dictionary. This section shows how to save and load PyTorch Lightning models.

29.3.1 Get the Tuned Architecture

In contrast to the function get_tuned_hyperparameters, the function get_tuned_architecture returns the tuned architecture of the model as a dictionary. Here, the transformations have already been applied to the numerical levels of the hyperparameters, and the values are encoded with the original types expected by the model. The config dictionary can therefore be passed to the model without any modifications.

from spotPython.hyperparameters.values import get_tuned_architecture
config = get_tuned_architecture(spot_tuner, fun_control)
pprint.pprint(config)
{'act_fn': ReLU(),
 'batch_size': 16,
 'dropout_prob': 0.06369557694687934,
 'epochs': 16,
 'initialization': 'Default',
 'l1': 128,
 'lr_mult': 0.4447783604968382,
 'optimizer': 'Adam',
 'patience': 8}
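
The values differ from the raw results of get_tuned_hyperparameters because the transforms from the hyperparameter dictionary have been applied: epochs, batch_size, l1, and patience use transform_power_2_int, so the tuned exponent 4.0 becomes 2^4 = 16. A minimal sketch of this transform (defined inline here; spotPython ships its own implementation):

# Sketch of the power-of-two transform used for epochs, batch_size,
# l1, and patience.
def transform_power_2_int(x: float) -> int:
    return int(2 ** int(x))

assert transform_power_2_int(4.0) == 16   # epochs, batch_size
assert transform_power_2_int(7.0) == 128  # l1
assert transform_power_2_int(3.0) == 8    # patience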

After getting the tuned architecture, the model can be created and tested with the following code.

from spotPython.light.testmodel import test_model
test_model(config, fun_control)
LightDataModule.setup(): stage: TrainerFn.FITTING
train_size: 0.36, val_size: 0.24 used for train & val data.
LightDataModule.val_dataloader(). Val. set size: 106
LightDataModule.train_dataloader(). data_train size: 160
LightDataModule.setup(): stage: TrainerFn.TESTING
test_size: 0.4 used for test dataset.
LightDataModule.test_dataloader(). Test set size: 177
test_model result: {'val_loss': 14980.9375, 'hp_metric': 14980.9375}
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃        Test metric        ┃       DataLoader 0        ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
β”‚         hp_metric         β”‚        14980.9375         β”‚
β”‚         val_loss          β”‚        14980.9375         β”‚
β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
(14980.9375, 14980.9375)
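
test_model returns the tuple (val_loss, hp_metric) and, as the log above shows, writes a last.ckpt checkpoint below CHECKPOINT_PATH; this checkpoint is reloaded in Section 29.3.2. The return value can be unpacked directly (note that every call trains the model anew):

# test_model returns (val_loss, hp_metric); it also writes a checkpoint
# under fun_control["CHECKPOINT_PATH"], as shown in the log above.
val_loss, hp_metric = test_model(config, fun_control)
print(f"val_loss: {val_loss}, hp_metric: {hp_metric}")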

29.3.2 Load a Model from Checkpoint

The checkpoint written by test_model can be reloaded with the function load_light_from_checkpoint.

from spotPython.light.loadmodel import load_light_from_checkpoint
model_loaded = load_light_from_checkpoint(config, fun_control)
config: {'l1': 128, 'epochs': 16, 'batch_size': 16, 'act_fn': ReLU(), 'optimizer': 'Adam', 'dropout_prob': 0.06369557694687934, 'lr_mult': 0.4447783604968382, 'patience': 8, 'initialization': 'Default'}
Loading model with 128_16_16_ReLU_Adam_0.0637_0.4448_8_Default_TEST from runs/saved_models/128_16_16_ReLU_Adam_0.0637_0.4448_8_Default_TEST/last.ckpt
Model: NetLightRegression(
  (layers): Sequential(
    (0): Linear(in_features=10, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.06369557694687934, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.06369557694687934, inplace=False)
    (6): Linear(in_features=64, out_features=64, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.06369557694687934, inplace=False)
    (9): Linear(in_features=64, out_features=32, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.06369557694687934, inplace=False)
    (12): Linear(in_features=32, out_features=1, bias=True)
  )
)
vars(model_loaded)
{'training': False,
 '_parameters': OrderedDict(),
 '_buffers': OrderedDict(),
 '_non_persistent_buffers_set': set(),
 '_backward_pre_hooks': OrderedDict(),
 '_backward_hooks': OrderedDict(),
 '_is_full_backward_hook': None,
 '_forward_hooks': OrderedDict(),
 '_forward_hooks_with_kwargs': OrderedDict(),
 '_forward_hooks_always_called': OrderedDict(),
 '_forward_pre_hooks': OrderedDict(),
 '_forward_pre_hooks_with_kwargs': OrderedDict(),
 '_state_dict_hooks': OrderedDict(),
 '_state_dict_pre_hooks': OrderedDict(),
 '_load_state_dict_pre_hooks': OrderedDict(),
 '_load_state_dict_post_hooks': OrderedDict(),
 '_modules': OrderedDict([('layers',
               Sequential(
                 (0): Linear(in_features=10, out_features=128, bias=True)
                 (1): ReLU()
                 (2): Dropout(p=0.06369557694687934, inplace=False)
                 (3): Linear(in_features=128, out_features=64, bias=True)
                 (4): ReLU()
                 (5): Dropout(p=0.06369557694687934, inplace=False)
                 (6): Linear(in_features=64, out_features=64, bias=True)
                 (7): ReLU()
                 (8): Dropout(p=0.06369557694687934, inplace=False)
                 (9): Linear(in_features=64, out_features=32, bias=True)
                 (10): ReLU()
                 (11): Dropout(p=0.06369557694687934, inplace=False)
                 (12): Linear(in_features=32, out_features=1, bias=True)
               ))]),
 'prepare_data_per_node': True,
 'allow_zero_length_dataloader_with_multiple_devices': False,
 '_log_hyperparams': True,
 '_dtype': torch.float32,
 '_device': device(type='mps', index=0),
 '_trainer': None,
 '_example_input_array': tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]),
 '_automatic_optimization': True,
 '_strict_loading': None,
 '_current_fx_name': None,
 '_param_requires_grad_state': {},
 '_metric_attributes': None,
 '_compiler_ctx': None,
 '_fabric': None,
 '_fabric_optimizers': [],
 '_L_in': 10,
 '_L_out': 1,
 '_torchmetric': 'mean_squared_error',
 'metric': <function torchmetrics.functional.regression.mse.mean_squared_error(preds: torch.Tensor, target: torch.Tensor, squared: bool = True, num_outputs: int = 1) -> torch.Tensor>,
 '_hparams_name': 'kwargs',
 '_hparams': "act_fn":         ReLU()
 "batch_size":     16
 "dropout_prob":   0.06369557694687934
 "epochs":         16
 "initialization": Default
 "l1":             128
 "lr_mult":        0.4447783604968382
 "optimizer":      Adam
 "patience":       8,
 '_hparams_initial': "act_fn":         ReLU()
 "batch_size":     16
 "dropout_prob":   0.06369557694687934
 "epochs":         16
 "initialization": Default
 "l1":             128
 "lr_mult":        0.4447783604968382
 "optimizer":      Adam
 "patience":       8}
import torch
# torch.save pickles the complete model object, not just its weights.
torch.save(model_loaded, "model.pt")
mymodel = torch.load("model.pt")
# show all attributes of the loaded model
vars(mymodel)
{'training': False,
 '_parameters': OrderedDict(),
 '_buffers': OrderedDict(),
 '_non_persistent_buffers_set': set(),
 '_backward_pre_hooks': OrderedDict(),
 '_backward_hooks': OrderedDict(),
 '_is_full_backward_hook': None,
 '_forward_hooks': OrderedDict(),
 '_forward_hooks_with_kwargs': OrderedDict(),
 '_forward_hooks_always_called': OrderedDict(),
 '_forward_pre_hooks': OrderedDict(),
 '_forward_pre_hooks_with_kwargs': OrderedDict(),
 '_state_dict_hooks': OrderedDict(),
 '_state_dict_pre_hooks': OrderedDict(),
 '_load_state_dict_pre_hooks': OrderedDict(),
 '_load_state_dict_post_hooks': OrderedDict(),
 '_modules': OrderedDict([('layers',
               Sequential(
                 (0): Linear(in_features=10, out_features=128, bias=True)
                 (1): ReLU()
                 (2): Dropout(p=0.06369557694687934, inplace=False)
                 (3): Linear(in_features=128, out_features=64, bias=True)
                 (4): ReLU()
                 (5): Dropout(p=0.06369557694687934, inplace=False)
                 (6): Linear(in_features=64, out_features=64, bias=True)
                 (7): ReLU()
                 (8): Dropout(p=0.06369557694687934, inplace=False)
                 (9): Linear(in_features=64, out_features=32, bias=True)
                 (10): ReLU()
                 (11): Dropout(p=0.06369557694687934, inplace=False)
                 (12): Linear(in_features=32, out_features=1, bias=True)
               ))]),
 'prepare_data_per_node': True,
 'allow_zero_length_dataloader_with_multiple_devices': False,
 '_log_hyperparams': True,
 '_dtype': torch.float32,
 '_device': device(type='mps', index=0),
 '_trainer': None,
 '_example_input_array': tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
         [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]),
 '_automatic_optimization': True,
 '_strict_loading': None,
 '_current_fx_name': None,
 '_param_requires_grad_state': {},
 '_metric_attributes': None,
 '_compiler_ctx': None,
 '_fabric': None,
 '_fabric_optimizers': [],
 '_L_in': 10,
 '_L_out': 1,
 '_torchmetric': 'mean_squared_error',
 'metric': <function torchmetrics.functional.regression.mse.mean_squared_error(preds: torch.Tensor, target: torch.Tensor, squared: bool = True, num_outputs: int = 1) -> torch.Tensor>,
 '_hparams_name': 'kwargs',
 '_hparams': "act_fn":         ReLU()
 "batch_size":     16
 "dropout_prob":   0.06369557694687934
 "epochs":         16
 "initialization": Default
 "l1":             128
 "lr_mult":        0.4447783604968382
 "optimizer":      Adam
 "patience":       8,
 '_hparams_initial': "act_fn":         ReLU()
 "batch_size":     16
 "dropout_prob":   0.06369557694687934
 "epochs":         16
 "initialization": Default
 "l1":             128
 "lr_mult":        0.4447783604968382
 "optimizer":      Adam
 "patience":       8}

29.4 Converting a Lightning Model to a Plain Torch Model

29.4.1 The Function get_removed_attributes_and_base_net

spotPython provides a function to convert a PyTorch Lightning model to a plain PyTorch model. The function get_removed_attributes_and_base_net returns a tuple with the removed attributes and the base net. The base net is a plain PyTorch model; the removed attributes are the attributes of the Lightning model that are not part of the base net.

This conversion can be reverted; a sketch of the reverse step is shown after the printed base net below.

import torch
from spotPython.utils.device import getDevice
from spotPython.utils.classes import get_removed_attributes_and_base_net

# Split the Lightning model into its plain PyTorch base net and the
# Lightning-specific attributes that are stripped from it.
removed_attributes, torch_net = get_removed_attributes_and_base_net(net=mymodel)
print(removed_attributes)
{'allow_zero_length_dataloader_with_multiple_devices': False, '_strict_loading': None, '_device': device(type='mps', index=0), '_example_input_array': tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]), '_torchmetric': 'mean_squared_error', 'metric': <function mean_squared_error at 0x3a31c53a0>, '_hparams_initial': "act_fn":         ReLU()
"batch_size":     16
"dropout_prob":   0.06369557694687934
"epochs":         16
"initialization": Default
"l1":             128
"lr_mult":        0.4447783604968382
"optimizer":      Adam
"patience":       8, '_param_requires_grad_state': {}, '_dtype': torch.float32, '_automatic_optimization': True, '_current_fx_name': None, '_metric_attributes': None, '_trainer': None, '_compiler_ctx': None, 'prepare_data_per_node': True, '_L_out': 1, '_L_in': 10, '_hparams_name': 'kwargs', '_fabric': None, '_hparams': "act_fn":         ReLU()
"batch_size":     16
"dropout_prob":   0.06369557694687934
"epochs":         16
"initialization": Default
"l1":             128
"lr_mult":        0.4447783604968382
"optimizer":      Adam
"patience":       8, '_log_hyperparams': True, '_fabric_optimizers': []}
print(torch_net)
NetLightRegression(
  (layers): Sequential(
    (0): Linear(in_features=10, out_features=128, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.06369557694687934, inplace=False)
    (3): Linear(in_features=128, out_features=64, bias=True)
    (4): ReLU()
    (5): Dropout(p=0.06369557694687934, inplace=False)
    (6): Linear(in_features=64, out_features=64, bias=True)
    (7): ReLU()
    (8): Dropout(p=0.06369557694687934, inplace=False)
    (9): Linear(in_features=64, out_features=32, bias=True)
    (10): ReLU()
    (11): Dropout(p=0.06369557694687934, inplace=False)
    (12): Linear(in_features=32, out_features=1, bias=True)
  )
)
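
As stated above, the conversion can be reverted. A minimal sketch (plain setattr calls, not a spotPython helper): re-attaching the entries of removed_attributes restores the stripped Lightning attributes on the net.

# Revert the conversion by re-attaching the removed attributes
# (removed_attributes is the dict returned above).
for name, value in removed_attributes.items():
    setattr(torch_net, name, value)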

29.4.2 An Example of How to Use the Plain Torch Net

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

# Load the Diabetes dataset from sklearn
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Convert the data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Create a PyTorch dataset
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create a PyTorch dataloader
batch_size = 32
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=batch_size)

torch_net.to(getDevice("cpu"))

# train the net
criterion = nn.MSELoss()
optimizer = optim.Adam(torch_net.parameters(), lr=0.01)
n_epochs = 100
losses = []
for epoch in range(n_epochs):
    for inputs, targets in train_dataloader:
        targets = targets.view(-1, 1)
        optimizer.zero_grad()
        outputs = torch_net(inputs)
        loss = criterion(outputs, targets)
        losses.append(loss.item())
        loss.backward()
        optimizer.step()
# visualize the training; losses are recorded per batch, not per epoch
plt.plot(losses)
plt.xlabel("Batch")
plt.ylabel("Loss")
plt.show()
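
The test_dataloader created above is not used during training. A short sketch of how it can be used to estimate the test error of the retrained net:

# Evaluate the retrained net on the held-out test set.
torch_net.eval()
test_loss, n = 0.0, 0
with torch.no_grad():
    for inputs, targets in test_dataloader:
        outputs = torch_net(inputs)
        test_loss += criterion(outputs, targets.view(-1, 1)).item() * len(inputs)
        n += len(inputs)
print(f"Test MSE: {test_loss / n:.2f}")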