#!/bin/bash
### Resource allocation
#SBATCH --job-name=Test
#SBATCH --cpus-per-task=20
#SBATCH --gres=gpu:1
#SBATCH --time=48:00:00
#SBATCH --error=job.%J.err
#SBATCH --output=job.%J.out
#----
#SBATCH --partition=gpu

# Slurm batch script: runs startPython.py on the pickle file given as the
# first (and only) argument.
# Usage: sbatch startSlurm.sh <path_to_spot.pkl>

# Abort early when no pickle file was passed on the command line.
if [ -z "${1:-}" ]; then
  echo "Usage: $0 <path_to_spot.pkl>" >&2
  exit 1
fi

SPOT_PKL=$1

module load conda

### change to your conda environment with spotpython installed via
### pip install spotpython
conda activate spot312

# Hand the pickle file to the Python driver script.
python startPython.py "$SPOT_PKL"

# A bare exit propagates the exit status of the python command above.
exit
Appendix F — Using Slurm
F.1 Introduction
This chapter describes how to generate a spotpython
configuration on a local machine and run the spotpython
code on a remote machine using Slurm.
F.2 Prepare the Slurm Scripts on the Remote Machine
Two scripts are required to run the spotpython
code on the remote machine:
startSlurm.sh
and startPython.py.
They should be saved in the same directory as the configuration (pickle
) file. These two scripts must be generated only once and can be reused for different configurations.
The startSlurm.sh
script is a shell script; its code is the bash listing at the very beginning of this document (the block that starts with #!/bin/bash).
Save the code in a file named startSlurm.sh
and copy the file to the remote machine via scp
, i.e.,
scp startSlurm.sh user@144.33.22.1:
The startPython.py
script is a Python script that contains the following code:
import argparse
import pickle
from spotpython.utils.file import load_and_run_spot_python_experiment
def main(pickle_file):
    """Load the experiment stored in ``pickle_file`` and run it.

    Args:
        pickle_file (str): Path to the pickle file that is passed as
            ``filename`` to ``load_and_run_spot_python_experiment``.
    """
    spot_tuner = load_and_run_spot_python_experiment(filename=pickle_file)
if __name__ == "__main__":
    # Command-line entry point: expects exactly one positional argument,
    # the path to the pickle file, and forwards it to main().
    parser = argparse.ArgumentParser(description='Process a pickle file.')
    parser.add_argument('pickle_file', type=str, help='The path to the pickle file to be processed.')
    args = parser.parse_args()
    main(args.pickle_file)
Save the code in a file named startPython.py
and copy the file to the remote machine via scp
, i.e.,
scp startPython.py user@144.33.22.1:
F.3 Generate a spotpython
Configuration
The configuration can be generated on a local machine using the following Python code:
from spotpython.data.diabetes import Diabetes
from spotpython.hyperdict.light_hyper_dict import LightHyperDict
from spotpython.fun.hyperlight import HyperLight
from spotpython.utils.init import (fun_control_init, surrogate_control_init, design_control_init)
from spotpython.spot import Spot
from spotpython.utils.file import get_experiment_filename, get_tuned_architecture
from spotpython.hyperparameters.values import set_hyperparameter
from math import inf
import torch
from torch.utils.data import TensorDataset
# generate data: a random regression data set wrapped in a TensorDataset
num_samples = 100_000
input_dim = 100
X = torch.randn(num_samples, input_dim)  # random data for example
Y = torch.randn(num_samples, 1)  # random target for example
data_set = TensorDataset(X, Y)

# prefix of the experiment; the text of this chapter uses 42_exp.pkl and 42_res.pkl
PREFIX = "42"
# Build the control dictionary for the experiment. Each keyword below is
# paired with the value that belongs to it (hardware settings first, then
# experiment bookkeeping, then data and model).
fun_control = fun_control_init(
    accelerator="gpu",
    devices="auto",
    num_nodes=1,
    num_workers=19,
    precision="32",
    strategy="auto",
    save_experiment=True,
    PREFIX=PREFIX,
    fun_evals=50,
    max_time=inf,
    data_set=data_set,
    core_model_name="light.regression.NNLinearRegressor",
    hyperdict=LightHyperDict,
    _L_in=input_dim,  # input size matches the feature dimension of X
    _L_out=1)  # one target column, matching Y
# objective function
fun = HyperLight().fun

# restrict the hyperparameters in fun_control to the values/ranges listed here
set_hyperparameter(fun_control, "optimizer", ["Adadelta", "Adam", "Adamax"])
set_hyperparameter(fun_control, "l1", [5, 10])
set_hyperparameter(fun_control, "epochs", [10, 12])
set_hyperparameter(fun_control, "batch_size", [4, 11])
set_hyperparameter(fun_control, "dropout_prob", [0.0, 0.025])
set_hyperparameter(fun_control, "patience", [2, 9])

design_control = design_control_init(init_size=10)

# create the Spot object from the objective function and the control settings
S = Spot(fun=fun, fun_control=fun_control, design_control=design_control)
The configuration is saved as a pickle-file that contains the full information. In our example, the filename is 42_exp.pkl
.
F.4 Copy the Configuration to the Remote Machine
You can copy the configuration to the remote machine using the scp
command. The following command copies the configuration to the remote machine 144.33.22.1
:
scp 42_exp.pkl user@144.33.22.1:
F.5 Run the spotpython
Code on the Remote Machine
Login on the remote machine and run the following command to start the spotpython
code:
ssh user@144.33.22.1
# change this to your conda environment!
conda activate spot312
# submit the job to Slurm
sbatch ./startSlurm.sh 42_exp.pkl
# for debugging, the script can instead be run directly with tracing (bypasses Slurm):
# sh -x ./startSlurm.sh 42_exp.pkl
F.6 Copy the Results to the Local Machine
After the spotpython
code has finished, you can copy the results back to the local machine using the scp
command. The following command copies the results to the local machine:
scp user@144.33.22.1:42_res.pkl .
spotpython
generates two files: PREFIX_exp.pkl
(experiment file), which stores the information about running the experiment, and PREFIX_res.pkl
(result file), which stores the results of the experiment.
F.7 Analyze the Results on the Local Machine
The file 42_res.pkl
contains the results of the spotpython
code. You can analyze the results on the local machine using the following code.
from spotpython.utils.file import load_experiment
# PREFIX is the experiment prefix set when the configuration was generated ("42" in this example)
spot_tuner = load_experiment(PREFIX)
F.7.1 Visualizing the Tuning Progress
Now the spot_tuner
object is loaded and you can analyze the results interactively.
spot_tuner.plot_progress(log_y=True, filename=None)
F.7.2 Design Table with Default and Tuned Hyperparameters
from spotpython.utils.eda import print_res_table
# Print the results table for the loaded tuner (per the section title: default and tuned hyperparameters).
print_res_table(spot_tuner)
F.7.3 Plotting Important Hyperparameters
spot_tuner.plot_important_hyperparameter_contour(max_imp=3)
F.7.4 The Tuned Hyperparameters
import pprint  # needed for pprint.pprint below; not imported elsewhere in this chapter

# get_tuned_architecture is imported from spotpython.utils.file in the configuration code
config = get_tuned_architecture(spot_tuner)
pprint.pprint(config)