import time
from math import inf
import multiprocessing
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from spotpython.data.diabetes import Diabetes
from spotpython.hyperdict.light_hyper_dict import LightHyperDict
from spotpython.fun.hyperlight import HyperLight
from spotpython.utils.init import (fun_control_init, design_control_init)
from spotpython.hyperparameters.values import set_hyperparameter
from spotpython.spot import Spot
from spotpython.utils.parallel import make_parallel
Appendix H — Parallelism in Initial Design
In spotpython, we provide a wrapper function, make_parallel, that encapsulates the objective function to enable its parallel execution via multiprocessing or joblib, allowing multiple configurations to be evaluated at the same time.
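The underlying idea can be illustrated with a small, self-contained sketch; this is not the make_parallel implementation, and parallel_rows and _toy_fun are hypothetical names used only for illustration. The design matrix is split into rows, each row is evaluated in its own worker process, and the results are collected into a single array.
import numpy as np
from multiprocessing import Pool

def _toy_fun(x):
    # Stand-in for an expensive objective evaluated on one configuration.
    return float(np.sum(x**2))

def parallel_rows(fun, X, num_cores=2):
    # Evaluate fun on each row of X in a separate worker process.
    with Pool(processes=num_cores) as pool:
        y = pool.map(fun, list(X))
    return np.array(y)

if __name__ == "__main__":
    X = np.random.rand(10, 3)  # 10 configurations, 3 hyperparameters each
    print(parallel_rows(_toy_fun, X, num_cores=2))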
H.1 Setup
To demonstrate the performance gain enabled by parallelization, we use an example similar to that in Section 47, where we perform hyperparameter tuning with spotpython and PyTorch Lightning on the Diabetes dataset using a ResNet model. We compare the time required with and without parallelization. First, we import the necessary libraries, including the wrapper function make_parallel. We then define the fun_control and design_control settings. For design_control, we deliberately choose an initial design size of 10 for demonstration purposes.
dataset = Diabetes()
fun_control = fun_control_init(
    fun_evals=10,
    max_time=inf,
    data_set=dataset,
    core_model_name="light.regression.NNResNetRegressor",
    hyperdict=LightHyperDict,
    _L_in=10,
    _L_out=1,
    seed=125,
    tensorboard_log=False,
    TENSORBOARD_CLEAN=False,
)
set_hyperparameter(fun_control, "optimizer", ["Adadelta", "Adam", "Adamax"])
set_hyperparameter(fun_control, "l1", [2, 5])
set_hyperparameter(fun_control, "epochs", [5, 8])
set_hyperparameter(fun_control, "batch_size", [5, 8])
set_hyperparameter(fun_control, "dropout_prob", [0.0, 0.5])
set_hyperparameter(fun_control, "patience", [2, 3])
set_hyperparameter(fun_control, "lr_mult", [0.1, 10.0])
design_control = design_control_init(
    init_size=10
)
fun = HyperLight().fun
module_name: light
submodule_name: regression
model_name: NNResNetRegressor
H.2 Experiments
We now measure the time required for sequential and parallel evaluation, beginning with the sequential approach.
H.2.1 Sequential Execution
tic = time.perf_counter()
spot_tuner = Spot(fun=fun, fun_control=fun_control, design_control=design_control)
res = spot_tuner.run()
toc = time.perf_counter()
time_seq = toc - tic
print(f"Time taken for sequential execution: {time_seq:.2f} seconds")
train_model result: {'val_loss': 23811.58984375, 'hp_metric': 23811.58984375}
train_model result: {'val_loss': 25125.443359375, 'hp_metric': 25125.443359375}
train_model result: {'val_loss': 21832.447265625, 'hp_metric': 21832.447265625}
train_model result: {'val_loss': 22035.162109375, 'hp_metric': 22035.162109375}
train_model result: {'val_loss': 23203.830078125, 'hp_metric': 23203.830078125}
train_model result: {'val_loss': 14768.158203125, 'hp_metric': 14768.158203125}
train_model result: {'val_loss': 22453.712890625, 'hp_metric': 22453.712890625}
train_model result: {'val_loss': 24413.876953125, 'hp_metric': 24413.876953125}
train_model result: {'val_loss': 23999.810546875, 'hp_metric': 23999.810546875}
train_model result: {'val_loss': 24063.423828125, 'hp_metric': 24063.423828125}
Experiment saved to 000_res.pkl
Time taken for sequential execution: 119.64 seconds
H.2.2 Parallel Execution
To use make_parallel, the number of cores must be specified via the num_cores parameter. By default, the function utilizes multiprocessing, but other parallelization methods can be selected using the method argument. The following two lines of code demonstrate how to set up the parallel function and run the Spot tuner with it.
parallel_fun = make_parallel(fun, num_cores=num_cores)
spot_parallel_tuner = Spot(fun=parallel_fun, fun_control=fun_control, design_control=design_control)
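If joblib is preferred over multiprocessing, the backend can be switched via the method argument. The exact set of accepted values should be checked in the make_parallel documentation; the call below assumes that "joblib" is among them.
# Assumed usage: select joblib as the parallelization backend
# (value to be verified against the make_parallel documentation).
parallel_fun = make_parallel(fun, num_cores=num_cores, method="joblib")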
We consider parallel efficiency, a metric that measures how effectively additional computational resources (cores/processors) are being utilized in a parallel computation. It’s calculated as: \[ \text{Efficiency} = \frac{\text{Speedup}}{\text{Number of Processors}}, \] where:
- Speedup = Time(Sequential) / Time(Parallel)
- Number of Processors = Number of cores used
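For example, with the timings reported in this section (119.64 seconds for the sequential run above and 89.37 seconds with two cores below), we obtain \[ \text{Speedup} = \frac{119.64}{89.37} \approx 1.34, \qquad \text{Efficiency} = \frac{1.34}{2} \approx 0.67. \]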
It can be interpreted as follows:
- 1.0 (100%): Perfect linear scaling - doubling cores halves execution time
- 0.8-0.9 (80-90%): Excellent scaling - minimal parallelization overhead
- 0.5-0.7 (50-70%): Good scaling - reasonable utilization of additional cores
- <0.5 (<50%): Poor scaling - diminishing returns from adding more cores
When efficiency drops significantly as you add cores, it indicates:
- Communication overhead increasing
- Synchronization bottlenecks
- Load imbalance between cores
- Portions of code that remain sequential (Amdahl's Law limitation; see the formula below)
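The last point can be made quantitative with Amdahl's Law: if a fraction p of the work can be parallelized and the remainder is sequential, the speedup on N cores is bounded by \[ S(N) = \frac{1}{(1 - p) + p/N} \leq \frac{1}{1 - p}. \] Even with p = 0.5, the speedup can never exceed 2 regardless of the number of cores; the plateau at roughly 1.3 to 1.4 observed in the experiments below is consistent with a large sequential fraction of the evaluation pipeline.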
# Get available cores
available_cores = multiprocessing.cpu_count()
print(f"Available cores: {available_cores}")

# Generate list of cores to test (powers of 2 up to available cores)
cores_to_test = []
power = 0
while 2**power <= available_cores:
    cores_to_test.append(2**power)
    power += 1

# If the number of available cores is not a power of 2, add it to the list
if available_cores not in cores_to_test:
    cores_to_test.append(available_cores)

# Prepare DataFrame to store results
results_df = pd.DataFrame(columns=["number_of_cores", "time"])

# Run the experiment for each core count
for num_cores in cores_to_test:
    print(f"\nTesting with {num_cores} cores...")
    tic = time.perf_counter()
    parallel_fun = make_parallel(fun, num_cores=num_cores)
    spot_parallel_tuner = Spot(fun=parallel_fun, fun_control=fun_control, design_control=design_control)
    res = spot_parallel_tuner.run()
    toc = time.perf_counter()
    time_taken = toc - tic

    # Add result to DataFrame
    results_df = pd.concat([results_df, pd.DataFrame({
        "number_of_cores": [num_cores],
        "time": [time_taken]
    })], ignore_index=True)

    print(f"Time taken with {num_cores} cores: {time_taken:.2f} seconds")
Available cores: 24
Testing with 1 cores...
train_model result: {'val_loss': 23811.58984375, 'hp_metric': 23811.58984375}
train_model result: {'val_loss': 22507.580078125, 'hp_metric': 22507.580078125}
train_model result: {'val_loss': 22085.53515625, 'hp_metric': 22085.53515625}
train_model result: {'val_loss': 21815.44140625, 'hp_metric': 21815.44140625}
train_model result: {'val_loss': 23528.212890625, 'hp_metric': 23528.212890625}
train_model result: {'val_loss': 23563.298828125, 'hp_metric': 23563.298828125}
train_model result: {'val_loss': 22435.96875, 'hp_metric': 22435.96875}
train_model result: {'val_loss': 23983.15625, 'hp_metric': 23983.15625}
train_model result: {'val_loss': 23000.837890625, 'hp_metric': 23000.837890625}
train_model result: {'val_loss': 23442.49609375, 'hp_metric': 23442.49609375}
Experiment saved to 000_res.pkl
Time taken with 1 cores: 143.65 seconds
Testing with 2 cores...
train_model result: {'val_loss': 22085.53515625, 'hp_metric': 22085.53515625}
train_model result: {'val_loss': 21815.44140625, 'hp_metric': 21815.44140625}
Experiment saved to 000_res.pkl
Time taken with 2 cores: 89.37 seconds
Testing with 4 cores...
train_model result: {'val_loss': 23811.58984375, 'hp_metric': 23811.58984375}
train_model result: {'val_loss': 23442.49609375, 'hp_metric': 23442.49609375}
train_model result: {'val_loss': 22085.53515625, 'hp_metric': 22085.53515625}
train_model result: {'val_loss': 23528.212890625, 'hp_metric': 23528.212890625}
train_model result: {'val_loss': 22435.96875, 'hp_metric': 22435.96875}
train_model result: {'val_loss': 23000.837890625, 'hp_metric': 23000.837890625}
train_model result: {'val_loss': 21815.44140625, 'hp_metric': 21815.44140625}
train_model result: {'val_loss': 22507.580078125, 'hp_metric': 22507.580078125}
train_model result: {'val_loss': 23563.298828125, 'hp_metric': 23563.298828125}
train_model result: {'val_loss': 23983.15625, 'hp_metric': 23983.15625}
Experiment saved to 000_res.pkl
Time taken with 4 cores: 90.43 seconds
Testing with 8 cores...
train_model result: {'val_loss': 23811.58984375, 'hp_metric': 23811.58984375}
train_model result: {'val_loss': 22435.96875, 'hp_metric': 22435.96875}
train_model result: {'val_loss': 22085.53515625, 'hp_metric': 22085.53515625}
train_model result: {'val_loss': 23442.49609375, 'hp_metric': 23442.49609375}
train_model result: {'val_loss': 23528.212890625, 'hp_metric': 23528.212890625}
train_model result: {'val_loss': 23563.298828125, 'hp_metric': 23563.298828125}
train_model result: {'val_loss': 23000.837890625, 'hp_metric': 23000.837890625}
train_model result: {'val_loss': 22507.580078125, 'hp_metric': 22507.580078125}
train_model result: {'val_loss': 23983.15625, 'hp_metric': 23983.15625}
train_model result: {'val_loss': 21815.44140625, 'hp_metric': 21815.44140625}
Experiment saved to 000_res.pkl
Time taken with 8 cores: 85.67 seconds
Testing with 16 cores...
train_model result: {'val_loss': 23563.298828125, 'hp_metric': 23563.298828125}
train_model result: {'val_loss': 22435.96875, 'hp_metric': 22435.96875}
train_model result: {'val_loss': 22085.53515625, 'hp_metric': 22085.53515625}
train_model result: {'val_loss': 23442.49609375, 'hp_metric': 23442.49609375}
train_model result: {'val_loss': 23528.212890625, 'hp_metric': 23528.212890625}
train_model result: {'val_loss': 22507.580078125, 'hp_metric': 22507.580078125}
train_model result: {'val_loss': 23811.58984375, 'hp_metric': 23811.58984375}
train_model result: {'val_loss': 23000.837890625, 'hp_metric': 23000.837890625}
train_model result: {'val_loss': 23983.15625, 'hp_metric': 23983.15625}
train_model result: {'val_loss': 21815.44140625, 'hp_metric': 21815.44140625}
Experiment saved to 000_res.pkl
Time taken with 16 cores: 87.47 seconds
Testing with 24 cores...
train_model result: {'val_loss': 23563.298828125, 'hp_metric': 23563.298828125}
train_model result: {'val_loss': 22435.96875, 'hp_metric': 22435.96875}
train_model result: {'val_loss': 22085.53515625, 'hp_metric': 22085.53515625}
train_model result: {'val_loss': 23442.49609375, 'hp_metric': 23442.49609375}
train_model result: {'val_loss': 23528.212890625, 'hp_metric': 23528.212890625}
train_model result: {'val_loss': 23811.58984375, 'hp_metric': 23811.58984375}
train_model result: {'val_loss': 22507.580078125, 'hp_metric': 22507.580078125}
train_model result: {'val_loss': 23000.837890625, 'hp_metric': 23000.837890625}
train_model result: {'val_loss': 23983.15625, 'hp_metric': 23983.15625}
train_model result: {'val_loss': 21815.44140625, 'hp_metric': 21815.44140625}
Experiment saved to 000_res.pkl
Time taken with 24 cores: 88.25 seconds
H.2.3 Results
print("\nPerformance comparison across different numbers of cores:")
"speedup_vs_sequential"] = time_seq / results_df["time"]
results_df["efficiency"] = results_df["speedup_vs_sequential"] / results_df["number_of_cores"]
results_df[print(results_df)
Performance comparison across different numbers of cores:
number_of_cores time speedup_vs_sequential efficiency
0 1 143.652619 0.832832 0.832832
1 2 89.370600 1.338678 0.669339
2 4 90.434064 1.322935 0.330734
3 8 85.668532 1.396527 0.174566
4 16 87.469851 1.367768 0.085485
5 24 88.254437 1.355608 0.056484
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 5))

# Execution time vs number of cores
ax1.plot(results_df["number_of_cores"], results_df["time"], marker='o', linestyle='-')
ax1.set_xlabel("Number of cores")
ax1.set_ylabel("Execution time (seconds)")
ax1.set_title("Execution Time vs Number of Cores")
ax1.grid(True)

# Speedup vs number of cores
ax1.axhline(y=time_seq, color='r', linestyle='--', label=f'Sequential ({time_seq:.2f}s)')
ax1.legend()

# Parallel efficiency
ax2.plot(results_df["number_of_cores"], results_df["efficiency"], marker='o', linestyle='-')
ax2.set_xlabel("Number of cores")
ax2.set_ylabel("Parallel efficiency")
ax2.set_title("Parallel Efficiency vs Number of Cores")
ax2.set_ylim(0, 1.1)
ax2.grid(True)

plt.tight_layout()
plt.show()
Linux uses the fork method by default to start new processes, whereas macOS and Windows use the spawn method. This leads to differences in how processes are handled across operating systems. We use set_all_seeds to ensure that the evaluation remains reproducible across all operating systems.
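If identical process-handling behavior across platforms is desired, the start method can also be fixed explicitly at the top of the script. The snippet below is a minimal sketch using only the standard library; whether make_parallel picks up a globally set start method is an assumption that should be verified against its documentation.
import multiprocessing

if __name__ == "__main__":
    # Force the "spawn" start method on every platform; it is the default
    # on macOS and Windows and is also available on Linux.
    multiprocessing.set_start_method("spawn", force=True)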