optimizer

optimizer_handler(optimizer_name, params, lr_mult=1.0, **kwargs)

Returns an instance of the specified optimizer. See Notes below for supported optimizers.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| optimizer_name | str | The name of the optimizer to use. | required |
| params | list or torch.Tensor | The parameters to optimize. | required |
| lr_mult | float | A multiplier for the optimizer's base learning rate. | 1.0 |
| **kwargs | Any | Additional keyword arguments for the optimizer. | {} |
Notes

Each optimizer is created with fixed default hyperparameters; lr_mult scales the base learning rate of most optimizers (for example, Adam's base rate of 0.001 becomes lr_mult * 0.001). The following optimizers are supported (see also: https://pytorch.org/docs/stable/optim.html#base-class):

* Adadelta
* Adagrad
* Adam
* AdamW
* SparseAdam
* Adamax
* ASGD
* LBFGS
* NAdam
* RAdam
* RMSprop
* Rprop
* SGD

Returns:

| Type | Description |
| --- | --- |
| torch.optim.Optimizer | An instance of the specified optimizer. |

Examples:

>>> from torch.utils.data import DataLoader
    from spotpython.data.diabetes import Diabetes
    from spotpython.light.netlightregression import NetLightRegression
    from torch import nn
    import lightning as L
    BATCH_SIZE = 8
    lr_mult=0.1
    dataset = Diabetes()
    train_loader = DataLoader(dataset, batch_size=BATCH_SIZE)
    test_loader = DataLoader(dataset, batch_size=BATCH_SIZE)
    val_loader = DataLoader(dataset, batch_size=BATCH_SIZE)
    # First example: Adam
    net_light_base = NetLightRegression(l1=128, epochs=10, batch_size=BATCH_SIZE,
                                    initialization='xavier', act_fn=nn.ReLU(),
                                    optimizer='Adam', dropout_prob=0.1, lr_mult=lr_mult,
                                    patience=5, _L_in=10, _L_out=1)
    trainer = L.Trainer(max_epochs=2,  enable_progress_bar=False)
    trainer.fit(net_light_base, train_loader)
    # Adam's lr is computed as lr = lr_mult * 0.001, so this value
    # should be 0.1 * 0.001 = 0.0001
    trainer.optimizers[0].param_groups[0]["lr"] == lr_mult*0.001
    # Second example: Adadelta
    net_light_base = NetLightRegression(l1=128, epochs=10, batch_size=BATCH_SIZE,
                                    initialization='xavier', act_fn=nn.ReLU(),
                                    optimizer='Adadelta', dropout_prob=0.1, lr_mult=lr_mult,
                                    patience=5, _L_in=10, _L_out=1)
    trainer = L.Trainer(max_epochs=2,  enable_progress_bar=False)
    trainer.fit(net_light_base, train_loader)
    # Adadelta's lr is computed as lr = lr_mult * 1.0, so this value
    # should be 0.1 * 1.0 = 0.1
    trainer.optimizers[0].param_groups[0]["lr"] == lr_mult*1.0
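
The examples above set the optimizer indirectly through NetLightRegression. Below is a minimal sketch of calling optimizer_handler directly; the nn.Linear model is only an illustrative placeholder and not part of spotpython:

>>> import torch
    from torch import nn
    from spotpython.hyperparameters.optimizer import optimizer_handler
    # Hypothetical one-layer model, used only to supply parameters
    model = nn.Linear(10, 1)
    # Adam is created with a base lr of 0.001, scaled by lr_mult
    opt = optimizer_handler("Adam", model.parameters(), lr_mult=0.1)
    opt.param_groups[0]["lr"]  # 0.1 * 0.001 = 0.0001
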
Source code in spotpython/hyperparameters/optimizer.py

from typing import Any, Union

import torch

def optimizer_handler(
    optimizer_name: str, params: Union[list, torch.Tensor], lr_mult: float = 1.0, **kwargs: Any
) -> torch.optim.Optimizer:
    """Returns an instance of the specified optimizer. See Notes below for supported optimizers.

    Args:
        optimizer_name (str):
            The name of the optimizer to use.
        params (list or torch.Tensor):
            The parameters to optimize.
        lr_mult (float, optional):
            A multiplier for the learning rate. Defaults to 1.0.
        **kwargs:
            Additional keyword arguments for the optimizer.

    Notes:
        Each optimizer is created with fixed default hyperparameters; lr_mult scales the base learning rate of most optimizers (for example, Adam's base rate of 0.001 becomes lr_mult * 0.001). The following optimizers are supported (see also: https://pytorch.org/docs/stable/optim.html#base-class):

            * Adadelta
            * Adagrad
            * Adam
            * AdamW
            * SparseAdam
            * Adamax
            * ASGD
            * LBFGS
            * NAdam
            * RAdam
            * RMSprop
            * Rprop
            * SGD

    Returns:
        (torch.optim.Optimizer):
            An instance of the specified optimizer.

    Examples:
        >>> from torch.utils.data import DataLoader
            from spotpython.data.diabetes import Diabetes
            from spotpython.light.netlightregression import NetLightRegression
            from torch import nn
            import lightning as L
            BATCH_SIZE = 8
            lr_mult=0.1
            dataset = Diabetes()
            train_loader = DataLoader(dataset, batch_size=BATCH_SIZE)
            test_loader = DataLoader(dataset, batch_size=BATCH_SIZE)
            val_loader = DataLoader(dataset, batch_size=BATCH_SIZE)
            # First example: Adam
            net_light_base = NetLightRegression(l1=128, epochs=10, batch_size=BATCH_SIZE,
                                            initialization='xavier', act_fn=nn.ReLU(),
                                            optimizer='Adam', dropout_prob=0.1, lr_mult=lr_mult,
                                            patience=5, _L_in=10, _L_out=1)
            trainer = L.Trainer(max_epochs=2,  enable_progress_bar=False)
            trainer.fit(net_light_base, train_loader)
            # Adam's lr is computed as lr = lr_mult * 0.001, so this value
            # should be 0.1 * 0.001 = 0.0001
            trainer.optimizers[0].param_groups[0]["lr"] == lr_mult*0.001
            # Second example: Adadelta
            net_light_base = NetLightRegression(l1=128, epochs=10, batch_size=BATCH_SIZE,
                                            initialization='xavier', act_fn=nn.ReLU(),
                                            optimizer='Adadelta', dropout_prob=0.1, lr_mult=lr_mult,
                                            patience=5, _L_in=10, _L_out=1)
            trainer = L.Trainer(max_epochs=2,  enable_progress_bar=False)
            trainer.fit(net_light_base, train_loader)
            # Adadelta's lr is computed as lr = lr_mult * 1.0, so this value
            # should be 0.1 * 1.0 = 0.1
            trainer.optimizers[0].param_groups[0]["lr"] == lr_mult*1.0
    """
    if optimizer_name == "Adadelta":
        return torch.optim.Adadelta(
            params,
            lr=lr_mult * 1.0,
            rho=0.9,
            eps=1e-06,
            weight_decay=0,
            foreach=None,
            maximize=False,
            # differentiable=False,
        )
    elif optimizer_name == "Adagrad":
        return torch.optim.Adagrad(
            params,
            lr=lr_mult * 0.01,
            lr_decay=0,
            weight_decay=0,
            initial_accumulator_value=0,
            eps=1e-10,
            foreach=None,
            maximize=False,
            # differentiable=False,
        )
    elif optimizer_name == "Adam":
        return torch.optim.Adam(
            params,
            lr=lr_mult * 0.001,
            betas=(0.9, 0.999),
            eps=1e-08,
            weight_decay=0,
            amsgrad=False,
            foreach=None,
            maximize=False,
            capturable=False,
            # differentiable=False,
            fused=None,
        )
    elif optimizer_name == "AdamW":
        return torch.optim.AdamW(
            params,
            lr=lr_mult * 0.001,
            betas=(0.9, 0.999),
            eps=1e-08,
            weight_decay=0.01,
            amsgrad=False,
            foreach=None,
            maximize=False,
            capturable=False,
            # differentiable=False,
            # fused=None,
        )
    elif optimizer_name == "SparseAdam":
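        # Note: lr is fixed at 0.001 here; lr_mult is not applied to SparseAdam.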
        return torch.optim.SparseAdam(params, lr=0.001, betas=(0.9, 0.999), eps=1e-08, maximize=False)
    elif optimizer_name == "Adamax":
        return torch.optim.Adamax(
            params,
            lr=lr_mult * 0.002,
            betas=(0.9, 0.999),
            eps=1e-08,
            weight_decay=0,
            foreach=None,
            maximize=False,
            # differentiable=False,
        )
    elif optimizer_name == "ASGD":
        return torch.optim.ASGD(
            params,
            lr=lr_mult * 0.01,
            lambd=0.0001,
            alpha=0.75,
            t0=1000000.0,
            weight_decay=0,
            foreach=None,
            maximize=False,
            # differentiable=False,
        )
    elif optimizer_name == "LBFGS":
        return torch.optim.LBFGS(
            params,
            lr=lr_mult * 1,
            max_iter=20,
            max_eval=None,
            tolerance_grad=1e-07,
            tolerance_change=1e-09,
            history_size=100,
            line_search_fn=None,
        )
    elif optimizer_name == "NAdam":
        return torch.optim.NAdam(
            params,
            lr=lr_mult * 0.002,
            betas=(0.9, 0.999),
            eps=1e-08,
            weight_decay=0,
            momentum_decay=0.004,
            foreach=None,
            # differentiable=False,
        )
    elif optimizer_name == "RAdam":
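        # Note: lr is fixed at 0.001 here; lr_mult is not applied to RAdam.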
        return torch.optim.RAdam(
            params,
            lr=0.001,
            betas=(0.9, 0.999),
            eps=1e-08,
            weight_decay=0,
            foreach=None,
            # differentiable=False
        )
    elif optimizer_name == "RMSprop":
        return torch.optim.RMSprop(
            params,
            lr=lr_mult * 0.01,
            alpha=0.99,
            eps=1e-08,
            weight_decay=0,
            momentum=0,
            centered=False,
            foreach=None,
            maximize=False,
            # differentiable=False,
        )
    elif optimizer_name == "Rprop":
        return torch.optim.Rprop(
            params,
            lr=lr_mult * 0.01,
            etas=(0.5, 1.2),
            step_sizes=(1e-06, 50),
            foreach=None,
            maximize=False,
            # differentiable=False,
        )
    elif optimizer_name == "SGD":
        return torch.optim.SGD(
            params,
            lr=lr_mult * 1e-3,
            momentum=0,
            dampening=0,
            weight_decay=0,
            nesterov=False,
            maximize=False,
            foreach=None,
            # differentiable=False,
        )
    else:
        raise ValueError(f"Optimizer {optimizer_name} not supported")
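
For any other optimizer_name, the final branch raises a ValueError. A minimal sketch (FooOpt and model are placeholders for illustration only):

>>> optimizer_handler("FooOpt", model.parameters())
    # raises ValueError: Optimizer FooOpt not supported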