// Adam.hpp (excerpt). Adam optimizer: Adaptive Moment Estimation, one of the
// most popular and efficient gradient-based optimizers.
#include <cmath>
#include <memory>
#include <Eigen/Dense>

class Adam : public Optimizer
{
public:
    // Construct a new Adam object.
    Adam(
        double lr = 1e-3,
        double decay = .0,
        double epsilon = 1e-7,
        double beta_1 = .9,
        double beta_2 = .999) :
        Optimizer(lr, decay),
        _epsilon(epsilon), _beta_1(beta_1), _beta_2(beta_2) { }
    // Update the parameters of the layer.
    void update_params(std::shared_ptr<Dense> &layer)
    {
        Eigen::MatrixXd weights = layer->weights();
        Eigen::MatrixXd biases = layer->biases();
        Eigen::MatrixXd dweights = layer->dweights();
        Eigen::MatrixXd dbiases = layer->dbiases();

        // Second-moment (squared-gradient) caches kept on the layer.
        Eigen::MatrixXd weights_cache = layer->weights_optimizer();
        Eigen::MatrixXd biases_cache = layer->biases_optimizer();

        // First-moment (momentum) estimates kept on the layer.
        Eigen::MatrixXd weights_momentums = layer->weights_optimizer_additional();
        Eigen::MatrixXd biases_momentums = layer->biases_optimizer_additional();
        // Update the biased first-moment (momentum) estimates.
        weights_momentums = _beta_1 * weights_momentums + (1 - _beta_1) * dweights;
        biases_momentums = _beta_1 * biases_momentums + (1 - _beta_1) * dbiases;

        // Bias-correct the first moments (_iterations is 0-based, hence +1).
        Eigen::MatrixXd weights_momentums_corrected = weights_momentums / (1 - std::pow(_beta_1, _iterations + 1));
        Eigen::MatrixXd biases_momentums_corrected = biases_momentums / (1 - std::pow(_beta_1, _iterations + 1));

        // Update the biased second-moment estimates with squared gradients.
        weights_cache = _beta_2 * weights_cache + (1 - _beta_2) * dweights.cwiseAbs2();
        biases_cache = _beta_2 * biases_cache + (1 - _beta_2) * dbiases.cwiseAbs2();

        // Bias-correct the second moments.
        Eigen::MatrixXd weights_cache_corrected = weights_cache / (1 - std::pow(_beta_2, _iterations + 1));
        Eigen::MatrixXd biases_cache_corrected = biases_cache / (1 - std::pow(_beta_2, _iterations + 1));

        // Step: bias-corrected momentum scaled by the per-parameter RMS.
        weights += ((-_current_lr * weights_momentums_corrected).array()
                    / (weights_cache_corrected.cwiseSqrt().array() + _epsilon)).matrix();
        biases += ((-_current_lr * biases_momentums_corrected).array()
                   / (biases_cache_corrected.cwiseSqrt().array() + _epsilon)).matrix();
        // Persist the updated caches and momentums back onto the layer.
        layer->weights_optimizer(weights_cache);
        layer->biases_optimizer(biases_cache);
        layer->weights_optimizer_additional(weights_momentums);
        layer->biases_optimizer_additional(biases_momentums);

        // Apply the updated parameters.
        layer->weights(weights);
        layer->biases(biases);
    }

private:
    double _epsilon;
    double _beta_1;
    double _beta_2;
};
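For reference, update_params implements the standard Adam rule, applied elementwise; here g_t is the gradient (dweights/dbiases), m the momentum, v the cache, eta is _current_lr (the decayed learning rate), and t = _iterations + 1:

\[
\begin{aligned}
m_t &= \beta_1\, m_{t-1} + (1-\beta_1)\, g_t, \\
v_t &= \beta_2\, v_{t-1} + (1-\beta_2)\, g_t^2, \\
\hat m_t &= m_t / (1-\beta_1^t), \qquad \hat v_t = v_t / (1-\beta_2^t), \\
\theta_t &= \theta_{t-1} - \eta\, \hat m_t / (\sqrt{\hat v_t} + \epsilon).
\end{aligned}
\]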
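A minimal usage sketch, assuming a Dense layer that exposes the getters and setters called above. The Dense constructor arguments and the backward pass that fills dweights()/dbiases() are placeholders, not part of this listing:

#include <memory>

// Hypothetical single training step on one layer.
void train_step(std::shared_ptr<Dense> &layer, Adam &optimizer)
{
    // ... forward pass, loss, and backward pass populate
    //     layer->dweights() and layer->dbiases() here ...
    optimizer.update_params(layer);  // one Adam step on this layer
}

int main()
{
    auto layer = std::make_shared<Dense>(2, 64);  // assumed (inputs, outputs) signature
    Adam optimizer(1e-3, 1e-4);                   // lr = 1e-3, decay = 1e-4
    train_step(layer, optimizer);
}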