NNFS
Neural network library from scratch
Adam.hpp
#pragma once

#include "Optimizer.hpp"

namespace NNFS
{
    /**
     * @brief Adam optimizer - Adaptive Moment Estimation, one of the most popular and
     * efficient gradient-based optimizers.
     */
    class Adam : public Optimizer
    {
    public:
        /**
         * @brief Construct a new Adam object.
         *
         * @param lr Learning rate
         * @param decay Learning rate decay
         * @param epsilon Epsilon value to avoid division by zero
         * @param beta_1 Exponential decay rate for the first moment estimates
         * @param beta_2 Exponential decay rate for the second moment estimates
         */
        Adam(double lr = 1e-3, double decay = .0, double epsilon = 1e-7, double beta_1 = .9, double beta_2 = .999)
            : Optimizer(lr, decay),
              _epsilon(epsilon),
              _beta_1(beta_1),
              _beta_2(beta_2) {}

        /**
         * @brief Update the parameters of the layer.
         *
         * @param[in,out] layer Dense layer whose weights and biases are updated in place.
         */
        void update_params(std::shared_ptr<Dense> &layer)
        {
            // Current parameters and their gradients.
            Eigen::MatrixXd weights = layer->weights();
            Eigen::MatrixXd biases = layer->biases();
            Eigen::MatrixXd dweights = layer->dweights();
            Eigen::MatrixXd dbiases = layer->dbiases();

            // Running second-moment ("cache") and first-moment ("momentum") estimates stored on the layer.
            Eigen::MatrixXd weights_cache = layer->weights_optimizer();
            Eigen::MatrixXd biases_cache = layer->biases_optimizer();

            Eigen::MatrixXd weights_momentums = layer->weights_optimizer_additional();
            Eigen::MatrixXd biases_momentums = layer->biases_optimizer_additional();

            // Update the biased first moment estimates with the current gradients.
            weights_momentums = _beta_1 * weights_momentums + (1 - _beta_1) * dweights;
            biases_momentums = _beta_1 * biases_momentums + (1 - _beta_1) * dbiases;

            // Bias-corrected first moment estimates (_iterations is maintained by the Optimizer base class).
            Eigen::MatrixXd weights_momentums_corrected = weights_momentums / (1 - std::pow(_beta_1, (_iterations + 1)));
            Eigen::MatrixXd biases_momentums_corrected = biases_momentums / (1 - std::pow(_beta_1, (_iterations + 1)));

            // Update the biased second raw moment estimates with the squared gradients.
            weights_cache = _beta_2 * weights_cache + (1 - _beta_2) * dweights.cwisePow(2);
            biases_cache = _beta_2 * biases_cache + (1 - _beta_2) * dbiases.cwisePow(2);

            // Bias-corrected second moment estimates.
            Eigen::MatrixXd weights_cache_corrected = weights_cache / (1 - std::pow(_beta_2, (_iterations + 1)));
            Eigen::MatrixXd biases_cache_corrected = biases_cache / (1 - std::pow(_beta_2, (_iterations + 1)));

            // Parameter step: theta -= current_lr * m_hat / (sqrt(v_hat) + epsilon),
            // where _current_lr is maintained by the Optimizer base class.
            weights += ((-_current_lr * weights_momentums_corrected).array() / (weights_cache_corrected.cwisePow(.5).array() + _epsilon)).matrix();
            biases += ((-_current_lr * biases_momentums_corrected).array() / (biases_cache_corrected.cwisePow(.5).array() + _epsilon)).matrix();

            // Persist the running estimates and the updated parameters back to the layer.
            layer->weights_optimizer(weights_cache);
            layer->biases_optimizer(biases_cache);

            layer->weights_optimizer_additional(weights_momentums);
            layer->biases_optimizer_additional(biases_momentums);

            layer->weights(weights);
            layer->biases(biases);
        }

    private:
        double _epsilon; // Epsilon value to avoid division by zero
        double _beta_1;  // Exponential decay rate for the first moment estimates
        double _beta_2;  // Exponential decay rate for the second moment estimates
    };
} // namespace NNFS
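For reference, the body of update_params corresponds to the standard Adam update rule, applied element-wise with the bias-correction step count t = _iterations + 1; here g_t is the gradient (dweights or dbiases), m_t the first-moment "momentum" estimate, v_t the second-moment "cache" estimate, alpha the current learning rate (_current_lr), and epsilon the stabilizer (_epsilon):

\begin{aligned}
m_t &= \beta_1 m_{t-1} + (1 - \beta_1)\, g_t \\
v_t &= \beta_2 v_{t-1} + (1 - \beta_2)\, g_t^2 \\
\hat{m}_t &= \frac{m_t}{1 - \beta_1^{t}}, \qquad
\hat{v}_t = \frac{v_t}{1 - \beta_2^{t}} \\
\theta_t &= \theta_{t-1} - \frac{\alpha\, \hat{m}_t}{\sqrt{\hat{v}_t} + \epsilon}
\end{aligned}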
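A minimal construction sketch, assuming only what this header shows (the Adam constructor signature and update_params); the Dense layer setup and the training loop are defined elsewhere in the library and are deliberately omitted:

#include <memory>

#include "Adam.hpp"

int main()
{
    // Defaults from the constructor above: lr = 1e-3, decay = 0,
    // epsilon = 1e-7, beta_1 = 0.9, beta_2 = 0.999.
    auto adam = std::make_shared<NNFS::Adam>();

    // Non-default hyperparameters: higher learning rate with a small decay
    // (illustrative values only).
    auto tuned = std::make_shared<NNFS::Adam>(0.02, 5e-7);

    // During training, adam->update_params(layer) is called once per step for
    // each std::shared_ptr<Dense> layer; layer construction is not shown here
    // because the Dense API lives outside this header.
    return 0;
}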