34 Eigen::MatrixXd weights = layer->weights();
35 Eigen::MatrixXd biases = layer->biases();
36 Eigen::MatrixXd dweights = layer->dweights();
37 Eigen::MatrixXd dbiases = layer->dbiases();
39 Eigen::MatrixXd weights_cache = layer->weights_optimizer();
40 Eigen::MatrixXd biases_cache = layer->biases_optimizer();
42 weights_cache += dweights.cwisePow(2);
43 biases_cache += dbiases.cwisePow(2);
45 weights += (-
_current_lr * dweights.array() / (weights_cache.array().sqrt() + _epsilon)).matrix();
46 biases += (-
_current_lr * dbiases.array() / (biases_cache.array().sqrt() + _epsilon)).matrix();
48 layer->weights_optimizer(weights_cache);
49 layer->biases_optimizer(biases_cache);
51 layer->weights(weights);
52 layer->biases(biases);
Adagrad (Adaptive Gradient) optimizer.
Definition Adagrad.hpp:14
Adagrad(double lr, double decay=0.0, double epsilon=1e-7)
Construct a new Adagrad object.
Definition Adagrad.hpp:23
void update_params(std::shared_ptr< Dense > &layer)
Update the layer's weights and biases: accumulates the squared gradients into per-parameter caches, then applies an update scaled by the current learning rate divided by the square root of the cache (plus epsilon).
Definition Adagrad.hpp:32
Base class for all optimizers.
Definition Optimizer.hpp:15
double _current_lr
Definition Optimizer.hpp:78
Definition Activation.hpp:6