#pragma once

#include <algorithm>
#include <chrono>
#include <cstdio>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <Eigen/Dense>

#include "clue.hpp" // LOG_ERROR / LOG_WARNING macros (path per project layout)
#include "Model.hpp"

#include "../Layer/Layer.hpp"
#include "../Layer/Dense.hpp"

#include "../Activation/Activation.hpp"
#include "../Activation/ReLU.hpp"
#include "../Activation/Sigmoid.hpp"
#include "../Activation/Tanh.hpp"
#include "../Activation/Softmax.hpp"

#include "../Loss/Loss.hpp"
#include "../Loss/CCE_Softmax.hpp"

#include "../Metrics/Metrics.hpp"

#include "../Optimizer/Optimizer.hpp"
namespace NNFS
{
    /**
     * @brief A neural network model: a stack of layers trained with a loss and an optimizer.
     */
    class NeuralNetwork : public Model
    {
    public:
        /**
         * @brief Constructor for NeuralNetwork.
         */
        NeuralNetwork(std::shared_ptr<Loss> loss = nullptr,
                      std::shared_ptr<Optimizer> optimizer = nullptr)
            : loss_object(loss),
              optimizer_object(optimizer)
        {
        }
        /**
         * @brief Fit the neural network model to the given data.
         */
        void fit(const Eigen::MatrixXd &examples,
                 const Eigen::MatrixXd &labels,
                 const Eigen::MatrixXd &test_examples,
                 const Eigen::MatrixXd &test_labels,
                 int epochs,
                 int batch_size,
                 bool verbose = true) override
        {
            if (loss_object == nullptr || optimizer_object == nullptr)
            {
                LOG_ERROR("Training is not possible for this neural network object as the loss and optimizer have not been specified.");
                return;
            }

            if (!compiled)
            {
                LOG_ERROR("Please compile the neural network object before attempting to train it.");
                return;
            }

            if (examples.cols() != input_dim || test_examples.cols() != input_dim)
            {
                LOG_ERROR("The number of columns in the examples matrix must match the input dimension of the neural network.");
                return;
            }

            if (labels.cols() != output_dim || test_labels.cols() != output_dim)
            {
                LOG_ERROR("The number of columns in the labels matrix must match the output dimension of the neural network.");
                return;
            }

            int num_examples = examples.rows();
            int num_batches = num_examples / batch_size;
            int batches_num_length = std::to_string(num_batches).length();

            for (int epoch = 1; epoch <= epochs; ++epoch)
            {
                std::cout << "Epoch " << epoch << "/" << epochs << std::endl;

                double total_loss = 0;
                double total_data_loss = 0;
                double total_reg_loss = 0;
                int batch_time_total = 0;

                auto time_start = std::chrono::high_resolution_clock::now();

                for (int i = 0; i < num_batches; ++i)
                {
                    auto batch_time_start = std::chrono::high_resolution_clock::now();
                    double batch_loss = 0;
                    double data_loss = 0;
                    double reg_loss = 0;

                    int start = i * batch_size;
                    int end = std::min(start + batch_size, num_examples);

                    Eigen::MatrixXd batch_examples = examples.middleRows(start, end - start);
                    Eigen::MatrixXd batch_labels = labels.middleRows(start, end - start);

                    // Forward pass (in place), then measure data and regularization loss.
                    forward(batch_examples);
                    loss_object->calculate(data_loss, batch_examples, batch_labels);
                    regularization_loss(reg_loss);

                    // Backward pass and parameter update.
                    optimizer_object->pre_update_params();
                    backward(batch_examples, batch_labels);
                    optimizer_object->post_update_params();

                    batch_loss = data_loss + reg_loss;
                    total_loss += batch_loss;
                    total_data_loss += data_loss;
                    total_reg_loss += reg_loss;

                    std::cout << " - " << std::setw(batches_num_length) << i + 1;
                    std::cout << '/' << num_batches;

                    progressbar(num_batches, i, 50);

                    auto batch_time_end = std::chrono::high_resolution_clock::now();
                    auto running = std::chrono::duration_cast<std::chrono::seconds>(batch_time_end - time_start);
                    auto batch_time = std::chrono::duration_cast<std::chrono::milliseconds>(batch_time_end - batch_time_start);

                    batch_time_total += batch_time.count();

                    std::cout << "- " << std::setw(4) << running.count() << "s";

                    // Rewrite the progress line in place until the last batch.
                    if (i + 1 != num_batches)
                    {
                        std::cout << '\r' << std::flush;
                    }
                }

                double train_accuracy = 0;
                double test_accuracy = 0;

                accuracy(train_accuracy, examples, labels);
                accuracy(test_accuracy, test_examples, test_labels);

                batch_time_total /= num_batches;
                total_loss /= num_batches;
                total_data_loss /= num_batches;
                total_reg_loss /= num_batches;

                double current_lr = optimizer_object->current_lr();

                std::cout << " - " << batch_time_total << "ms/batch"
                          << " - loss: " << std::fixed << std::setprecision(3) << total_loss
                          << " ( data: " << total_data_loss << ", reg: " << total_reg_loss
                          << " ) - train_accuracy: " << train_accuracy << " - test_accuracy: " << test_accuracy
                          << " - lr: " << current_lr << std::endl;
            }
        }
        /**
         * @brief Adds a layer to the neural network model.
         */
        void add_layer(std::shared_ptr<Layer> layer)
        {
            layers.push_back(layer);
            num_layers = layers.size();
        }
        /**
         * @brief Compiles the neural network model: validates the layer stack
         * and derives the model's input and output dimensions.
         */
        void compile()
        {
            if (num_layers == 0)
            {
                LOG_ERROR("Please add at least one layer to your neural network using the NNFS::add_layer method before compiling to ensure proper model functionality.");
                return;
            }

            LayerType prev_type = LayerType::DENSE;
            int prev_out = -1;

            for (int i = 0; i < num_layers; i++)
            {
                std::shared_ptr<Layer> cur_layer = layers[i];
                LayerType cur_type = cur_layer->type;

                if (cur_type == LayerType::ACTIVATION)
                {
                    if (i == 0)
                    {
                        LOG_WARNING("Applying an activation function as the first layer of a neural network can distort or lose important information in the input data. It is recommended to use a dense layer for input processing followed by activation functions in subsequent layers to preserve the integrity of the input data.");
                    }
                    else if (prev_type == cur_type)
                    {
                        LOG_WARNING("Applying multiple activation functions in a row can cause issues with learning. Consider using a different layer type or adjusting your model architecture.");
                    }
                }
                else if (cur_type == LayerType::DENSE)
                {
                    // static_pointer_cast is safe here: the layer type tag guarantees
                    // the dynamic type (replaces the original reinterpret_cast).
                    std::shared_ptr<Dense> dense_layer = std::static_pointer_cast<Dense>(layers[i]);

                    int cur_input, cur_output;
                    dense_layer->shape(cur_input, cur_output);

                    if (prev_out != -1 && cur_input != prev_out)
                    {
                        LOG_ERROR("Shape mismatch detected in NNFS::compile(). Previous dense layer output shape (" << prev_out << ") does not match current dense layer input shape (" << cur_input << ").");
                        return;
                    }

                    prev_out = cur_output;
                }
                else
                {
                    LOG_ERROR("Unknown layer type detected in NNFS::compile(). Please ensure that all layers in your neural network have a valid layer type and that the NNFS library supports the specified type.");
                    return;
                }

                prev_type = cur_type;
            }

            // The first dense layer defines input_dim; the last defines output_dim.
            for (int i = 0; i < num_layers; i++)
            {
                std::shared_ptr<Layer> cur_layer = layers[i];

                if (cur_layer->type == LayerType::DENSE)
                {
                    std::shared_ptr<Dense> dense_layer = std::static_pointer_cast<Dense>(layers[i]);

                    int cur_input, cur_output;
                    dense_layer->shape(cur_input, cur_output);

                    if (input_dim == -1)
                    {
                        input_dim = cur_input;
                    }
                    output_dim = cur_output;
                }
            }

            compiled = true;
        }
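        // Usage sketch (illustrative only): assembling and compiling a small
        // classifier. `CCESoftmax` is assumed from the CCE_Softmax.hpp header
        // name, the concrete Optimizer subclass is a placeholder, and Dense is
        // assumed to default its regularizers to zero.
        //
        //     auto loss = std::make_shared<NNFS::CCESoftmax>();
        //     std::shared_ptr<NNFS::Optimizer> opt = /* any concrete NNFS optimizer */;
        //     NNFS::NeuralNetwork net(loss, opt);
        //     net.add_layer(std::make_shared<NNFS::Dense>(784, 128));
        //     net.add_layer(std::make_shared<NNFS::ReLU>());
        //     net.add_layer(std::make_shared<NNFS::Dense>(128, 10));
        //     net.compile(); // checks 784 -> 128 -> 10 chaining, sets input/output dims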
        /**
         * @brief Saves the model to a file in a custom binary format. The
         * model can be loaded back with NNFS::load.
         */
        void save(std::string path)
        {
            std::ofstream ofs(path, std::ios::binary);

            // Header: number of layers.
            ofs.write(reinterpret_cast<const char *>(&num_layers), sizeof(int));

            for (int i = 0; i < num_layers; i++)
            {
                std::shared_ptr<Layer> layer = layers[i];

                if (layer->type == LayerType::DENSE)
                {
                    std::shared_ptr<Dense> dense_layer = std::static_pointer_cast<Dense>(layers[i]);

                    int type = static_cast<int>(layer->type);

                    int n_input, n_output;
                    dense_layer->shape(n_input, n_output);

                    Eigen::MatrixXd weights = dense_layer->weights();
                    Eigen::MatrixXd biases = dense_layer->biases();

                    double l1_weight_regularizer = dense_layer->l1_weights_regularizer();
                    double l2_weight_regularizer = dense_layer->l2_weights_regularizer();
                    double l1_bias_regularizer = dense_layer->l1_biases_regularizer();
                    double l2_bias_regularizer = dense_layer->l2_biases_regularizer();

                    Eigen::MatrixXd weights_optimizer = dense_layer->weights_optimizer();
                    Eigen::MatrixXd biases_optimizer = dense_layer->biases_optimizer();

                    Eigen::MatrixXd weights_optimizer_additional = dense_layer->weights_optimizer_additional();
                    Eigen::MatrixXd biases_optimizer_additional = dense_layer->biases_optimizer_additional();

                    // Layer tag and shape.
                    ofs.write(reinterpret_cast<char *>(&type), sizeof(type));
                    ofs.write(reinterpret_cast<char *>(&n_input), sizeof(n_input));
                    ofs.write(reinterpret_cast<char *>(&n_output), sizeof(n_output));

                    // Parameters.
                    ofs.write(reinterpret_cast<char *>(weights.data()), weights.size() * sizeof(double));
                    ofs.write(reinterpret_cast<char *>(biases.data()), biases.size() * sizeof(double));

                    // Regularization coefficients.
                    ofs.write(reinterpret_cast<char *>(&l1_weight_regularizer), sizeof(double));
                    ofs.write(reinterpret_cast<char *>(&l2_weight_regularizer), sizeof(double));
                    ofs.write(reinterpret_cast<char *>(&l1_bias_regularizer), sizeof(double));
                    ofs.write(reinterpret_cast<char *>(&l2_bias_regularizer), sizeof(double));

                    // Optimizer state.
                    ofs.write(reinterpret_cast<char *>(weights_optimizer.data()), weights_optimizer.size() * sizeof(double));
                    ofs.write(reinterpret_cast<char *>(biases_optimizer.data()), biases_optimizer.size() * sizeof(double));
                    ofs.write(reinterpret_cast<char *>(weights_optimizer_additional.data()), weights_optimizer_additional.size() * sizeof(double));
                    ofs.write(reinterpret_cast<char *>(biases_optimizer_additional.data()), biases_optimizer_additional.size() * sizeof(double));
                }
                else if (layer->type == LayerType::ACTIVATION)
                {
                    std::shared_ptr<Activation> activation_layer = std::static_pointer_cast<Activation>(layers[i]);

                    int type = static_cast<int>(layer->type);
                    int activation_type = static_cast<int>(activation_layer->activation_type);

                    ofs.write(reinterpret_cast<char *>(&type), sizeof(type));
                    ofs.write(reinterpret_cast<char *>(&activation_type), sizeof(activation_type));
                }
                else
                {
                    LOG_ERROR("Unknown layer type detected in NNFS::save(). Please ensure that all layers in your neural network have a valid layer type and that the NNFS library supports the specified type.");
                    // Do not leave a half-written model file behind.
                    ofs.close();
                    std::remove(path.c_str());
                    return;
                }
            }
        }
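        // Resulting file layout (native-endian binary, exactly as written above):
        //
        //     int  num_layers
        //     repeated num_layers times:
        //       int  LayerType tag
        //       if DENSE:      int n_input, int n_output,
        //                      weights (n_input * n_output doubles),
        //                      biases  (n_output doubles),
        //                      l1/l2 weight and l1/l2 bias regularizers (4 doubles),
        //                      optimizer state: weights/biases matrices plus the
        //                      "additional" pair, same shapes as weights/biases
        //       if ACTIVATION: int ActivationType tag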
        /**
         * @brief Loads a model from a file in the custom binary format
         * written by NNFS::save.
         */
        void load(std::string path)
        {
            std::ifstream ifs(path, std::ios::binary);

            if (!ifs.good())
            {
                LOG_ERROR("File does not exist in NNFS::load(). Please ensure that the specified file exists.");
                return;
            }

            // Replace any existing layer stack.
            layers.clear();

            ifs.read(reinterpret_cast<char *>(&num_layers), sizeof(int));

            for (int i = 0; i < num_layers; i++)
            {
                int type;
                ifs.read(reinterpret_cast<char *>(&type), sizeof(type));

                if (type == static_cast<int>(LayerType::DENSE))
                {
                    int n_input, n_output;
                    ifs.read(reinterpret_cast<char *>(&n_input), sizeof(n_input));
                    ifs.read(reinterpret_cast<char *>(&n_output), sizeof(n_output));

                    // Parameters.
                    Eigen::MatrixXd weights(n_input, n_output);
                    Eigen::MatrixXd biases(1, n_output);
                    ifs.read(reinterpret_cast<char *>(weights.data()), weights.size() * sizeof(double));
                    ifs.read(reinterpret_cast<char *>(biases.data()), biases.size() * sizeof(double));

                    // Regularization coefficients.
                    double l1_weight_regularizer;
                    double l2_weight_regularizer;
                    double l1_bias_regularizer;
                    double l2_bias_regularizer;
                    ifs.read(reinterpret_cast<char *>(&l1_weight_regularizer), sizeof(double));
                    ifs.read(reinterpret_cast<char *>(&l2_weight_regularizer), sizeof(double));
                    ifs.read(reinterpret_cast<char *>(&l1_bias_regularizer), sizeof(double));
                    ifs.read(reinterpret_cast<char *>(&l2_bias_regularizer), sizeof(double));

                    std::shared_ptr<Dense> dense_layer = std::make_shared<Dense>(n_input, n_output, l1_weight_regularizer, l2_weight_regularizer, l1_bias_regularizer, l2_bias_regularizer);

                    // Optimizer state.
                    Eigen::MatrixXd weights_optimizer(n_input, n_output);
                    Eigen::MatrixXd biases_optimizer(1, n_output);
                    Eigen::MatrixXd weights_optimizer_additional(n_input, n_output);
                    Eigen::MatrixXd biases_optimizer_additional(1, n_output);
                    ifs.read(reinterpret_cast<char *>(weights_optimizer.data()), weights_optimizer.size() * sizeof(double));
                    ifs.read(reinterpret_cast<char *>(biases_optimizer.data()), biases_optimizer.size() * sizeof(double));
                    ifs.read(reinterpret_cast<char *>(weights_optimizer_additional.data()), weights_optimizer_additional.size() * sizeof(double));
                    ifs.read(reinterpret_cast<char *>(biases_optimizer_additional.data()), biases_optimizer_additional.size() * sizeof(double));

                    dense_layer->weights(weights);
                    dense_layer->biases(biases);
                    dense_layer->weights_optimizer(weights_optimizer);
                    dense_layer->biases_optimizer(biases_optimizer);
                    dense_layer->weights_optimizer_additional(weights_optimizer_additional);
                    dense_layer->biases_optimizer_additional(biases_optimizer_additional);

                    layers.push_back(dense_layer);
                }
                else if (type == static_cast<int>(LayerType::ACTIVATION))
                {
                    int activation_type;
                    ifs.read(reinterpret_cast<char *>(&activation_type), sizeof(activation_type));

                    std::shared_ptr<Activation> activation_layer;

                    switch (static_cast<ActivationType>(activation_type))
                    {
                    case ActivationType::SIGMOID:
                        activation_layer = std::make_shared<Sigmoid>();
                        break;
                    case ActivationType::TANH:
                        activation_layer = std::make_shared<Tanh>();
                        break;
                    case ActivationType::SOFTMAX:
                        activation_layer = std::make_shared<Softmax>();
                        break;
                    case ActivationType::RELU:
                        activation_layer = std::make_shared<ReLU>();
                        break;
                    default:
                        LOG_ERROR("Unknown activation type detected in NNFS::load(). Please ensure that all layers in your neural network have a valid activation type and that the NNFS library supports the specified type.");
                        return;
                    }

                    layers.push_back(activation_layer);
                }
                else
                {
                    LOG_ERROR("Unknown layer type detected in NNFS::load(). Please ensure that all layers in your neural network have a valid layer type and that the NNFS library supports the specified type.");
                    return;
                }
            }
        }
        /**
         * @brief Calculates the accuracy of the neural network on the
         * provided examples and labels.
         */
        void accuracy(double &accuracy, const Eigen::MatrixXd &examples, const Eigen::MatrixXd &labels)
        {
            if (examples.cols() != input_dim || labels.cols() != output_dim)
            {
                LOG_ERROR("Input and output dimensions of the neural network do not match the dimensions of the provided samples and labels.");
                return;
            }

            Eigen::MatrixXd predictions = examples;
            forward(predictions);

            Metrics::accuracy(accuracy, predictions, labels);
        }
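        // Usage sketch (illustrative only):
        //
        //     double test_acc = 0;
        //     net.accuracy(test_acc, test_x, test_y); // forward pass + Metrics::accuracy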
        /**
         * @brief Predicts the class of the provided sample(s).
         */
        Eigen::MatrixXd predict(const Eigen::MatrixXd &sample)
        {
            if (sample.cols() != input_dim)
            {
                LOG_ERROR("Input dimension of the neural network does not match the dimension of the provided sample.");
                return Eigen::MatrixXd::Zero(sample.rows(), sample.cols());
            }

            Eigen::MatrixXd prediction = sample;
            forward(prediction);

            // The layer stack ends at raw scores (the CCE-softmax training loss
            // applies softmax internally), so apply softmax here to return
            // probabilities.
            Softmax softmax;
            Eigen::MatrixXd prediction_softmax = prediction;
            softmax.forward(prediction_softmax, prediction);
            return prediction_softmax;
        }
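        // Usage sketch (illustrative only): predict() returns one row of
        // softmax probabilities per input row; the predicted class is the
        // row-wise argmax.
        //
        //     Eigen::MatrixXd probs = net.predict(sample); // sample: 1 x input_dim
        //     Eigen::Index predicted_class;
        //     probs.row(0).maxCoeff(&predicted_class);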
        /**
         * @brief Forward pass: runs x through every layer in order, in place.
         */
        void forward(Eigen::MatrixXd &x) override
        {
            for (int i = 0; i < num_layers; i++)
            {
                layers[i]->forward(x, x);
            }
        }
        /**
         * @brief Backward pass: seeds the gradient from the loss, propagates
         * it through the layers in reverse, then lets the optimizer update
         * every dense layer.
         */
        void backward(Eigen::MatrixXd &predicted, const Eigen::MatrixXd &labels) override
        {
            Eigen::MatrixXd dx;
            loss_object->backward(dx, predicted, labels);

            for (int i = num_layers - 1; i >= 0; --i)
            {
                layers[i]->backward(dx, dx);
            }

            for (int i = 0; i < num_layers; i++)
            {
                if (layers[i]->type == LayerType::DENSE)
                {
                    std::shared_ptr<Dense> _dense_layer = std::static_pointer_cast<Dense>(layers[i]);
                    optimizer_object->update_params(_dense_layer);
                }
            }
        }
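        // Spelled out for a Dense -> ReLU -> Dense stack, the pass above is the
        // chain rule applied right to left, with dx overwritten at each step:
        //
        //     dx = dLoss/dscores                  // loss_object->backward
        //     dx = Dense2.backward(dx)            // also records dW2, db2
        //     dx = ReLU.backward(dx)
        //     dx = Dense1.backward(dx)            // also records dW1, db1
        //
        // after which update_params() applies the recorded gradients.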
        /**
         * @brief Accumulates the L1/L2 regularization loss of every dense
         * layer into loss.
         */
        void regularization_loss(double &loss)
        {
            for (int i = 0; i < num_layers; i++)
            {
                if (layers[i]->type == LayerType::DENSE)
                {
                    std::shared_ptr<Dense> _dense_layer = std::static_pointer_cast<Dense>(layers[i]);
                    loss += loss_object->regularization_loss(_dense_layer);
                }
            }
        }
        /**
         * @brief Renders a textual progress bar of the given length.
         */
        void progressbar(int total, int current, int length)
        {
            double progress = (current + 1) / (double)total;
            int pos = length * progress;

            std::cout << " [";
            for (int j = 0; j < length; ++j) // renamed from `current`, which shadowed the parameter
            {
                if (j < pos)
                    std::cout << '=';
                else if (j == pos)
                    std::cout << '>';
                else
                    std::cout << ' ';
            }
            std::cout << "] " << std::setw(3) << int(progress * 100) << "% ";
        }
    private:
        std::vector<std::shared_ptr<Layer>> layers;  // layer stack, in forward order
        std::shared_ptr<Loss> loss_object;           // training loss
        std::shared_ptr<Optimizer> optimizer_object; // parameter optimizer
        int num_layers = 0;                          // cached layers.size()
        int input_dim = -1;                          // set by compile()
        int output_dim = -1;                         // set by compile()
        bool compiled = false;                       // set by compile()
    };
} // namespace NNFS