NNFS
Neural network library from scratch
Loading...
Searching...
No Matches
NeuralNetwork.hpp
Go to the documentation of this file.
1#pragma once
2
#include <algorithm>
#include <chrono>
#include <cstdio>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <string>
#include <tuple>
#include <vector>

#include "Model.hpp"

#include "../Layer/Layer.hpp"
#include "../Layer/Dense.hpp"

#include "../Activation/Activation.hpp"
#include "../Activation/ReLU.hpp"
#include "../Activation/Sigmoid.hpp"
#include "../Activation/Tanh.hpp"
#include "../Activation/Softmax.hpp"

#include "../Loss/Loss.hpp"
#include "../Loss/CCE_Softmax.hpp"

#include "../Metrics/Metrics.hpp"

#include "../Optimizer/Optimizer.hpp"
25
26namespace NNFS
27{
28
36 class NeuralNetwork : public Model
37 {
38 public:
46 std::shared_ptr<Loss> loss = nullptr, std::shared_ptr<Optimizer> optimizer = nullptr)
47 : loss_object(loss),
48 optimizer_object(optimizer),
49 num_layers(0) {}
50
64 void fit(const Eigen::MatrixXd &examples,
65 const Eigen::MatrixXd &labels,
66 const Eigen::MatrixXd &test_examples,
67 const Eigen::MatrixXd &test_labels,
68 int epochs,
69 int batch_size,
70 bool verbose = true) override
71 {
72
73 if (loss_object == nullptr || optimizer_object == nullptr)
74 {
75 LOG_ERROR("Training is not possible for this neural network object as the loss and optimizer have not been specified.");
76 return;
77 }
78
79 if (!compiled)
80 {
81 LOG_ERROR("Please compile the neural network object before attempting to train it.");
82 return;
83 }
84
85 if (examples.cols() != input_dim || test_examples.cols() != input_dim)
86 {
87 LOG_ERROR("The number of columns in the examples matrix must match the input dimension of the neural network.");
88 return;
89 }
90
91 if (labels.cols() != output_dim || test_labels.cols() != output_dim)
92 {
93 LOG_ERROR("The number of columns in the labels matrix must match the output dimension of the neural network.");
94 return;
95 }
96
97 int num_examples = examples.rows();
98 int num_batches = num_examples / batch_size;
99 int batches_num_length = std::to_string(num_batches).length();
100
101 for (int epoch = 1; epoch <= epochs; ++epoch)
102 {
103 std::cout << "Epoch " << epoch << "/" << epochs << std::endl;
104
105 double total_loss = 0;
106 double total_data_loss = 0;
107 double total_reg_loss = 0;
108 int batch_time_total = 0;
109
110 auto time_start = std::chrono::high_resolution_clock::now();
111
112 for (int i = 0; i < num_batches; ++i)
113 {
114 auto batch_time_start = std::chrono::high_resolution_clock::now();
115 double batch_loss = 0;
116 double data_loss = 0;
117 double reg_loss = 0;
118 int start = i * batch_size;
119 int end = std::min(start + batch_size, num_examples);
120
121 Eigen::MatrixXd batch_examples = examples.middleRows(start, end - start);
122 Eigen::MatrixXd batch_labels = labels.middleRows(start, end - start);
123
124 forward(batch_examples);
125
126 loss_object->calculate(data_loss, batch_examples, batch_labels);
127
128 regularization_loss(reg_loss);
129
130 optimizer_object->pre_update_params();
131 backward(batch_examples, batch_labels);
132 optimizer_object->post_update_params();
133
134 if (verbose)
135 {
136 batch_loss = data_loss + reg_loss;
137 total_loss += batch_loss;
138
139 total_data_loss += data_loss;
140 total_reg_loss += reg_loss;
141
142 std::cout << " - " << std::setw(batches_num_length) << i + 1;
143 std::cout << '/' << num_batches;
144
145 progressbar(num_batches, i, 50);
146
147 auto batch_time_end = std::chrono::high_resolution_clock::now();
148
149 auto running = std::chrono::duration_cast<std::chrono::seconds>(batch_time_end - time_start);
150 auto batch_time = std::chrono::duration_cast<std::chrono::milliseconds>(batch_time_end - batch_time_start);
151
152 batch_time_total += batch_time.count();
153
154 std::cout << "- " << std::setw(4) << running.count();
155 std::cout << "s";
156
157 if (i + 1 != num_batches)
158 {
159 std::cout << "\r";
160 std::cout.flush();
161 }
162 }
163 }
164
165 if (verbose)
166 {
167 double train_accuracy = 0;
168 double test_accuracy = 0;
169
170 accuracy(train_accuracy, examples, labels);
171 accuracy(test_accuracy, test_examples, test_labels);
172
173 batch_time_total /= num_batches;
174 total_loss /= num_batches;
175 total_data_loss /= num_batches;
176 total_reg_loss /= num_batches;
177
178 double current_lr = optimizer_object->current_lr();
179
180 std::cout << " - " << batch_time_total << "ms/batch"
181 << " - loss: " << std::fixed << std::setprecision(3) << total_loss
182 << " ( data: " << total_data_loss << ", reg: " << total_reg_loss
183 << " ) - train_accuracy: " << train_accuracy << " - test_accuracy: " << test_accuracy
184 << " - lr: " << current_lr
185 << std::endl;
186 }
187 }
188 }
189
197 void add_layer(std::shared_ptr<Layer> layer)
198 {
199 layers.push_back(layer);
200 num_layers = layers.size();
201 }
202
208 void compile()
209 {
210 compiled = false;
211 input_dim = -1;
212 output_dim = -1;
213
214 if (num_layers == 0)
215 {
216 LOG_ERROR("Please add at least one layer to your neural network using the NNFS::add_layer method before compiling to ensure proper model functionality.");
217 return;
218 }
219
220 LayerType prev_type;
221 int prev_out = -1;
222
223 for (int i = 0; i < num_layers; i++)
224 {
225 std::shared_ptr<Layer> cur_layer = layers[i];
226 LayerType cur_type = cur_layer->type;
227
228 if (cur_type == LayerType::ACTIVATION)
229 {
230 if (i == 0)
231 {
232 LOG_WARNING("Applying an activation function as the first layer of a neural network can distort or lose important information in the input data. It is recommended to use a dense layer for input processing followed by activation functions in subsequent layers to preserve the integrity of the input data.");
233 }
234 else if (prev_type == cur_type)
235 {
236 LOG_WARNING("Applying multiple activation functions in a row can cause issues with learning. Consider using a different layer type or adjusting your model architecture.");
237 }
238 }
239 else if (cur_type == LayerType::DENSE)
240 {
241 std::shared_ptr<Dense> dense_layer = reinterpret_cast<const std::shared_ptr<Dense> &>(layers[i]);
242
243 int cur_input;
244 int cur_output;
245
246 dense_layer->shape(cur_input, cur_output);
247
248 if (prev_out != -1 && cur_input != prev_out)
249 {
250 LOG_ERROR("Shape mismatch detected in NNFS::compile(). Previous dense layer output shape (" << prev_out << ") does not match current dense layer input shape (" << cur_input << ").");
251 return;
252 }
253
254 prev_out = cur_output;
255 }
256 else
257 {
258 LOG_ERROR("Unknown layer type detected in NNFS::compile(). Please ensure that all layers in your neural network have a valid layer type and that the NNFS library supports the specified type.");
259 return;
260 }
261
262 prev_type = cur_type;
263 }
264
265 for (int i = 0; i < num_layers; i++)
266 {
267 std::shared_ptr<Layer> cur_layer = layers[i];
268 LayerType cur_type = cur_layer->type;
269
270 if (cur_type == LayerType::DENSE)
271 {
272 std::shared_ptr<Dense> dense_layer = reinterpret_cast<const std::shared_ptr<Dense> &>(layers[i]);
273
274 int cur_input;
275 int cur_output;
276
277 dense_layer->shape(cur_input, cur_output);
278
279 if (input_dim == -1)
280 {
281 input_dim = cur_input;
282 }
283
284 output_dim = cur_output;
285 }
286 }
287
288 compiled = true;
289 }
290
296 void save(std::string path)
297 {
298 // Create ofstream object
299 std::ofstream ofs(path, std::ios::binary);
300
301 // Write number of layers
302 ofs.write(reinterpret_cast<const char *>(&num_layers), sizeof(int));
303
304 for (int i = 0; i < num_layers; i++)
305 {
306 std::shared_ptr<Layer> layer = layers[i];
307
308 if (layer->type == LayerType::DENSE)
309 {
310 std::shared_ptr<Dense> dense_layer = reinterpret_cast<const std::shared_ptr<Dense> &>(layers[i]);
311
312 // Schema for dense layer
313 // - int type
314 // - int n_input
315 // - int n_output
316 // - Eigen::MatrixXd weights
317 // - Eigen::MatrixXd biases
318 // - double l1_weight_regularizer
319 // - double l2_weight_regularizer
320 // - double l1_bias_regularizer
321 // - double l2_bias_regularizer
322 // - Eigen::MatrixXd weights_optimizer
323 // - Eigen::MatrixXd biases_optimizer
324 // - Eigen::MatrixXd weights_optimizer_additional
325 // - Eigen::MatrixXd biases_optimizer_additional
326
327 int type = static_cast<int>(layer->type);
328 int n_input;
329 int n_output;
330 dense_layer->shape(n_input, n_output);
331
332 Eigen::MatrixXd weights = dense_layer->weights();
333 Eigen::MatrixXd biases = dense_layer->biases();
334
335 double l1_weight_regularizer = dense_layer->l1_weights_regularizer();
336 double l2_weight_regularizer = dense_layer->l2_weights_regularizer();
337 double l1_bias_regularizer = dense_layer->l1_biases_regularizer();
338 double l2_bias_regularizer = dense_layer->l2_biases_regularizer();
339
340 Eigen::MatrixXd weights_optimizer = dense_layer->weights_optimizer();
341 Eigen::MatrixXd biases_optimizer = dense_layer->biases_optimizer();
342
343 Eigen::MatrixXd weights_optimizer_additional = dense_layer->weights_optimizer_additional();
344 Eigen::MatrixXd biases_optimizer_additional = dense_layer->biases_optimizer_additional();
345
346 // Write schema to file
347 ofs.write(reinterpret_cast<char *>(&type), sizeof(type));
348 ofs.write(reinterpret_cast<char *>(&n_input), sizeof(n_input));
349 ofs.write(reinterpret_cast<char *>(&n_output), sizeof(n_output));
350
351 ofs.write(reinterpret_cast<char *>(weights.data()), weights.size() * sizeof(double));
352 ofs.write(reinterpret_cast<char *>(biases.data()), biases.size() * sizeof(double));
353
354 ofs.write(reinterpret_cast<char *>(&l1_weight_regularizer), sizeof(double));
355 ofs.write(reinterpret_cast<char *>(&l2_weight_regularizer), sizeof(double));
356 ofs.write(reinterpret_cast<char *>(&l1_bias_regularizer), sizeof(double));
357 ofs.write(reinterpret_cast<char *>(&l2_bias_regularizer), sizeof(double));
358
359 ofs.write(reinterpret_cast<char *>(weights_optimizer.data()), weights_optimizer.size() * sizeof(double));
360 ofs.write(reinterpret_cast<char *>(biases_optimizer.data()), biases_optimizer.size() * sizeof(double));
361 ofs.write(reinterpret_cast<char *>(weights_optimizer_additional.data()), weights_optimizer_additional.size() * sizeof(double));
362 ofs.write(reinterpret_cast<char *>(biases_optimizer_additional.data()), biases_optimizer_additional.size() * sizeof(double));
363 }
364 else if (layer->type == LayerType::ACTIVATION)
365 {
366 std::shared_ptr<Activation> activation_layer = reinterpret_cast<const std::shared_ptr<Activation> &>(layers[i]);
367
368 // Schema for activation layer
369 // - int type
370 // - int activation_type
371
372 int type = static_cast<int>(layer->type);
373 int activation_type = static_cast<int>(activation_layer->activation_type);
374
375 // Write schema to file
376 ofs.write(reinterpret_cast<char *>(&type), sizeof(type));
377 ofs.write(reinterpret_cast<char *>(&activation_type), sizeof(activation_type));
378 }
379 else
380 {
381 LOG_ERROR("Unknown layer type detected in NNFS::save(). Please ensure that all layers in your neural network have a valid layer type and that the NNFS library supports the specified type.");
382 ofs.close();
383 std::remove(path.c_str());
384 return;
385 }
386 }
387 }
388
394 void load(std::string path)
395 {
396 // Create ifstream object
397 std::ifstream ifs(path, std::ios::binary);
398
399 // Check if file exists
400 if (!ifs.good())
401 {
402 LOG_ERROR("File does not exist in NNFS::load(). Please ensure that the specified file exists.");
403 return;
404 }
405
406 // Clear layers
407 layers.clear();
408
409 // Read number of layers
410 ifs.read(reinterpret_cast<char *>(&num_layers), sizeof(int));
411
412 // Read layers
413 for (int i = 0; i < num_layers; i++)
414 {
415 // Read layer type
416 int type;
417 ifs.read(reinterpret_cast<char *>(&type), sizeof(type));
418
419 if (type == static_cast<int>(LayerType::DENSE))
420 {
421 // Read layer shape
422 int n_input;
423 int n_output;
424 ifs.read(reinterpret_cast<char *>(&n_input), sizeof(n_input));
425 ifs.read(reinterpret_cast<char *>(&n_output), sizeof(n_output));
426
427 // Read weights and biases
428 Eigen::MatrixXd weights(n_input, n_output);
429 Eigen::MatrixXd biases(1, n_output);
430 ifs.read(reinterpret_cast<char *>(weights.data()), weights.size() * sizeof(double));
431 ifs.read(reinterpret_cast<char *>(biases.data()), biases.size() * sizeof(double));
432
433 // Read regularizers
434 double l1_weight_regularizer;
435 double l2_weight_regularizer;
436 double l1_bias_regularizer;
437 double l2_bias_regularizer;
438 ifs.read(reinterpret_cast<char *>(&l1_weight_regularizer), sizeof(double));
439 ifs.read(reinterpret_cast<char *>(&l2_weight_regularizer), sizeof(double));
440 ifs.read(reinterpret_cast<char *>(&l1_bias_regularizer), sizeof(double));
441 ifs.read(reinterpret_cast<char *>(&l2_bias_regularizer), sizeof(double));
442
443 // Create dense layer
444 std::shared_ptr<Dense> dense_layer = std::make_shared<Dense>(n_input, n_output, l1_weight_regularizer, l2_weight_regularizer, l1_bias_regularizer, l2_bias_regularizer);
445
446 // Read optimizers
447 Eigen::MatrixXd weights_optimizer(n_input, n_output);
448 Eigen::MatrixXd biases_optimizer(1, n_output);
449 Eigen::MatrixXd weights_optimizer_additional(n_input, n_output);
450 Eigen::MatrixXd biases_optimizer_additional(1, n_output);
451 ifs.read(reinterpret_cast<char *>(weights_optimizer.data()), weights_optimizer.size() * sizeof(double));
452 ifs.read(reinterpret_cast<char *>(biases_optimizer.data()), biases_optimizer.size() * sizeof(double));
453 ifs.read(reinterpret_cast<char *>(weights_optimizer_additional.data()), weights_optimizer_additional.size() * sizeof(double));
454 ifs.read(reinterpret_cast<char *>(biases_optimizer_additional.data()), biases_optimizer_additional.size() * sizeof(double));
455
456 // Resize all matrices
457 weights.resize(n_input, n_output);
458 biases.resize(1, n_output);
459 weights_optimizer.resize(n_input, n_output);
460 biases_optimizer.resize(1, n_output);
461 weights_optimizer_additional.resize(n_input, n_output);
462 biases_optimizer_additional.resize(1, n_output);
463
464 // Set weights and biases
465 dense_layer->weights(weights);
466 dense_layer->biases(biases);
467 dense_layer->weights_optimizer(weights_optimizer);
468 dense_layer->biases_optimizer(biases_optimizer);
469 dense_layer->weights_optimizer_additional(weights_optimizer_additional);
470 dense_layer->biases_optimizer_additional(biases_optimizer_additional);
471
472 // Add dense layer to layers
473 layers.push_back(dense_layer);
474 }
475 else if (type == static_cast<int>(LayerType::ACTIVATION))
476 {
477 // Read activation type
478 int activation_type;
479 ifs.read(reinterpret_cast<char *>(&activation_type), sizeof(activation_type));
480
481 // Convert activation type to enum
482 ActivationType activation;
483 switch (activation_type)
484 {
485 case 0:
486 activation = ActivationType::RELU;
487 break;
488 case 1:
489 activation = ActivationType::SIGMOID;
490 break;
491 case 2:
492 activation = ActivationType::TANH;
493 break;
494 case 3:
495 activation = ActivationType::SOFTMAX;
496 break;
497 default:
498 activation = ActivationType::NONE;
499 break;
500 }
501
502 if (activation == ActivationType::NONE)
503 {
504 LOG_ERROR("Unknown activation type detected in NNFS::load(). Please ensure that all layers in your neural network have a valid activation type and that the NNFS library supports the specified type.");
505 ifs.close();
506 return;
507 }
508
509 // Create activation layer according to activation type, e.g std::make_shared<ReLU>() or std::make_shared<Sigmoid>() etc.;
510 std::shared_ptr<Activation> activation_layer;
511 switch (activation)
512 {
514 activation_layer = std::make_shared<Sigmoid>();
515 break;
517 activation_layer = std::make_shared<Tanh>();
518 break;
520 activation_layer = std::make_shared<Softmax>();
521 break;
522 default:
523 activation_layer = std::make_shared<ReLU>();
524 break;
525 }
526
527 // Add activation layer to layers
528 layers.push_back(activation_layer);
529 }
530 else
531 {
532 LOG_ERROR("Unknown layer type detected in NNFS::load(). Please ensure that all layers in your neural network have a valid layer type and that the NNFS library supports the specified type.");
533 ifs.close();
534 return;
535 }
536 }
537
538 // If everything went well, close the file
539 ifs.close();
540
541 // Compile the neural network
542 compile();
543 }
544
552 void accuracy(double &accuracy, const Eigen::MatrixXd &examples, const Eigen::MatrixXd &labels)
553 {
554 if (examples.cols() != input_dim || labels.cols() != output_dim)
555 {
556 LOG_ERROR("Input and output dimensions of the neural network do not match the dimensions of the provided samples and labels.");
557 return;
558 }
559
560 Eigen::MatrixXd predictions = examples;
561 forward(predictions);
562 Metrics::accuracy(accuracy, predictions, labels);
563 }
564
572 Eigen::MatrixXd predict(const Eigen::MatrixXd &sample)
573 {
574 if (sample.cols() != input_dim)
575 {
576 LOG_ERROR("Input dimension of the neural network does not match the dimension of the provided sample.");
577 return Eigen::MatrixXd::Zero(sample.rows(), sample.cols());
578 }
579
580 Eigen::MatrixXd prediction = sample;
581 forward(prediction);
582
583 if (layers[layers.size() - 1]->type != LayerType::ACTIVATION) {
584 Softmax softmax = Softmax();
585 Eigen::MatrixXd prediction_softmax = prediction;
586 softmax.forward(prediction_softmax, prediction);
587 return prediction_softmax;
588 }
589
590 return prediction;
591 }
592
593 private:
599 void forward(Eigen::MatrixXd &x) override
600 {
601 for (int i = 0; i < num_layers; i++)
602 {
603 layers[i]->forward(x, x);
604 }
605 }
606
615 void backward(Eigen::MatrixXd &predicted, const Eigen::MatrixXd &labels) override
616 {
617 Eigen::MatrixXd dx;
618 loss_object->backward(dx, predicted, labels);
619
620 for (int i = num_layers - 1; i >= 0; --i)
621 {
622 layers[i]->backward(dx, dx);
623 }
624
625 for (int i = 0; i < num_layers; i++)
626 {
627 if (layers[i]->type == LayerType::DENSE)
628 {
629 std::shared_ptr<Dense> _dense_layer = reinterpret_cast<const std::shared_ptr<Dense> &>(layers[i]);
630
631 optimizer_object->update_params(_dense_layer);
632 }
633 }
634 }
635
641 void regularization_loss(double &loss)
642 {
643 for (int i = 0; i < num_layers; i++)
644 {
645 if (layers[i]->type == LayerType::DENSE)
646 {
647 std::shared_ptr<Dense> _dense_layer = reinterpret_cast<const std::shared_ptr<Dense> &>(layers[i]);
648 loss += loss_object->regularization_loss(_dense_layer);
649 }
650 }
651 }
652
660 void progressbar(int total, int current, int length)
661 {
662 double progress = (current + 1) / (double)total;
663 int pos = length * progress;
664
665 std::cout << " [";
666 for (int current = 0; current < length; ++current)
667 {
668 if (current < pos)
669 {
670 std::cout << "=";
671 }
672 else if (current == pos)
673 {
674 std::cout << ">";
675 }
676 else
677 {
678 std::cout << " ";
679 }
680 }
681
682 std::cout << "] " << std::setw(3) << int(progress * 100);
683 std::cout << "% ";
684 }
685
686 std::vector<std::shared_ptr<Layer>> layers; // Layers of the neural network
687 std::shared_ptr<Loss> loss_object; // Loss function of the neural network
688 std::shared_ptr<Optimizer> optimizer_object; // Optimizer of the neural network
689 int num_layers; // Number of layers in the neural network
690 int input_dim; // Input dimension of the neural network
691 int output_dim; // Output dimension of the neural network
692 bool compiled = false; // Indicates whether the neural network has been compiled
693 };
694
695} // namespace NNFS
static void accuracy(double &accuracy, const Eigen::MatrixXd &predicted, const Eigen::MatrixXd &labels)
Calculates the accuracy of the model.
Definition Metrics.hpp:23
Abstract base class for the model in a neural network.
Definition Model.hpp:19
A neural network model.
Definition NeuralNetwork.hpp:37
void compile()
Compiles the neural network model.
Definition NeuralNetwork.hpp:208
Eigen::MatrixXd predict(const Eigen::MatrixXd &sample)
Predicts the class of the provided sample(s).
Definition NeuralNetwork.hpp:572
void add_layer(std::shared_ptr< Layer > layer)
Adds a layer to the neural network model.
Definition NeuralNetwork.hpp:197
void accuracy(double &accuracy, const Eigen::MatrixXd &examples, const Eigen::MatrixXd &labels)
Calculates the accuracy of the neural network on the provided examples and labels.
Definition NeuralNetwork.hpp:552
void load(std::string path)
Loads a model from a file in a custom binary format. The model must have been saved using the NNFS::s...
Definition NeuralNetwork.hpp:394
void save(std::string path)
Saves the model to a file in a custom binary format. The model can be loaded using the NNFS::load met...
Definition NeuralNetwork.hpp:296
NeuralNetwork(std::shared_ptr< Loss > loss=nullptr, std::shared_ptr< Optimizer > optimizer=nullptr)
Constructor for NeuralNetwork.
Definition NeuralNetwork.hpp:45
void fit(const Eigen::MatrixXd &examples, const Eigen::MatrixXd &labels, const Eigen::MatrixXd &test_examples, const Eigen::MatrixXd &test_labels, int epochs, int batch_size, bool verbose=true) override
Fit the neural network model to the given data.
Definition NeuralNetwork.hpp:64
Softmax activation function.
Definition Softmax.hpp:13
void forward(Eigen::MatrixXd &out, const Eigen::MatrixXd &x) override
Forward pass of the softmax activation function.
Definition Softmax.hpp:29
#define LOG_ERROR
Definition clue.hpp:138
#define LOG_WARNING
Definition clue.hpp:139
Definition Activation.hpp:6
LayerType
Enum class for layer types.
Definition Layer.hpp:12
ActivationType
Enum class for activation types.
Definition Activation.hpp:11