First Attempt at implementing a Perceptron

Clash Royale CLAN TAG#URR8PPP
.everyoneloves__top-leaderboard:empty,.everyoneloves__mid-leaderboard:empty margin-bottom:0;
up vote
4
down vote
favorite
Here is an attempt at implementing the simplest Neural Network, which is an algorithm for learning a binary classifier. In this specific case, it can decide whether an input, of a pair of Cartesian coordinates, belongs to some specific class or not.
main.cpp
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <vector>
#include <time.h>
#include "Perceptron.h"
int main()
Settings s(0.1, 100, "Perceptron_test1.txt", "Perceptron_statst1.txt");
Perceptron p(s);
std::cout <<"Donen";
Perceptron.h
#ifndef PERCEPTRON_H
#define PERCEPTRON_H
struct Settings
Settings(double lr, int mi, const std::string& l, const std::string& s)
: learning_rate(lr), max_iterations(mi), load_file(l), save_file(s)
double learning_rate;
int max_iterations;
std::string load_file;
std::string save_file;
;
class Perceptron
private:
typedef int INT;
typedef double REAL;
struct Point
Point (REAL xx, REAL yy) : x(xx), y(yy)
REAL x;
REAL y;
;
struct Statistics
Statistics(REAL rmse, INT i) : RMSE(rmse), iteration(i)
REAL RMSE; /* Measure accuracy with Root Mean Sqaured Error. */
INT iteration; /* Epoch. */
;
public:
Perceptron();
Perceptron(const Settings& settings);
private:
REAL learning_rate;
INT max_iterations;
INT pattern_count;
std::vector<REAL> weights;
std::vector<Perceptron::Point> input;
std::vector<INT> desired_outputs; /* Supervised Learning. */
std::vector<Perceptron::Statistics> stats;
private:
REAL random_real();
void initialize_weights();
void load (std::istream& ifs);
INT calculate_output(const Perceptron::Point& p);
void learn();
void save (std::ostream& ofs);
;
#include "PerceptronDef.cpp"
#endif
PerceptronDef.cpp
Perceptron::Perceptron(const Settings& settings)
: learning_rate(settings.learning_rate), max_iterations(settings.max_iterations), pattern_count(0)
srand( unsigned int( time(NULL) ) );
initialize_weights();
std::ifstream ifs(settings.load_file.c_str());
if (!ifs) std::cerr <<"Can't open input file!n";
load(ifs);
ifs.close();
learn();
std::ofstream ofs(settings.save_file.c_str());
if (!ofs) std::cerr <<"Can't open output file!n";
save(ofs);
ofs.close();
Perceptron::REAL Perceptron::random_real() return (REAL)rand() / (REAL)RAND_MAX;
void Perceptron::initialize_weights()
for (size_t i = 0; i < 3; ++i) weights.push_back( random_real() );
void Perceptron::load (std::istream& ifs)
REAL xx, yy;
INT ou;
while (ifs >> xx >> yy >> ou)
input.emplace_back( Point(xx, yy) );
desired_outputs.emplace_back(ou);
pattern_count++;
/* Activation function: Heaviside step function. */
Perceptron::INT Perceptron::calculate_output(const Perceptron::Point& p)
REAL w_sum = p.x * weights[0] + p.y * weights[1] + weights[2]; /* sum_i=1^n (w_i * x_i + bias). */
return (w_sum >= 0) ? 1 : -1; /* Threshold = 0. */
void Perceptron::learn()
REAL global_error;
INT iteration = 0;
do /* Start learning. */
iteration++;
global_error = 0;
for (INT p = 0; p < pattern_count; ++p)
INT output = calculate_output(input[p]); /* Calculate the output. */
REAL local_error = desired_outputs[p] - output; /* Update the weights. */
weights[0] += learning_rate * local_error * input[p].x;
weights[1] += learning_rate * local_error * input[p].y;
weights[2] += learning_rate * local_error;
global_error += (local_error * local_error);
stats.emplace_back( Statistics(sqrt(global_error / pattern_count), iteration) );
while (global_error != 0 && iteration <= max_iterations);
std::cout <<"Decision Equation: "<< weights[0] <<"x + "<< weights[1] <<"y + "<< weights[2] <<" = 0n";
void Perceptron::save (std::ostream& ofs)
ofs <<"Decision-boundary line equation coefficientsn";
ofs << weights[0] <<" "<<weights[1] <<" "<<weights[2] <<"n";
ofs <<"Error Iterationn";
for (size_t i = 0; i < stats.size(); ++i)
ofs << std::setprecision(3) << stats[i].RMSE <<" "<< stats[i].iteration <<'n';
The input data file is in the format:
x y 0 (or 1)
Here is an example of what I get:
-0.65x + 0.13y + 1.47 = 0Done.
The above, together with the input data looks like:

The statistics (RMSE) from all the iterations are:

Questions:
- Any comments on the code structure and style are welcome.
- I'm looking for directions on how to generalize the code (to Nodes and Links, probably?), so that it can be used for Multilayer Perceptron.
- Does the result look remotely reasonable?
- What would be a good extension, what else to include in the class?
c++ neural-network
add a comment |Â
up vote
4
down vote
favorite
Here is an attempt at implementing the simplest Neural Network, which is an algorithm for learning a binary classifier. In this specific case, it can decide whether an input, of a pair of Cartesian coordinates, belongs to some specific class or not.
main.cpp
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <vector>
#include <time.h>
#include "Perceptron.h"
int main()
Settings s(0.1, 100, "Perceptron_test1.txt", "Perceptron_statst1.txt");
Perceptron p(s);
std::cout <<"Donen";
Perceptron.h
#ifndef PERCEPTRON_H
#define PERCEPTRON_H
struct Settings
Settings(double lr, int mi, const std::string& l, const std::string& s)
: learning_rate(lr), max_iterations(mi), load_file(l), save_file(s)
double learning_rate;
int max_iterations;
std::string load_file;
std::string save_file;
;
class Perceptron
private:
typedef int INT;
typedef double REAL;
struct Point
Point (REAL xx, REAL yy) : x(xx), y(yy)
REAL x;
REAL y;
;
struct Statistics
Statistics(REAL rmse, INT i) : RMSE(rmse), iteration(i)
REAL RMSE; /* Measure accuracy with Root Mean Sqaured Error. */
INT iteration; /* Epoch. */
;
public:
Perceptron();
Perceptron(const Settings& settings);
private:
REAL learning_rate;
INT max_iterations;
INT pattern_count;
std::vector<REAL> weights;
std::vector<Perceptron::Point> input;
std::vector<INT> desired_outputs; /* Supervised Learning. */
std::vector<Perceptron::Statistics> stats;
private:
REAL random_real();
void initialize_weights();
void load (std::istream& ifs);
INT calculate_output(const Perceptron::Point& p);
void learn();
void save (std::ostream& ofs);
;
#include "PerceptronDef.cpp"
#endif
PerceptronDef.cpp
Perceptron::Perceptron(const Settings& settings)
: learning_rate(settings.learning_rate), max_iterations(settings.max_iterations), pattern_count(0)
srand( unsigned int( time(NULL) ) );
initialize_weights();
std::ifstream ifs(settings.load_file.c_str());
if (!ifs) std::cerr <<"Can't open input file!n";
load(ifs);
ifs.close();
learn();
std::ofstream ofs(settings.save_file.c_str());
if (!ofs) std::cerr <<"Can't open output file!n";
save(ofs);
ofs.close();
Perceptron::REAL Perceptron::random_real() return (REAL)rand() / (REAL)RAND_MAX;
void Perceptron::initialize_weights()
for (size_t i = 0; i < 3; ++i) weights.push_back( random_real() );
void Perceptron::load (std::istream& ifs)
REAL xx, yy;
INT ou;
while (ifs >> xx >> yy >> ou)
input.emplace_back( Point(xx, yy) );
desired_outputs.emplace_back(ou);
pattern_count++;
/* Activation function: Heaviside step function. */
Perceptron::INT Perceptron::calculate_output(const Perceptron::Point& p)
REAL w_sum = p.x * weights[0] + p.y * weights[1] + weights[2]; /* sum_i=1^n (w_i * x_i + bias). */
return (w_sum >= 0) ? 1 : -1; /* Threshold = 0. */
void Perceptron::learn()
REAL global_error;
INT iteration = 0;
do /* Start learning. */
iteration++;
global_error = 0;
for (INT p = 0; p < pattern_count; ++p)
INT output = calculate_output(input[p]); /* Calculate the output. */
REAL local_error = desired_outputs[p] - output; /* Update the weights. */
weights[0] += learning_rate * local_error * input[p].x;
weights[1] += learning_rate * local_error * input[p].y;
weights[2] += learning_rate * local_error;
global_error += (local_error * local_error);
stats.emplace_back( Statistics(sqrt(global_error / pattern_count), iteration) );
while (global_error != 0 && iteration <= max_iterations);
std::cout <<"Decision Equation: "<< weights[0] <<"x + "<< weights[1] <<"y + "<< weights[2] <<" = 0n";
void Perceptron::save (std::ostream& ofs)
ofs <<"Decision-boundary line equation coefficientsn";
ofs << weights[0] <<" "<<weights[1] <<" "<<weights[2] <<"n";
ofs <<"Error Iterationn";
for (size_t i = 0; i < stats.size(); ++i)
ofs << std::setprecision(3) << stats[i].RMSE <<" "<< stats[i].iteration <<'n';
The input data file is in the format:
x y 0 (or 1)
Here is an example of what I get:
-0.65x + 0.13y + 1.47 = 0Done.
The above, together with the input data looks like:

The statistics (RMSE) from all the iterations are:

Questions:
- Any comments on the code structure and style are welcome.
- I'm looking for directions on how to generalize the code (to Nodes and Links, probably?), so that it can be used for Multilayer Perceptron.
- Does the result look remotely reasonable?
- What would be a good extension, what else to include in the class?
c++ neural-network
add a comment |Â
up vote
4
down vote
favorite
up vote
4
down vote
favorite
Here is an attempt at implementing the simplest Neural Network, which is an algorithm for learning a binary classifier. In this specific case, it can decide whether an input, of a pair of Cartesian coordinates, belongs to some specific class or not.
main.cpp
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <vector>
#include <time.h>
#include "Perceptron.h"
int main()
Settings s(0.1, 100, "Perceptron_test1.txt", "Perceptron_statst1.txt");
Perceptron p(s);
std::cout <<"Donen";
Perceptron.h
#ifndef PERCEPTRON_H
#define PERCEPTRON_H
struct Settings
Settings(double lr, int mi, const std::string& l, const std::string& s)
: learning_rate(lr), max_iterations(mi), load_file(l), save_file(s)
double learning_rate;
int max_iterations;
std::string load_file;
std::string save_file;
;
class Perceptron
private:
typedef int INT;
typedef double REAL;
struct Point
Point (REAL xx, REAL yy) : x(xx), y(yy)
REAL x;
REAL y;
;
struct Statistics
Statistics(REAL rmse, INT i) : RMSE(rmse), iteration(i)
REAL RMSE; /* Measure accuracy with Root Mean Sqaured Error. */
INT iteration; /* Epoch. */
;
public:
Perceptron();
Perceptron(const Settings& settings);
private:
REAL learning_rate;
INT max_iterations;
INT pattern_count;
std::vector<REAL> weights;
std::vector<Perceptron::Point> input;
std::vector<INT> desired_outputs; /* Supervised Learning. */
std::vector<Perceptron::Statistics> stats;
private:
REAL random_real();
void initialize_weights();
void load (std::istream& ifs);
INT calculate_output(const Perceptron::Point& p);
void learn();
void save (std::ostream& ofs);
;
#include "PerceptronDef.cpp"
#endif
PerceptronDef.cpp
Perceptron::Perceptron(const Settings& settings)
: learning_rate(settings.learning_rate), max_iterations(settings.max_iterations), pattern_count(0)
srand( unsigned int( time(NULL) ) );
initialize_weights();
std::ifstream ifs(settings.load_file.c_str());
if (!ifs) std::cerr <<"Can't open input file!n";
load(ifs);
ifs.close();
learn();
std::ofstream ofs(settings.save_file.c_str());
if (!ofs) std::cerr <<"Can't open output file!n";
save(ofs);
ofs.close();
Perceptron::REAL Perceptron::random_real() return (REAL)rand() / (REAL)RAND_MAX;
void Perceptron::initialize_weights()
for (size_t i = 0; i < 3; ++i) weights.push_back( random_real() );
void Perceptron::load (std::istream& ifs)
REAL xx, yy;
INT ou;
while (ifs >> xx >> yy >> ou)
input.emplace_back( Point(xx, yy) );
desired_outputs.emplace_back(ou);
pattern_count++;
/* Activation function: Heaviside step function. */
Perceptron::INT Perceptron::calculate_output(const Perceptron::Point& p)
REAL w_sum = p.x * weights[0] + p.y * weights[1] + weights[2]; /* sum_i=1^n (w_i * x_i + bias). */
return (w_sum >= 0) ? 1 : -1; /* Threshold = 0. */
void Perceptron::learn()
REAL global_error;
INT iteration = 0;
do /* Start learning. */
iteration++;
global_error = 0;
for (INT p = 0; p < pattern_count; ++p)
INT output = calculate_output(input[p]); /* Calculate the output. */
REAL local_error = desired_outputs[p] - output; /* Update the weights. */
weights[0] += learning_rate * local_error * input[p].x;
weights[1] += learning_rate * local_error * input[p].y;
weights[2] += learning_rate * local_error;
global_error += (local_error * local_error);
stats.emplace_back( Statistics(sqrt(global_error / pattern_count), iteration) );
while (global_error != 0 && iteration <= max_iterations);
std::cout <<"Decision Equation: "<< weights[0] <<"x + "<< weights[1] <<"y + "<< weights[2] <<" = 0n";
void Perceptron::save (std::ostream& ofs)
ofs <<"Decision-boundary line equation coefficientsn";
ofs << weights[0] <<" "<<weights[1] <<" "<<weights[2] <<"n";
ofs <<"Error Iterationn";
for (size_t i = 0; i < stats.size(); ++i)
ofs << std::setprecision(3) << stats[i].RMSE <<" "<< stats[i].iteration <<'n';
The input data file is in the format:
x y 0 (or 1)
Here is an example of what I get:
-0.65x + 0.13y + 1.47 = 0Done.
The above, together with the input data looks like:

The statistics (RMSE) from all the iterations are:

Questions:
- Any comments on the code structure and style are welcome.
- I'm looking for directions on how to generalize the code (to Nodes and Links, probably?), so that it can be used for Multilayer Perceptron.
- Does the result look remotely reasonable?
- What would be a good extension, what else to include in the class?
c++ neural-network
Here is an attempt at implementing the simplest Neural Network, which is an algorithm for learning a binary classifier. In this specific case, it can decide whether an input, of a pair of Cartesian coordinates, belongs to some specific class or not.
main.cpp
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <vector>
#include <time.h>
#include "Perceptron.h"
int main()
Settings s(0.1, 100, "Perceptron_test1.txt", "Perceptron_statst1.txt");
Perceptron p(s);
std::cout <<"Donen";
Perceptron.h
#ifndef PERCEPTRON_H
#define PERCEPTRON_H
struct Settings
Settings(double lr, int mi, const std::string& l, const std::string& s)
: learning_rate(lr), max_iterations(mi), load_file(l), save_file(s)
double learning_rate;
int max_iterations;
std::string load_file;
std::string save_file;
;
class Perceptron
private:
typedef int INT;
typedef double REAL;
struct Point
Point (REAL xx, REAL yy) : x(xx), y(yy)
REAL x;
REAL y;
;
struct Statistics
Statistics(REAL rmse, INT i) : RMSE(rmse), iteration(i)
REAL RMSE; /* Measure accuracy with Root Mean Sqaured Error. */
INT iteration; /* Epoch. */
;
public:
Perceptron();
Perceptron(const Settings& settings);
private:
REAL learning_rate;
INT max_iterations;
INT pattern_count;
std::vector<REAL> weights;
std::vector<Perceptron::Point> input;
std::vector<INT> desired_outputs; /* Supervised Learning. */
std::vector<Perceptron::Statistics> stats;
private:
REAL random_real();
void initialize_weights();
void load (std::istream& ifs);
INT calculate_output(const Perceptron::Point& p);
void learn();
void save (std::ostream& ofs);
;
#include "PerceptronDef.cpp"
#endif
PerceptronDef.cpp
Perceptron::Perceptron(const Settings& settings)
: learning_rate(settings.learning_rate), max_iterations(settings.max_iterations), pattern_count(0)
srand( unsigned int( time(NULL) ) );
initialize_weights();
std::ifstream ifs(settings.load_file.c_str());
if (!ifs) std::cerr <<"Can't open input file!n";
load(ifs);
ifs.close();
learn();
std::ofstream ofs(settings.save_file.c_str());
if (!ofs) std::cerr <<"Can't open output file!n";
save(ofs);
ofs.close();
Perceptron::REAL Perceptron::random_real() return (REAL)rand() / (REAL)RAND_MAX;
void Perceptron::initialize_weights()
for (size_t i = 0; i < 3; ++i) weights.push_back( random_real() );
void Perceptron::load (std::istream& ifs)
REAL xx, yy;
INT ou;
while (ifs >> xx >> yy >> ou)
input.emplace_back( Point(xx, yy) );
desired_outputs.emplace_back(ou);
pattern_count++;
/* Activation function: Heaviside step function. */
Perceptron::INT Perceptron::calculate_output(const Perceptron::Point& p)
REAL w_sum = p.x * weights[0] + p.y * weights[1] + weights[2]; /* sum_i=1^n (w_i * x_i + bias). */
return (w_sum >= 0) ? 1 : -1; /* Threshold = 0. */
void Perceptron::learn()
REAL global_error;
INT iteration = 0;
do /* Start learning. */
iteration++;
global_error = 0;
for (INT p = 0; p < pattern_count; ++p)
INT output = calculate_output(input[p]); /* Calculate the output. */
REAL local_error = desired_outputs[p] - output; /* Update the weights. */
weights[0] += learning_rate * local_error * input[p].x;
weights[1] += learning_rate * local_error * input[p].y;
weights[2] += learning_rate * local_error;
global_error += (local_error * local_error);
stats.emplace_back( Statistics(sqrt(global_error / pattern_count), iteration) );
while (global_error != 0 && iteration <= max_iterations);
std::cout <<"Decision Equation: "<< weights[0] <<"x + "<< weights[1] <<"y + "<< weights[2] <<" = 0n";
void Perceptron::save (std::ostream& ofs)
ofs <<"Decision-boundary line equation coefficientsn";
ofs << weights[0] <<" "<<weights[1] <<" "<<weights[2] <<"n";
ofs <<"Error Iterationn";
for (size_t i = 0; i < stats.size(); ++i)
ofs << std::setprecision(3) << stats[i].RMSE <<" "<< stats[i].iteration <<'n';
The input data file is in the format:
x y 0 (or 1)
Here is an example of what I get:
-0.65x + 0.13y + 1.47 = 0Done.
The above, together with the input data looks like:

The statistics (RMSE) from all the iterations are:

Questions:
- Any comments on the code structure and style are welcome.
- I'm looking for directions on how to generalize the code (to Nodes and Links, probably?), so that it can be used for Multilayer Perceptron.
- Does the result look remotely reasonable?
- What would be a good extension, what else to include in the class?
c++ neural-network
edited Apr 19 at 7:52
asked Apr 18 at 22:26
Ziezi
6691926
6691926
add a comment |Â
add a comment |Â
2 Answers
2
active
oldest
votes
up vote
5
down vote
accepted
Focusing on C++ style:
It is generally backwards to #include "PerceptronDef.cpp" (or any cpp source file). You should keep #include for header files, and let the linker take care of source files.
Although typedef is a valid and often useful language feature, there is no advantage here to defining INT as a synonym for int. All such typedefs offer is the promise of confusion.
Avoid using old C style casts such as (REAL)RAND_MAX. It is better to use the safest suitable c++ alternative, which in this case would be static_cast<double>(RAND_MAX).
rand() is generally not considered random enough for many purposes and should be avoided by default.
That said, for this particular problem it may be better to avoid the srand( unsigned int( time(NULL) ) ); line. That makes different runs of the program behave differently, which is just really annoying for tracing through how the training behaves.
When writing doubles to a text file through an output stream, the precision of the values will be truncated. It would be better to use a binary format for storing doubles, especially if you plan to load them again for further training. If you really want to use a text format, you will probably want to use std::setprecision to avoid truncation.
The use of emplace_back with vectors when filling them with newly constructed objects is good. There are other things that would help use vectors more usefully. For example, rather than tracking pattern_count in load, you could simply get desired_outputs.size().
Briefly looking at the learning aspect:
If your input data is entirely one dimensional (and even if it's at a tilt), as it looks like it is on that first diagram then there are arbitrarily many equivalent separating lines (depending only on the point where they cross the line of data). To get a better test, you may want input data that is more scattered.
It may be helpful to shuffle your training data (taking care to keep the input and target output in sync), possibly doing so repeatedly throughout training. This helps avoid bias from the order: for example if all the blue data points came first followed by all the red ones the training could be skewed.
For most machine learning, when working with a learning rate, it is common practice to decrease the learning rate as training progresses. This is essentially to fine tune the process once in the approximate right place, and avoid jumping back and forth over the ideal location.
Although not a massive issue for a simple perceptron like this, because of its very limited parameter space, it is best practice to keep some of the data separate and never use it for training. This is used as a validation set to periodically check that the learning is generalisable and not overfitting.
add a comment |Â
up vote
3
down vote
This is a danger sign:
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <vector>
#include <time.h>
#include "Perceptron.h"
If you put your own headers first, before any library headers, you'll see that it's not self-contained. It's generally a bad idea for headers to depend on the order of inclusion, so make them self-contained:
#ifndef PERCEPTRON_H
#define PERCEPTRON_H
// These are required for the perceptron *interface*
#include <string>
#include <vector>
struct Settings
{
The implementation file Perceptron.cpp (which should be compiled separately) then needs
#include "Perceptron.h"
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <fstream>
And main.cpp just needs
#include "Perceptron.h"
#include <iostream>
On the point about separate compilation: don't include all your sources into one translation unit (i.e. one invocation of your compiler). Instead compile main.cpp and Perceptron.cpp to object files, and then link the object files and their libraries as a separate step.
With Make, it would look like this:
CXXFLAGS = -Wall -Wextra -Wpedantic -Weffc++
CXXFLAGS += -Wwrite-strings -Warray-bounds
CXXFLAGS += -Wno-parentheses
perceptron_test: main.o Perceptron.o
perceptron_test: LINK.o = $(LINK.cc)
(Make has default rules that know how to create the *.o files, so no need to write anything for them).
add a comment |Â
2 Answers
2
active
oldest
votes
2 Answers
2
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
5
down vote
accepted
Focusing on C++ style:
It is generally backwards to #include "PerceptronDef.cpp" (or any cpp source file). You should keep #include for header files, and let the linker take care of source files.
Although typedef is a valid and often useful language feature, there is no advantage here to defining INT as a synonym for int. All such typedefs offer is the promise of confusion.
Avoid using old C style casts such as (REAL)RAND_MAX. It is better to use the safest suitable c++ alternative, which in this case would be static_cast<double>(RAND_MAX).
rand() is generally not considered random enough for many purposes and should be avoided by default.
That said, for this particular problem it may be better to avoid the srand( unsigned int( time(NULL) ) ); line. That makes different runs of the program behave differently, which is just really annoying for tracing through how the training behaves.
When writing doubles to a text file through an output stream, the precision of the values will be truncated. It would be better to use a binary format for storing doubles, especially if you plan to load them again for further training. If you really want to use a text format, you will probably want to use std::setprecision to avoid truncation.
The use of emplace_back with vectors when filling them with newly constructed objects is good. There are other things that would help use vectors more usefully. For example, rather than tracking pattern_count in load, you could simply get desired_outputs.size().
Briefly looking at the learning aspect:
If your input data is entirely one dimensional (and even if it's at a tilt), as it looks like it is on that first diagram then there are arbitrarily many equivalent separating lines (depending only on the point where they cross the line of data). To get a better test, you may want input data that is more scattered.
It may be helpful to shuffle your training data (taking care to keep the input and target output in sync), possibly doing so repeatedly throughout training. This helps avoid bias from the order: for example if all the blue data points came first followed by all the red ones the training could be skewed.
For most machine learning, when working with a learning rate, it is common practice to decrease the learning rate as training progresses. This is essentially to fine tune the process once in the approximate right place, and avoid jumping back and forth over the ideal location.
Although not a massive issue for a simple perceptron like this, because of its very limited parameter space, it is best practice to keep some of the data separate and never use it for training. This is used as a validation set to periodically check that the learning is generalisable and not overfitting.
add a comment |Â
up vote
5
down vote
accepted
Focusing on C++ style:
It is generally backwards to #include "PerceptronDef.cpp" (or any cpp source file). You should keep #include for header files, and let the linker take care of source files.
Although typedef is a valid and often useful language feature, there is no advantage here to defining INT as a synonym for int. All such typedefs offer is the promise of confusion.
Avoid using old C style casts such as (REAL)RAND_MAX. It is better to use the safest suitable c++ alternative, which in this case would be static_cast<double>(RAND_MAX).
rand() is generally not considered random enough for many purposes and should be avoided by default.
That said, for this particular problem it may be better to avoid the srand( unsigned int( time(NULL) ) ); line. That makes different runs of the program behave differently, which is just really annoying for tracing through how the training behaves.
When writing doubles to a text file through an output stream, the precision of the values will be truncated. It would be better to use a binary format for storing doubles, especially if you plan to load them again for further training. If you really want to use a text format, you will probably want to use std::setprecision to avoid truncation.
The use of emplace_back with vectors when filling them with newly constructed objects is good. There are other things that would help use vectors more usefully. For example, rather than tracking pattern_count in load, you could simply get desired_outputs.size().
Briefly looking at the learning aspect:
If your input data is entirely one dimensional (and even if it's at a tilt), as it looks like it is on that first diagram then there are arbitrarily many equivalent separating lines (depending only on the point where they cross the line of data). To get a better test, you may want input data that is more scattered.
It may be helpful to shuffle your training data (taking care to keep the input and target output in sync), possibly doing so repeatedly throughout training. This helps avoid bias from the order: for example if all the blue data points came first followed by all the red ones the training could be skewed.
For most machine learning, when working with a learning rate, it is common practice to decrease the learning rate as training progresses. This is essentially to fine tune the process once in the approximate right place, and avoid jumping back and forth over the ideal location.
Although not a massive issue for a simple perceptron like this, because of its very limited parameter space, it is best practice to keep some of the data separate and never use it for training. This is used as a validation set to periodically check that the learning is generalisable and not overfitting.
add a comment |Â
up vote
5
down vote
accepted
up vote
5
down vote
accepted
Focusing on C++ style:
It is generally backwards to #include "PerceptronDef.cpp" (or any cpp source file). You should keep #include for header files, and let the linker take care of source files.
Although typedef is a valid and often useful language feature, there is no advantage here to defining INT as a synonym for int. All such typedefs offer is the promise of confusion.
Avoid using old C style casts such as (REAL)RAND_MAX. It is better to use the safest suitable c++ alternative, which in this case would be static_cast<double>(RAND_MAX).
rand() is generally not considered random enough for many purposes and should be avoided by default.
That said, for this particular problem it may be better to avoid the srand( unsigned int( time(NULL) ) ); line. That makes different runs of the program behave differently, which is just really annoying for tracing through how the training behaves.
When writing doubles to a text file through an output stream, the precision of the values will be truncated. It would be better to use a binary format for storing doubles, especially if you plan to load them again for further training. If you really want to use a text format, you will probably want to use std::setprecision to avoid truncation.
The use of emplace_back with vectors when filling them with newly constructed objects is good. There are other things that would help use vectors more usefully. For example, rather than tracking pattern_count in load, you could simply get desired_outputs.size().
Briefly looking at the learning aspect:
If your input data is entirely one dimensional (and even if it's at a tilt), as it looks like it is on that first diagram then there are arbitrarily many equivalent separating lines (depending only on the point where they cross the line of data). To get a better test, you may want input data that is more scattered.
It may be helpful to shuffle your training data (taking care to keep the input and target output in sync), possibly doing so repeatedly throughout training. This helps avoid bias from the order: for example if all the blue data points came first followed by all the red ones the training could be skewed.
For most machine learning, when working with a learning rate, it is common practice to decrease the learning rate as training progresses. This is essentially to fine tune the process once in the approximate right place, and avoid jumping back and forth over the ideal location.
Although not a massive issue for a simple perceptron like this, because of its very limited parameter space, it is best practice to keep some of the data separate and never use it for training. This is used as a validation set to periodically check that the learning is generalisable and not overfitting.
Focusing on C++ style:
It is generally backwards to #include "PerceptronDef.cpp" (or any cpp source file). You should keep #include for header files, and let the linker take care of source files.
Although typedef is a valid and often useful language feature, there is no advantage here to defining INT as a synonym for int. All such typedefs offer is the promise of confusion.
Avoid using old C style casts such as (REAL)RAND_MAX. It is better to use the safest suitable c++ alternative, which in this case would be static_cast<double>(RAND_MAX).
rand() is generally not considered random enough for many purposes and should be avoided by default.
That said, for this particular problem it may be better to avoid the srand( unsigned int( time(NULL) ) ); line. That makes different runs of the program behave differently, which is just really annoying for tracing through how the training behaves.
When writing doubles to a text file through an output stream, the precision of the values will be truncated. It would be better to use a binary format for storing doubles, especially if you plan to load them again for further training. If you really want to use a text format, you will probably want to use std::setprecision to avoid truncation.
The use of emplace_back with vectors when filling them with newly constructed objects is good. There are other things that would help use vectors more usefully. For example, rather than tracking pattern_count in load, you could simply get desired_outputs.size().
Briefly looking at the learning aspect:
If your input data is entirely one dimensional (and even if it's at a tilt), as it looks like it is on that first diagram then there are arbitrarily many equivalent separating lines (depending only on the point where they cross the line of data). To get a better test, you may want input data that is more scattered.
It may be helpful to shuffle your training data (taking care to keep the input and target output in sync), possibly doing so repeatedly throughout training. This helps avoid bias from the order: for example if all the blue data points came first followed by all the red ones the training could be skewed.
For most machine learning, when working with a learning rate, it is common practice to decrease the learning rate as training progresses. This is essentially to fine tune the process once in the approximate right place, and avoid jumping back and forth over the ideal location.
Although not a massive issue for a simple perceptron like this, because of its very limited parameter space, it is best practice to keep some of the data separate and never use it for training. This is used as a validation set to periodically check that the learning is generalisable and not overfitting.
answered Apr 18 at 23:50
Josiah
3,182326
3,182326
add a comment |Â
add a comment |Â
up vote
3
down vote
This is a danger sign:
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <vector>
#include <time.h>
#include "Perceptron.h"
If you put your own headers first, before any library headers, you'll see that it's not self-contained. It's generally a bad idea for headers to depend on the order of inclusion, so make them self-contained:
#ifndef PERCEPTRON_H
#define PERCEPTRON_H
// These are required for the perceptron *interface*
#include <string>
#include <vector>
struct Settings
{
The implementation file Perceptron.cpp (which should be compiled separately) then needs
#include "Perceptron.h"
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <fstream>
And main.cpp just needs
#include "Perceptron.h"
#include <iostream>
On the point about separate compilation: don't include all your sources into one translation unit (i.e. one invocation of your compiler). Instead compile main.cpp and Perceptron.cpp to object files, and then link the object files and their libraries as a separate step.
With Make, it would look like this:
CXXFLAGS = -Wall -Wextra -Wpedantic -Weffc++
CXXFLAGS += -Wwrite-strings -Warray-bounds
CXXFLAGS += -Wno-parentheses
perceptron_test: main.o Perceptron.o
perceptron_test: LINK.o = $(LINK.cc)
(Make has default rules that know how to create the *.o files, so no need to write anything for them).
add a comment |Â
up vote
3
down vote
This is a danger sign:
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <vector>
#include <time.h>
#include "Perceptron.h"
If you put your own headers first, before any library headers, you'll see that it's not self-contained. It's generally a bad idea for headers to depend on the order of inclusion, so make them self-contained:
#ifndef PERCEPTRON_H
#define PERCEPTRON_H
// These are required for the perceptron *interface*
#include <string>
#include <vector>
struct Settings
{
The implementation file Perceptron.cpp (which should be compiled separately) then needs
#include "Perceptron.h"
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <fstream>
And main.cpp just needs
#include "Perceptron.h"
#include <iostream>
On the point about separate compilation: don't include all your sources into one translation unit (i.e. one invocation of your compiler). Instead compile main.cpp and Perceptron.cpp to object files, and then link the object files and their libraries as a separate step.
With Make, it would look like this:
CXXFLAGS = -Wall -Wextra -Wpedantic -Weffc++
CXXFLAGS += -Wwrite-strings -Warray-bounds
CXXFLAGS += -Wno-parentheses
perceptron_test: main.o Perceptron.o
perceptron_test: LINK.o = $(LINK.cc)
(Make has default rules that know how to create the *.o files, so no need to write anything for them).
add a comment |Â
up vote
3
down vote
up vote
3
down vote
This is a danger sign:
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <vector>
#include <time.h>
#include "Perceptron.h"
If you put your own headers first, before any library headers, you'll see that it's not self-contained. It's generally a bad idea for headers to depend on the order of inclusion, so make them self-contained:
#ifndef PERCEPTRON_H
#define PERCEPTRON_H
// These are required for the perceptron *interface*
#include <string>
#include <vector>
struct Settings
{
The implementation file Perceptron.cpp (which should be compiled separately) then needs
#include "Perceptron.h"
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <fstream>
And main.cpp just needs
#include "Perceptron.h"
#include <iostream>
On the point about separate compilation: don't include all your sources into one translation unit (i.e. one invocation of your compiler). Instead compile main.cpp and Perceptron.cpp to object files, and then link the object files and their libraries as a separate step.
With Make, it would look like this:
CXXFLAGS = -Wall -Wextra -Wpedantic -Weffc++
CXXFLAGS += -Wwrite-strings -Warray-bounds
CXXFLAGS += -Wno-parentheses
perceptron_test: main.o Perceptron.o
perceptron_test: LINK.o = $(LINK.cc)
(Make has default rules that know how to create the *.o files, so no need to write anything for them).
This is a danger sign:
#include <iostream>
#include <iomanip>
#include <fstream>
#include <string>
#include <vector>
#include <time.h>
#include "Perceptron.h"
If you put your own headers first, before any library headers, you'll see that it's not self-contained. It's generally a bad idea for headers to depend on the order of inclusion, so make them self-contained:
#ifndef PERCEPTRON_H
#define PERCEPTRON_H
// These are required for the perceptron *interface*
#include <string>
#include <vector>
struct Settings
{
The implementation file Perceptron.cpp (which should be compiled separately) then needs
#include "Perceptron.h"
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iomanip>
#include <iostream>
#include <fstream>
And main.cpp just needs
#include "Perceptron.h"
#include <iostream>
On the point about separate compilation: don't include all your sources into one translation unit (i.e. one invocation of your compiler). Instead compile main.cpp and Perceptron.cpp to object files, and then link the object files and their libraries as a separate step.
With Make, it would look like this:
CXXFLAGS = -Wall -Wextra -Wpedantic -Weffc++
CXXFLAGS += -Wwrite-strings -Warray-bounds
CXXFLAGS += -Wno-parentheses
perceptron_test: main.o Perceptron.o
perceptron_test: LINK.o = $(LINK.cc)
(Make has default rules that know how to create the *.o files, so no need to write anything for them).
answered Apr 19 at 9:17
Toby Speight
17.5k13489
17.5k13489
add a comment |Â
add a comment |Â
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fcodereview.stackexchange.com%2fquestions%2f192411%2ffirst-attempt-at-implementing-a-perceptron%23new-answer', 'question_page');
);
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password