Introduction: Machine Learning Crawler Robot Using Reinforcement Learning, Neural Net and Q-Learning
This simple crawling robot uses and Arduino Uno and two micro-servos to learn how to move.
In the first video I use a reinforcement learning algorithm to randomly choose arm two arm positions. After moving from one position to the next, the distance moved is stored. After the training is over, the arm positions which produce the greatest movement are repeated several times to show what it has learned. The program is quite simple in operation but better than the following video which shows the Q-learning algorithm being used. Unfortunately, the Q-learning is a poor fit for this robot. It relies on moving only from one neighboring arm position to the next neighboring one. And it cheats by always starting from the highest arm position.
The Q-learning algorithm was taken from this website: https://planetachatbot.com/q-learning-con-arduino-...
You will need to use Google translate, if you don't know Spanish, to understand the website. It includes a link to a nice introductory example of Q-learning if you don't already understand this machine learning algorithm.
I modified the code and commented the code and include it here.
The robot is easy to build and only requires a couple of micro servos, an Arduino, an SR04 ultrasonic sensor, a couple of battery packs and a simple 3 or four wheel platform.
The Simple Reinforcement Learning algorithm is the easiest and probably best implementation of the four shown here.
I also implemented a Neural Net which does work but probably adds an unnecessary step. Would be interested in how others would implement a Neural Net.
A great introduction to reinforcement(rewarding) machine learning.
Code for all four algorithms are below.
Step 1: Closeup Photos of Robot
The arm servos must be oriented so that when they are rotating counter-clockwise then 0 degrees is up and 180 degrees is down. In the program they will always be zeroed out with servo 1 (closest to the body) at 0 degrees and servo 2 at 40 degrees.
I use a servo shield to make it easy to connect servos and sensors to the Arduino Uno.
The SR04 ultrasonic sensor is used to determine when the robot moves forward. One could also use a wheel encoder for the same purpose and then having a solid surface behind the robot at all times during the learning sessions would not be required.
Step 2: Q-learning Arduino Program...
Here is the code. I commented the code so it is easier to understand how the array tables work. The Q table keeps the rewards and provides a trail for the robot to follow once it has learned which movements from state to state will propel the robot forward with the greatest success.
(sorry about the formatting - instructables formatting is pretty pitiful)
/*
Q_Learning Robot
by: Erick M. Sirpa
// comments by Jim Demello Oct 2018
*
/ Servo setup: the servos must be oriented so that if the arm is rotating counter-clockwise to the left of the servo, then up is 0 degrees
// and down is 180 degrees, for both servos. Then when the arm is in it's highest postion, servo 1 (the servo closest to the
// body of the robot, will be at 0 degrees and servo 2 will be at 40 degrees.)
// Sonar: the sonar module should be placed facing the rear of the robot as it measures movement of the robot away from some
// solid structure like a wall.
#include "servo.h"
void Mostrar(float Q[][4]); // function to print Q table array on Serial Monitor
float distancia;
float tiempo;
int TRIGGER=7,ECHO=8; // sonar pins
Servo servo1,servo2;
int valor=0; // not used
int act=0;
int ang=40; // servo1 (initial value) angle
int ang2=0; // servo2 angle
int ang_t=0; // temp holders for servo write routine (slowing down servo)
int ang2_t=0;
float Q[16][4]={{ 0, 0, 0, 0}, // col 1 and 2 hold servo1 values and col3 and col4 hold servo2 values
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{ 0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{ 0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0},
{0, 0, 0, 0}};
int action=0;
int state=0;
int cont=0; // not used
float gamma = 0.8;
float Qmax=0;
float a=0,b=0;
int x=0;
int goal=15; // state 15 is goal
void setup (){
servo1.attach(9);
servo1.write(0); //starting position for servo 1
delay(1000);
servo2.attach(6);
servo2.write(0);
delay(1000);
pinMode(TRIGGER, OUTPUT); // setup sonar
pinMode(ECHO, INPUT);
Serial.begin(9600);
float R[16][4] = { // columns 1 and 2 are for servo1 and columns 3 and 4 are for servo2
{ 0, -1, 0, -1}, // servos in highest position cannot raise first servo nor raise second (servo angle of zero)
{-1, -1, 0, 0}, // can lower or raise 2nd servo
{-1, -1, 0, 0}, // can lower or raise 2nd servo
{-1, -1, -1, 0}, // can only raise 2nd servo
{ 0, 0, 0, -1}, // can lower and raise first servo AND lower 2nd servo
{-1, 0, 0, 0}, // can raise 1st servo AND lower and raise 2nd servo
{-1, 0, 0, 0}, // can raise 1st servo AND lower and raise 2nd servo
{-1, 0, -1, 0}, // can raise 1st servo and raise 2nd servo
{ 0, 0, 0, -1}, // can lower AND raise 1st servo and lower 2nd servo
{-1, 0, 0, 0}, //9 can raise 1st servo and lower and raise 2nd servo
{-1, 0, 0, 0}, //10 can raise 1st servo and lower and raise 2nd servo
{-1, 0, -1, 0}, //11 can raise 1st servo and can raise 2nd servo
{-1, 0, 0, -1}, //12 can raise 1st servo and can lower 2nd servo
{-1, -1, 0, 0}, //13 can raise and lower 2nd servo
{-1, -1, 1000,0}, // can raise and lower 2nd servo
{-1, 0, -1, 0}}; // servos in their lowest position - can raise 1st servo and can raise 2nd servo
// pos array is valid action table - it contains 3 columns because there can be up to 3 distinct valid actions
int pos[16][3]={ // 4 possible action values: 0=servo1 down -- 1=servo1 up -- 2=servo2 down -- 3=servo2 up rows are states
{0,2,0},
{2,3,2}, //01 230 (old values)
{2,3,3}, //02 - use 233 - the second 3 is just a place holder as must fill all 3 column values with valid value
{3,3,3}, //03 330
{0,1,2},
{2,3,3}, //05 230
{2,3,3}, //06 230
{3,3,3}, //07 330
{0,1,2}, //08
{2,3,3}, //09 230
{2,3,3}, //10 230
{3,3,3}, //11
{1,2,1}, //12
{2,3,3}, //13
{2,3,3}, //14
{1,3,3}, //15
};
int nstate=0;
float diff=0,d_prom=0,d_ant=0, d_new=0;
float point=0;
int cc=0; // not used
for(int d=0;d<20;d++){ // get starting distance - average over 20 mearsurements (probably not necessary to average)
d_prom=dist()+d_prom;
delay(100);
}
d_ant=d_prom/20;
Serial.println(d_ant);
delay(1000);
// exit(0); // exit here to just test SR04 sensor
for (int epoca=0;epoca<2;epoca++) // for 10 episodes (even 5 episodes usually works)
{
randomSeed(analogRead(0));
// state=random(15); // randomly select a state 0 to 14 - problem with this is that it allows negative
// servo positions to be generated - not too good for servo
state = 0; // start at highest arm position every time
ang=40; // servo 1 starting position
ang2=0; // servo 2 starting position
while(state!=goal){
ang_t =ang; // used to write from old angle to new one in servo writing function (for slowing down servo speed)
ang2_t=ang2; // same
cc=0; // not used
cont++; // not used
//x=random(2); //original progam was only using 2 possible actions but this limits the number of states being accessed
x=random(3); // get one of 3 possible action numbers accesses more states
//x=2;
action=pos[state][x]; // choose a valid a ction for that state from pos array
// if action 0 or 1 then select next closest servo 1 position
// if action 2 or 3 then select next closest servo 2 position
if(action==0){ // servo 1 down and servo2 at 0
nstate=state+4; // make next state where servo 1 moves down
ang=ang+20;
ang2=0;
}
else if(action==1){ // servo 1 up and servo2 at 0
nstate=state-4; // make next state where servo 1 moves up
ang=ang-20;
ang2=0;
}
else if(action==2){ // servo 2 down
nstate=state+1; // make next state where servo2 moves down
ang2=ang2+45;
}
else if(action==3){ // servo 2 up
nstate=state-1; // make next state where servo2 moves up
ang2=ang2-45;
}
// move servos //
Serial.print(" episode = ");Serial.print(epoca);
Serial.print(" state = ");Serial.print(state);
Serial.print(" action= ");Serial.print(action);
Serial.print(" ang = ");Serial.print(ang);
Serial.print(" ang2 = ");Serial.println(ang2);
servoVelocidad(servo1,ang_t,ang,5); // // move servo1 ----5 is just delay speed for writing servo(0=full speed)
servoVelocidad(servo2,ang2_t,ang2,5); // move servo2
d_new=dist(); // get distance
diff=d_new-d_ant; // see how much moved from last distance
d_ant=d_new;
if(diff>=1.9 ){ // if more than 1.9 cm then...
point=map(diff,1,4,5,10); // maps from 1-4 to 5-10
R[nstate][action]=point; // increase reward for movement forward in next state (future reward)
Serial.println(point);
}
Serial.println(" ");
a = -10;
for (int i = 0; i < 4; i++) {
if (a < Q[nstate][i]) { // get max value of next state
a = Q[nstate][i];
}
}
Qmax = a * gamma; // get percentage of max value
Q[state][action] = R[state][action] + Qmax; // calculate and store Q value
state = nstate;
} // while not goal
} // end each epoch
Mostrar(R); // show reward table
Serial.println(" ");
Serial.println(" ");
Mostrar(Q); // show q table
} // end setup
void loop(){ // main loop reads Qtable and performs actions
//state = random(3); // randomly choose state from 0 to 3 so that goal state is not chosen first I assume
state=0; // start out at zero state every time (highest position of arm)
ang=40; // starting angle of servo1
ang2=0; // starting angle of servo2
Serial.println("Starting main loop... ");
while(state!=goal){
b = -10;
for (int i = 0; i < 4; i++) { // find highest value in Qtable for selected state and get that action number
if (b <= Q[state][i]) {
b = Q[state][i];
act = i;
}
}
ang_t=ang;
ang2_t=ang2;
Serial.print(" b = ");Serial.print(b);
Serial.print(" state = ");Serial.print(state);
if(act==0){ // servo1 down and servo2 at 0
state=state+4;
ang=ang+20;
ang2=0;
}
else if(act==1){ // servo1 up and servo2 at 0
state=state-4;
ang=ang-20;
ang2=0;
}
else if(act==2){ // servo2 down
state=state+1;
ang2=ang2+45;
}
else if(act==3){ // servo2 up
state=state-1;
ang2=ang2-45;
}
Serial.print(" act= ");Serial.print(act);
Serial.print(" ang = ");Serial.print(ang);
Serial.print(" ang2 = ");Serial.println(ang2);
servoVelocidad(servo1,ang_t,ang,25); // move servo1 at speed 25
servoVelocidad(servo2,ang2_t,ang2,25); // move servo2 at speed 25
} // end while not goal
Serial.println("End main loop. ");
} // end loop
void Mostrar(float Q[][4]){ // routine to print Qtable on monitor
for (int i=0;i<16;i++){
for(int j=0;j<4;j++){
Serial.print(Q[i][j]);
Serial.print(" ");
}
Serial.println(" ");
}
} // end show Q table
float dist() { // routine to measure distance
digitalWrite(TRIGGER, LOW);
delayMicroseconds(5);
digitalWrite(TRIGGER, HIGH);
delayMicroseconds(10);
digitalWrite(TRIGGER, LOW);
// Calcula la distancia midiendo el tiempo del estado alto del pin ECHO
tiempo = pulseIn(ECHO, HIGH);
distancia = tiempo / 58.00;
Serial.print(tiempo);Serial.print(" ");
Serial.print(distancia);
Serial.println("cm");
delay(100);
return distancia;
}// end get sonar distance routine
void servoVelocidad(Servo servo, int anguloA, int anguloB, int velocidad) { // routine to control speed of servo
if (anguloA
for (int angulo = anguloA; angulo <= anguloB; angulo=angulo+2)
{
servo.write(angulo);
delay(velocidad);
}
}
else {
for (int angulo = anguloA; angulo >= anguloB; angulo=angulo-2)
{
servo.write(angulo);
delay(velocidad);
}
}
} // end servo write routine
Step 3: Reinforcement Learning Arduino Program...
/*
Mmachine learnng crawling Robot Reinforcement Learning Unsupervised - Version 1
by: jim demello
*
/ Servo setup: the servos must be oriented so that if the arm is rotating counter-clockwise to the left of the servo, then up is 0 degrees
// and down is 180 degrees, for both servos. Then when the arm is in it's highest postion, servo 1 (the servo closest to the
// body of the robot, will be at 0 degrees and servo 2 will be at 40 degrees.)
// Sonar: the ultrasonic module should be placed facing the rear of the robot as it measures movement of the robot away from some
// solid structure like a wall.
// TODO: 1. smooth servo arm movements (simultaneous moves) - done
// 2. lower servo starting positions
// 3. try for 3 positions in success table
// 4. try using wheel encoder rather than ultrasonic module for greater precision and so that robot does not have to
// measure distance from another object.
// algorithm: this is a positive reinforcement (perhaps a simple greedy epsilon algorithm) unsupervised learning algorithm
// It chooses a random state (arm position) for the first position then the second time in the loop randomly gets the second arm position
// and then moves the arm from the first to the second position, gets distance moved and then if it is greater than
// 2cms, it stores these two arm movements in the successes table array.
// When it has looped through the episodes number of cycles, it loops through the successes table to find the highest distance moved.
// Then it just cycles back and forth through those two arm movements.
// improvements to algorithm: sometimes using just two arm positions is too little to produce much movement (although sometimes it is reallly good)
// so could change successes table to store 3 arm movements and then I think it would produce greater movement each time.
//
#include // use this lib rather than servo.h to control speed of servo
VarSpeedServo servo1,servo2;
float distance;
float sonarTime;
int TRIGGER=7,ECHO=8; // sonar pins
int pos[16][2]={ { 0,40}, // colum 1 holds servo1 positions and column 2 holds servo 2 positions
{ 0,85},
{ 0,130},
{ 0,175},
{ 30,40},
{ 30,85},
{ 30,130},
{ 30,175},
{ 60,40},
{ 60,85},
{ 60,130},
{ 60,175},
{ 90,40},
{ 90,85},
{ 90,130},
{ 90,175}};
int success[16] [5] = { // can have up to 16 successful moves. (from first arm postion) col1 is servo1, col2 is servo2 TO (second arm pos) col3 is servo2 and col4 is servo 2
// col 5 holds distance robot traveled after moving from arm position 1 to arm position 2
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0},
{0,0,0,0,0}};
int state=0;
int numberSuccesses = 0;
int episodes = 0;
int spos1 = 0;
int spos2 = 0;
int spos3 = 0;
int spos4 =0;
float distDifference=0,distPrevious=0,distCurrent=0;
void setup (){
servo1.attach( 9, 600, 2400 );
servo2.attach( 6, 600, 2400 );
servo1.write(0,50); //starting position for servo 1
servo2.write(0,50);
delay(1000);
pinMode(TRIGGER, OUTPUT); // setup sonar
pinMode(ECHO, INPUT);
Serial.begin(9600);
randomSeed(analogRead(0));
distPrevious = getDistance(); //get initial distance
Serial.println(distPrevious);
delay(1000);
// exit(0); // exit here to just test sonar
} // end setup
void doTraining() {
episodes = 40;
for (int episode=0;episode
{
state=random(16); // randomly select a state 0 to 15
if ( episode % 2 == 0) { // even episode so load spos1 and spos2
spos1 = pos[state][0];
spos2 = pos[state][1];
if (episode == 0) { // first episode so need to move arm from starting position to state position
// if we dont do this then starting really starts from highest position and gives false distance reading
// spos1 = pos[state][0];
// spos2 = pos[state][1];
servo1.write(spos1,60,false); // move servo 1 - false means don't wait for move to finish before going to next instruction
servo2.write(spos2,60,false);
servo1.wait(); // now wait until servo finishes
servo2.wait();
}
}
else // odd episode so load spos2 and spos3 and move servos, get distance and store successes in successes array
{
spos3 = pos[state][0];
spos4 = pos[state][1];
servo1.write(spos1,60,false); // move servo 1 - false means don't wait for move to finish before going to next instruction
servo2.write(spos2,60,false);
servo1.wait(); // now wait until servo finishes
servo2.wait();
servo1.write(spos3,60,false); // move
servo2.write(spos4,60,false);
servo1.wait();
servo2.wait();
distCurrent = getDistance(); // get distance - note this is not always accurate so sometimes robot will just claw the air
distDifference = distCurrent - distPrevious;
distPrevious = distCurrent;
Serial.print(" episode = ");Serial.print(episode);
Serial.print(" state = ");Serial.print(state);
Serial.print(" spos1 = ");Serial.print(spos1);
Serial.print(" spos2 = ");Serial.println(spos2);
Serial.print(" spos3 = ");Serial.print(spos3);
Serial.print(" spos4 = ");Serial.println(spos4);
Serial.print(" distance = ");Serial.println(distDifference);
Serial.println(" ");
if ( distDifference > 1.9) { // if moved forward 2 or more centimeters
success[numberSuccesses][0] = spos1; // servo position 1
success[numberSuccesses][1] = spos2; // servo position 2
success[numberSuccesses][2] = spos3; // servo position 1
success[numberSuccesses][3] = spos4; // servo position 2
success[numberSuccesses][4] = distDifference; // store distance
numberSuccesses = numberSuccesses + 1;
if (numberSuccesses > 14) episodes = 10000; // escape loop if successes array is full
}
} // end if mod 2
} // end each episode
Serial.print(" NumberSuccesses = ");Serial.println(numberSuccesses);
for (int i=0;i<16;i++){ // print success table
for(int j=0;j<5;j++){
Serial.print(success[i][j]);
Serial.print(" ");
}
Serial.println(" ");
}
} // end doTraining
void getLongestStep() {
Serial.println("Do getLongestStep...");
servo1.write(0,20); // start at 0 state
servo2.write(40,20);
delay(2000);
int prevDistance = 0; // local variables
int currDistance = 0;
int highDistance = 0;
spos1 = 0;
spos2 = 0;
spos3 = 0;
spos4 = 0;
for(int i=0;i<16;i++){ // find highest distance and use those servo postions to move robot
currDistance = success[i][4];
if (currDistance != 0 && currDistance> prevDistance) {
highDistance = currDistance;
spos1 = success[i][0];
spos2 = success[i][1];
spos3 = success[i][2];
spos4 = success[i][3];
}
prevDistance = currDistance;
} // end while
} // end doTraining()
void doLearnedBehavior() {
Serial.println("Do Learned behavior... ");
servo1.write(0,30,false); //starting position for servo 1
servo2.write(0,30,false);
servo1.wait();
servo2.wait();
for (int i=0;i<10;i++) {
Serial.print(" spos1 = ");Serial.print(spos1);
Serial.print(" spos2 = ");Serial.println(spos2);
Serial.print(" spos3 = ");Serial.print(spos3);
Serial.print(" spos4 = ");Serial.println(spos4);
servo1.write(spos1,50,false); // first servo positions
servo2.write(spos2,50,false);
servo1.wait();
servo2.wait();
servo1.write(spos3,50,false); // 2nd position
servo2.write(spos4,50,false);
servo1.wait();
servo2.wait();
state = state + 1;
} // doLearned
} // end loop
void loop(){ // main loop reads success table and performs actions
doTraining(); // trial and error training with distance reinforcement
getLongestStep(); // find servo positions for longest step
doLearnedBehavior(); // do longest step 10 times to make robot crawl
servo1.write(0,30,false); //return to starting position for servo 1
servo2.write(0,30,false); //return to starting position for servo 2
servo1.wait();
servo2.wait();
delay(2000);
exit(0); // quit program
} // end main loop
float getDistance() { // routine to measure distance = call and average it 10 times
int numberTriggers = 5;
int average = 0;
for(int i=0;i
digitalWrite(TRIGGER, LOW);
delayMicroseconds(5);
digitalWrite(TRIGGER, HIGH);
delayMicroseconds(10);
digitalWrite(TRIGGER, LOW);
sonarTime = pulseIn(ECHO, HIGH);
distance = sonarTime / 58.00;
average = average + distance;
Serial.print(sonarTime);Serial.print(" ");
Serial.print(distance);
Serial.println("cm");
delay(100);
} // end for i
average = average / numberTriggers;
return average;
}// end get sonar distance routine
Step 4: Simple Reinforcement Code:
/*Mmachine learnng crawling Robot using Simple Reinforcement Learning
//Mmachine learnng crawling Robot using Simple Reinforcement Learning
//by: jim demelloby: jim demello
//Mmachine learnng crawling Robot using Simple Reinforcement Learning
//by: jim demello Nov 2018
// Servo setup: the servos must be oriented so that if the arm is rotating counter-clockwise to the left of the servo, then up is 0 degrees
// and down is 180 degrees, for both servos. Then when the arm is in it's highest postion, servo 1 (the servo closest to the
// body of the robot, will be at 0 degrees and servo 2 will be at 40 degrees.)
// Sonar: the ultrasonic module should be placed facing the rear of the robot as it measures movement of the robot away from some
// solid structure like a wall.
// TODO: 1. smooth servo arm movements (simultaneous moves) - done
// 2. lower servo starting positions - done
// 3. try using wheel encoder rather than ultrasonic module for greater precision and so that robot does not have to
// measure distance from another object.
// goal: move between two arm positions that produce the greatest distance, without using arrays to store results.
// algorithm: this is a positive reinforcement unsupervised learning algorithm
// It chooses random servo positions for two arm positions -
// and then moves the arm from the first to the second position, gets distance moved and then if it is greater than
// 2cms, it keeps the arm positions that produce the greatest distance.
#include
Servo servo1,servo2;
#ifdef __arm__
// should use uinstd.h to define sbrk but Due causes a conflict
extern "C" char* sbrk(int incr);
#else // __ARM__
extern char *__brkval;
#endif // __arm__
int freeMemory() { // this routine reports on free ram space
char top;
#ifdef __arm__
return &top - reinterpret_cast(sbrk(0));
#elif defined(CORE_TEENSY) || (ARDUINO > 103 && ARDUINO != 151)
return &top - __brkval;
#else // __arm__
return __brkval ? &top - __brkval : &top - __malloc_heap_start;
#endif // __arm__
}
float distance;
float sonarTime;
int TRIGGER=7,ECHO=8; // sonar pins
//int state=0;
//int numberSuccesses = 0;
int episodes = 0;
int spos1 = 0; // servo1 position
int spos2 = 0; // servo2 position
int spos3 = 0; // servo1 position
int spos4 =0; // servo2 position
int spos1High =0; // servo1 highest position
int spos2High =0;
int spos3High =0;
int spos4High =0;
int distanceHigh=0;
float distDifference=0,distPrevious=0,distCurrent=0;
void setup (){
Serial.begin(9600);
servo1.attach( 9, 600, 2400 );
servo2.attach( 6, 600, 2400 );
myServo(servo1,0,1,8,1); // set servos to zero position
delay(1000);
myServo(servo2,0,1,8,1);
delay(1000);
pinMode(TRIGGER, OUTPUT); // setup sonar
pinMode(ECHO, INPUT);
randomSeed(analogRead(0)); // get real random number seed
distPrevious = getDistance(); //get initial distance
Serial.println(distPrevious);
delay(1000);
// exit(0); // exit here to just test sonar
} // end setup
float getDistance() { // routine to measure distance = call and average it 5 times
int numberTriggers = 5;
int average = 0;
for(int i=0;i
digitalWrite(TRIGGER, LOW);
delayMicroseconds(5);
digitalWrite(TRIGGER, HIGH);
delayMicroseconds(10);
digitalWrite(TRIGGER, LOW);
sonarTime = pulseIn(ECHO, HIGH);
distance = sonarTime / 58.00;
average = average + distance;
// Serial.print(sonarTime);Serial.print(" ");Serial.print(distance);Serial.println("cm");
delay(100);
} // end for i
average = average / numberTriggers;
Serial.println(average);
return average;
}// end get ultrasonic distance routine
void doTraining() {
Serial.println("Do doTraining...");
episodes = 40;
for (int episode=0;episode
{
spos1 = random(160);
spos2 = random(160);
spos3 = random(160);
spos4 = random(160);
myServo(servo1,spos1,1,7,1);
myServo(servo2,spos2,1,7,1);
myServo(servo1,spos3,1,7,1);
myServo(servo2,spos4,1,7,1);
distCurrent = getDistance(); // get distance - note this is not always accurate so sometimes robot will just claw the air
distDifference = distCurrent - distPrevious;
distPrevious = distCurrent;
Serial.print(" episode = ");Serial.print(episode);
// Serial.print(" state = ");Serial.print(state);
Serial.print(" spos1 = ");Serial.print(spos1);
Serial.print(" spos2 = ");Serial.print(spos2);
Serial.print(" spos3 = ");Serial.print(spos3);
Serial.print(" spos4 = ");Serial.print(spos4);
Serial.print(" distance = ");Serial.println(distDifference);
if ( distCurrent > distanceHigh) { // if current distancee is greater than highest then replace postions
spos1High = spos1; // servo position 1
spos2High = spos2; // servo position 2
spos3High = spos3; // servo position 3
spos4High = spos4; // servo position 4
distanceHigh = distCurrent;
// distPrevious = distCurrent;
}
} // end each episode
Serial.print("spos1High = ");Serial.println(spos1High);
Serial.println(" ");
} // end doTraining
void doLearnedBehavior() {
Serial.println("Do Learned behavior... ");
myServo(servo1,0,1,8,1);
myServo(servo2,0,1,8,1);
delay(2000);
for (int i=0;i<30;i++) {
Serial.print(" spos1High= "); Serial.print(spos1High);
Serial.print(" spos2High = ");Serial.print(spos2High);
Serial.print(" spos3High = ");Serial.print(spos3High);
Serial.print(" spos4High = ");Serial.println(spos4High);
myServo(servo1,spos1High,1,7,1);
myServo(servo2,spos2High,1,7,1);
myServo(servo1,spos3High,1,7,1);
myServo(servo2,spos4High,1,7,1);
} // doLearned
} // end loop
void loop(){ // main loop reads success table and performs actions
int freespace = freeMemory(); Serial.print("free memory= "); Serial.println(freespace);
doTraining(); // trial and error training with distance reinforcement
freespace = freeMemory(); Serial.print("free memory= "); Serial.println(freespace);
doLearnedBehavior(); // do longest step 10 times to make robot crawl
myServo(servo1,0,1,8,1);
myServo(servo2,0,1,8,1);
Serial.print("end program ");
delay(2000);
exit(0); // quit program
} // end main loop
void myServo(Servo servo,int newAngle,int angleInc,int incDelay,int servoNum) {
int curAngle = 0;
curAngle = servo.read();
if (curAngle < newAngle) {
for(int angle=curAngle;angle < newAngle;angle += angleInc) {
servo.write(angle);
delay(incDelay); }
}
else if (curAngle > newAngle) {
for(int angle=curAngle;angle > newAngle;angle -= angleInc) {
servo.write(angle);
delay(incDelay); }
}
} // end of myServo function
Step 5: Neural Net Code:
//Machine learnng crawling Robot
Reinforcement using Neural Net
//by: jim demello November 2018 at Shangluo University
//Adapted neural net from here: http://www.the-diy-life.com/running-an-artifical-...
// Servo setup: the servos must be oriented so that if the arm is rotating counter-clockwise to the left of the servo, then up is 0 degrees
// and down is 160(servoMax) degrees, for both servos. Then when the arm is in it's highest postion, servo 1 (the servo closest to the
// body of the robot, will be at 0 degrees and servo 2 will be at 0 degrees.)
// Sonar: the ultrasonic module should be placed facing the rear of the robot as it measures movement of the robot away from some
// solid structure like a wall.
// goal: move between two arm positions that produce the greatest distance, without using arrays to store results, only NN
// algorithm: this robot uses a NN to train on training data, then practice moving with the NN and then repeating the most successful movement (learned behavior)
// this algorithm is better than my previous RL algorithms because once the NN is trained, then any random servo positions between
// 0 and servoMax can be used.
// To make this NN work, it is all in setting up the input and training arrays. There may be better ways of doing than I have done here.
// Also you can play with the various NN settings.
// Note: while this algorithm is very entertaining, it seems that a simple Reinforment Learning algorithm is just as accurate without all the complexity of the NN. But it
// it is a fun application of a NN. Perhaps someone can find a better way to fit the application to a NN - perhaps by applying the Distance reading to the back-propagation
// rather than as an input to NN.
#include
Servo servo1,servo2;
#ifdef __arm__
// should use uinstd.h to define sbrk but Due causes a conflict
extern "C" char* sbrk(int incr);
#else // __ARM__
extern char *__brkval;
#endif // __arm__
int freeMemory() { // this routine reports on free ram space
char top;
#ifdef __arm__
return &top - reinterpret_cast(sbrk(0));
#elif defined(CORE_TEENSY) || (ARDUINO > 103 && ARDUINO != 151)
return &top - __brkval;
#else // __arm__
return __brkval ? &top - __brkval : &top - __malloc_heap_start;
#endif // __arm__
}
float distance;
float sonarTime;
int TRIGGER=7,ECHO=8; // ultrasonic sensor pins
int spos1 = 0; // servo1 position
int spos2 = 0; // servo2 position
int spos3 = 0; // servo1 position
int spos4 =0; // servo2 position
int spos1High =0; // servo1 highest position - which means these arm positions achieved the biggest distance reading and so will repeat them in final execution routine
// I know, I am terrible at variable names
int spos2High =0;
int spos3High =0;
int spos4High =0;
int distanceHigh=0; // save highest distance reading
float highOutput = 0.0;
int numberTrainingCycles = 100; // number of times to train robot on NN after the NN has trained on the Input and Target arrays
int servoMin = 0;
int servoMax = 160;
float distDifference=0,distPrevious=0,distCurrent=0;
#include "math.h"
/******************************************************************
Network Configuration - customized per network
******************************************************************/
const int PatternCount = 16;
const int InputNodes = 5;
const int HiddenNodes = 7;
const int OutputNodes = 1;
const float LearningRate = 0.2;
const float Momentum = 0.9;
const float InitialWeightMax = 0.5;
const float Success = 0.0015;
float Target[PatternCount][OutputNodes] = { // these are the successful outputs corresponding to arm positions in Input array
// 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
{ 0 }, { 0 }, { 0 }, { 1 }, { 0 }, { 0 }, { 0,}, { 1 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 0 }, { 1 } };
const float Input [PatternCount][InputNodes] {
{ 0,0,0,0,0}, // colum 1 and 2 holds servo1 positions and column 3 and 4 holds servo 2 positions - col 5 holds distance
{ 0,0,0,1,0},
{ 0,0,1,0,0},
{ 0,0,1,1,1},
{ 1,0,0,0,0},
{ 1,0,0,1,0},
{ 1,0,1,0,0},
{ 1,0,1,1,1},
{ 1,1,0,0,0},
{ 1,1,0,1,0},
{ 1,1,1,0,0},
{ 1,1,1,1,0},
{ 0,1,0,0,0},
{ 0,1,0,1,0},
{ 0,1,1,0,0},
{ 0,1,1,1,1}};
/******************************************************************
End Network Configuration
******************************************************************/
int i, j, p, q, r;
int ReportEvery1000;
int RandomizedIndex[PatternCount];
long TrainingCycle;
float Rando;
float Error = 2;
float Accum;
float Hidden[HiddenNodes];
float Output[OutputNodes];
float HiddenWeights[InputNodes + 1][HiddenNodes];
float OutputWeights[HiddenNodes + 1][OutputNodes];
float HiddenDelta[HiddenNodes];
float OutputDelta[OutputNodes];
float ChangeHiddenWeights[InputNodes + 1][HiddenNodes];
float ChangeOutputWeights[HiddenNodes + 1][OutputNodes];
long previousMillis = 0;
unsigned long currentMillis;
long loopTimer = 10; // do the main processing every 10 milliseconds (probably not necessary to use this, I just copied it in from another program)
void setup (){
Serial.begin(115200);
Serial.println("Starting program");
randomSeed(analogRead(A1)); //Collect a random ADC sample for Randomization.
ReportEvery1000 = 1;
for ( p = 0 ; p < PatternCount ; p++ ) {
RandomizedIndex[p] = p ;
}
Serial.println("do train_nn"); // do NN training
train_nn();
delay(1000);
servo1.attach( 9, 600, 2400 );
servo2.attach( 6, 600, 2400 );
myServo(servo1,0,1,8,1); // set servos to zero position
delay(1000);
myServo(servo2,0,1,8,1);
delay(1000);
pinMode(TRIGGER, OUTPUT); // setup ultrasonic sensor
pinMode(ECHO, INPUT);
distPrevious = getDistance(); //get initial distance
Serial.print("Initial distance= ");Serial.println(distPrevious);
delay(1000);
// exit(0); // exit here to just test sonar
} // end setup
float getDistance() { // routine to measure distance = call and average it 5 times
int numberTriggers = 5;
int average = 0;
for(int i=0;i
digitalWrite(TRIGGER, LOW);
delayMicroseconds(5);
digitalWrite(TRIGGER, HIGH);
delayMicroseconds(10);
digitalWrite(TRIGGER, LOW);
sonarTime = pulseIn(ECHO, HIGH);
distance = sonarTime / 58.00;
average = average + distance;
delay(100);
} // end for i
average = average / numberTriggers;
Serial.print("Distance = ");Serial.println(average);
return average;
}// end get sonar distance routine
void doLearnedBehavior() {
Serial.println("Do Learned behavior... ");
myServo(servo1,0,1,8,1);
myServo(servo2,0,1,8,1);
delay(2000);
for (int i=0;i<30;i++) {
Serial.print(" spos1High= "); Serial.print(spos1High);
Serial.print(" spos2High = ");Serial.print(spos2High);
Serial.print(" spos3High = ");Serial.print(spos3High);
Serial.print(" spos4High = ");Serial.println(spos4High);
myServo(servo1,spos1High,1,7,1);
myServo(servo2,spos2High,1,7,1);
myServo(servo1,spos3High,1,7,1);
myServo(servo2,spos4High,1,7,1);
} // doLearned
} // end loop
void loop(){ // main loop reads success table and performs actions
int freespace = freeMemory(); Serial.print("free memory= "); Serial.println(freespace); // just wanted to see if memory ever became a problem
drive_nn();
freespace = freeMemory(); Serial.print("free memory= "); Serial.println(freespace);
doLearnedBehavior();
myServo(servo1,0,1,8,1);
myServo(servo2,0,1,8,1);
Serial.print("end program ");
delay(2000);
exit(0); // quit program
} // end main loop
void myServo(Servo servo,int newAngle,int angleInc,int incDelay,int servoNum) {// routine to read current servo angle, advance it to newAngle and control speed
int curAngle = 0;
curAngle = servo.read();
//Serial.print("curAngle = "); Serial.println(curAngle);
if (curAngle < newAngle) {
for(int angle=curAngle;angle < newAngle;angle += angleInc) {
servo.write(angle);
delay(incDelay); }
}
else if (curAngle > newAngle) {
for(int angle=curAngle;angle > newAngle;angle -= angleInc) {
servo.write(angle);
delay(incDelay); }
}
} // end of myServo function
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////////////////
/after having trained the NN, now drive the robot on the NN, and store the highest distance yielding servo positions
void drive_nn()
{
Serial.println("Running NN Drive ");
numberTrainingCycles = 20; // number of times to try random servo positions and get distances, then store the highest for final walking
for (int x=0;x < numberTrainingCycles;x++) {
currentMillis = millis();
float TestInput[] = {0, 0};
if(currentMillis - previousMillis > loopTimer) { //do calculation every 5 or more milliseconds
// Serial.print("currentMillis= ");Serial.println(currentMillis);
int randomNo = random(servoMax);
float pos1 = map(randomNo,0,servoMax,0,100); // randomly get servo 1 position between 0 and servoMax
pos1 = pos1/100;
randomNo = random(servoMax);
float pos2 = map(randomNo,0,servoMax,0,100);
pos2 = pos2/100;
randomNo = random(servoMax);
float pos3 = map(randomNo,0,servoMax,0,100);
pos3 = pos3/100;
randomNo = random(servoMax);
float pos4= map(randomNo,0,servoMax,0,100);
pos4 = pos4/100;
// move robot with new random positions
myServo(servo1,pos1 * servoMax,1,7,1);
myServo(servo2,pos2 * servoMax,1,7,1);
myServo(servo1,pos3 * servoMax,1,7,1);
myServo(servo2,pos4 * servoMax,1,7,1);
/////////////////////////
// get distance for pos5
/////////////////////////
//float temp = getDistance(); // get distance
distCurrent = getDistance(); // get distance
distDifference = distCurrent - distPrevious;
distPrevious = distCurrent;
Serial.print("===> distDifference = ");Serial.println(distDifference);
// if ((pos1 < .70) && (pos3 > .8)) temp = 3; //testing distance
// else temp = 0;
//////////////////////////////////////////////////////////////
// may have to increase range to make distance more powerful
/////////////////////////////////////////////////////////////
float temp = map(distDifference,0,10,0,100);
float pos5 = temp/100;
InputToOutput(pos1,pos2,pos3,pos4,pos5); //input to NN to get Output[]
pos1 = pos1 * servoMax; pos2 = pos2 * servoMax; pos3 = pos3 * servoMax; pos4 = pos4 * servoMax; //pos5 = pos5 * servoMax;
Serial.print(" pos1= ");Serial.print(pos1);Serial.print(" pos2= ");Serial.print(pos2);Serial.print(" pos3= ");Serial.print(pos3);
Serial.print(" pos4= ");Serial.print(pos4);Serial.print(" pos5= ");Serial.print(pos5);
Serial.print("Output from NN =");Serial.println(Output[0]);
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// IF output is greater than .10 then use those positions to move robot and store the highest output positions achieved
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
if (Output[0] > .10) {
if (Output[0] > highOutput) {
highOutput = Output[0];
spos1High = pos1;
spos2High = pos2;
spos3High = pos3;
spos4High = pos4;
Serial.print(" --------> spos1High= ");Serial.print(spos1High);Serial.print(" spos2High= ");Serial.print(spos2High);Serial.print(" spos3High= ");Serial.print(spos3High);
Serial.print(" spos4High= ");Serial.print(spos4High);Serial.print(" Output= ");Serial.println(Output[0]);
}
}
previousMillis = currentMillis;
} // end millis loop
}
} //end of drive_nn() function
//DISPLAYS INFORMATION WHILE TRAINING
void toTerminal()
{
for ( p = 0 ; p < PatternCount ; p++ ) {
Serial.println();
Serial.print (" Training Pattern: ");
Serial.println (p);
Serial.print (" Input ");
for ( i = 0 ; i < InputNodes ; i++ ) {
Serial.print (Input[p][i], DEC);
Serial.print (" ");
}
Serial.print (" Target ");
for ( i = 0 ; i < OutputNodes ; i++ ) {
Serial.print (Target[p][i], DEC);
Serial.print (" ");
}
/******************************************************************
Compute hidden layer activations
******************************************************************/
for ( i = 0 ; i < HiddenNodes ; i++ ) {
Accum = HiddenWeights[InputNodes][i] ;
for ( j = 0 ; j < InputNodes ; j++ ) {
Accum += Input[p][j] * HiddenWeights[j][i] ;
}
Hidden[i] = 1.0 / (1.0 + exp(-Accum)) ; // activation function
}
/******************************************************************
Compute output layer activations and calculate errors
******************************************************************/
for ( i = 0 ; i < OutputNodes ; i++ ) {
Accum = OutputWeights[HiddenNodes][i] ;
for ( j = 0 ; j < HiddenNodes ; j++ ) {
Accum += Hidden[j] * OutputWeights[j][i] ;
}
Output[i] = 1.0 / (1.0 + exp(-Accum)) ;
}
Serial.print (" Output ");
for ( i = 0 ; i < OutputNodes ; i++ ) {
Serial.print (Output[i], 5);
Serial.print (" ");
}
}
}
void InputToOutput(float In1,float In2, float In3, float In4, float In5)
{
float TestInput[] = {0,0,0,0,0};
// Serial.print("In1 = ");Serial.println(In1);
TestInput[0] = In1; //first servo arm position - servo 1
TestInput[1] = In2; //first servo arm position - servo 2
TestInput[2] = In3; // 2nd servo arm position - servo 1
TestInput[3] = In4; // 2nd servo arm position - servo 2
TestInput[4] = In5; // distance
/******************************************************************
Compute hidden layer activations
******************************************************************/
for ( i = 0 ; i < HiddenNodes ; i++ ) {
Accum = HiddenWeights[InputNodes][i] ;
for ( j = 0 ; j < InputNodes ; j++ ) {
Accum += TestInput[j] * HiddenWeights[j][i] ;
}
Hidden[i] = 1.0 / (1.0 + exp(-Accum)) ;
}
/******************************************************************
Compute output layer activations and calculate errors
******************************************************************/
for ( i = 0 ; i < OutputNodes ; i++ ) {
Accum = OutputWeights[HiddenNodes][i] ;
for ( j = 0 ; j < HiddenNodes ; j++ ) {
Accum += Hidden[j] * OutputWeights[j][i] ;
}
Output[i] = 1.0 / (1.0 + exp(-Accum)) ;
}
//#ifdef DEBUG
Serial.print (" Output ");
for ( i = 0 ; i < OutputNodes ; i++ ) {
Serial.print (Output[i], 5);
Serial.print (" ");
}
//#endif
}
//TRAINS THE NEURAL NETWORK
void train_nn() {
/******************************************************************
Initialize HiddenWeights and ChangeHiddenWeights
******************************************************************/
int prog_start = 0;
Serial.println("start training...");
//digitalWrite(LEDYEL, LOW);
for ( i = 0 ; i < HiddenNodes ; i++ ) {
for ( j = 0 ; j <= InputNodes ; j++ ) {
ChangeHiddenWeights[j][i] = 0.0 ;
Rando = float(random(100)) / 100;
HiddenWeights[j][i] = 2.0 * ( Rando - 0.5 ) * InitialWeightMax ;
}
}
//digitalWrite(LEDYEL, HIGH);
/******************************************************************
Initialize OutputWeights and ChangeOutputWeights
******************************************************************/
//digitalWrite(LEDRED, LOW);
for ( i = 0 ; i < OutputNodes ; i ++ ) {
for ( j = 0 ; j <= HiddenNodes ; j++ ) {
ChangeOutputWeights[j][i] = 0.0 ;
Rando = float(random(100)) / 100;
OutputWeights[j][i] = 2.0 * ( Rando - 0.5 ) * InitialWeightMax ;
}
}
//digitalWrite(LEDRED, HIGH);
//SerialUSB.println("Initial/Untrained Outputs: ");
//toTerminal();
/******************************************************************
Begin training
******************************************************************/
for ( TrainingCycle = 1 ; TrainingCycle < 2147483647 ; TrainingCycle++) {
/******************************************************************
Randomize order of training patterns
******************************************************************/
for ( p = 0 ; p < PatternCount ; p++) {
q = random(PatternCount);
r = RandomizedIndex[p] ;
RandomizedIndex[p] = RandomizedIndex[q] ;
RandomizedIndex[q] = r ;
}
Error = 0.0 ;
/******************************************************************
Cycle through each training pattern in the randomized order
******************************************************************/
for ( q = 0 ; q < PatternCount ; q++ ) {
p = RandomizedIndex[q];
/******************************************************************
Compute hidden layer activations
******************************************************************/
//digitalWrite(LEDYEL, LOW);
for ( i = 0 ; i < HiddenNodes ; i++ ) {
Accum = HiddenWeights[InputNodes][i] ;
for ( j = 0 ; j < InputNodes ; j++ ) {
Accum += Input[p][j] * HiddenWeights[j][i] ;
}
Hidden[i] = 1.0 / (1.0 + exp(-Accum)) ;
}
//digitalWrite(LEDYEL, HIGH);
/******************************************************************
Compute output layer activations and calculate errors
******************************************************************/
//digitalWrite(LEDRED, LOW);
for ( i = 0 ; i < OutputNodes ; i++ ) {
Accum = OutputWeights[HiddenNodes][i] ;
for ( j = 0 ; j < HiddenNodes ; j++ ) {
Accum += Hidden[j] * OutputWeights[j][i] ;
}
Output[i] = 1.0 / (1.0 + exp(-Accum)) ;
OutputDelta[i] = (Target[p][i] - Output[i]) * Output[i] * (1.0 - Output[i]) ;
Error += 0.5 * (Target[p][i] - Output[i]) * (Target[p][i] - Output[i]) ;
}
// Serial.println(Output[0]*100);
//digitalWrite(LEDRED, HIGH);
/******************************************************************
Backpropagate errors to hidden layer
******************************************************************/
//digitalWrite(LEDYEL, LOW);
for ( i = 0 ; i < HiddenNodes ; i++ ) {
Accum = 0.0 ;
for ( j = 0 ; j < OutputNodes ; j++ ) {
Accum += OutputWeights[i][j] * OutputDelta[j] ;
}
HiddenDelta[i] = Accum * Hidden[i] * (1.0 - Hidden[i]) ;
}
//digitalWrite(LEDYEL, HIGH);
/******************************************************************
Update Inner-->Hidden Weights
******************************************************************/
//digitalWrite(LEDRED, LOW);
for ( i = 0 ; i < HiddenNodes ; i++ ) {
ChangeHiddenWeights[InputNodes][i] = LearningRate * HiddenDelta[i] + Momentum * ChangeHiddenWeights[InputNodes][i] ;
HiddenWeights[InputNodes][i] += ChangeHiddenWeights[InputNodes][i] ;
for ( j = 0 ; j < InputNodes ; j++ ) {
ChangeHiddenWeights[j][i] = LearningRate * Input[p][j] * HiddenDelta[i] + Momentum * ChangeHiddenWeights[j][i];
HiddenWeights[j][i] += ChangeHiddenWeights[j][i] ;
}
}
//digitalWrite(LEDRED, HIGH);
/******************************************************************
Update Hidden-->Output Weights
******************************************************************/
//digitalWrite(LEDYEL, LOW);
for ( i = 0 ; i < OutputNodes ; i ++ ) {
ChangeOutputWeights[HiddenNodes][i] = LearningRate * OutputDelta[i] + Momentum * ChangeOutputWeights[HiddenNodes][i] ;
OutputWeights[HiddenNodes][i] += ChangeOutputWeights[HiddenNodes][i] ;
for ( j = 0 ; j < HiddenNodes ; j++ ) {
ChangeOutputWeights[j][i] = LearningRate * Hidden[j] * OutputDelta[i] + Momentum * ChangeOutputWeights[j][i] ;
OutputWeights[j][i] += ChangeOutputWeights[j][i] ;
}
}
//digitalWrite(LEDYEL, HIGH);
}
/******************************************************************
Every 100 cycles send data to terminal for display and draws the graph on OLED
******************************************************************/
ReportEvery1000 = ReportEvery1000 - 1;
if (ReportEvery1000 == 0)
{
int graphNum = TrainingCycle / 100;
int graphE1 = Error * 1000;
int graphE = map(graphE1, 3, 80, 47, 0);
Serial.print ("TrainingCycle: ");
Serial.print (TrainingCycle);
Serial.print (" Error = ");
Serial.println (Error, 5);
toTerminal();
if (TrainingCycle == 1)
{
ReportEvery1000 = 99;
}
else
{
ReportEvery1000 = 100;
}
}
/******************************************************************
If error rate is less than pre-determined threshold then end
******************************************************************/
if ( Error < Success ) break ;
}
Serial.println("End training.");
}