// exercise on Q learning
// bartj@arti.vub.ac.be

// motors on 1 and 3
// light sensors on 1 and 3
// touch sensors both on 2


#include <conio.h>
#include <unistd.h>
#include <dsensor.h>
#include <dmotor.h>
#include <dsound.h>
#include <stdlib.h>
#include <sys/lcd.h>

#define nrstates 6
#define nractions 5

// the Q table: one value per (state, action) pair.
// As a file-scope object it starts out zero-filled.
float Qtable[nrstates][nractions];

// a table with function pointers for the actions, indexed by action number.
// Declared with a real function-pointer type: ISO C does not define
// conversions between function pointers and void*.
void (*Atable[nractions])(void);

// runningforward is set to 1 by the forward-moving actions.
// The remaining values are not used by the code visible in this file
// (presumably meant for the state/reward placeholders -- TODO confirm).
int runningforward = 0;
int gettingbrighter = 0;
int previousleft = 0;
int previousright = 0;


// Debug helper: show a float on the LCD.
// lcd_int is given an int, so the value is multiplied by one hundred and
// truncated to an int first.
// A system beep is played afterwards to signal that something changed.

void printfloat(float v)
{
	lcd_int((int)(100 * v));
	dsound_system(DSOUND_BEEP);
}

// a bunch of actions
// Action: both motors forward at a fifth of MAX_SPEED.
// Sets the runningforward flag, then waits via msleep(1000).
// The motors are not stopped before returning.
void moveforward(void)
{
	const int speed = MAX_SPEED/5;

	motor_a_speed(speed);
	motor_c_speed(speed);

	motor_a_dir(fwd);
	motor_c_dir(fwd);

	runningforward = 1;
	msleep(1000);
}

// Action: both motors in reverse at a fifth of MAX_SPEED, then wait via
// msleep(500). The runningforward flag is left untouched and the motors
// are not stopped before returning.
void movebackward(void)
{
	const int speed = MAX_SPEED/5;

	motor_a_speed(speed);
	motor_c_speed(speed);

	motor_a_dir(rev);
	motor_c_dir(rev);

	msleep(500);
}


// Action: turn while reversing, in a random direction for a random time.
// One randomly chosen motor gets speed 0, the other a fifth of MAX_SPEED;
// both are set to reverse. The wait is msleep(500 + random()%300).
void turnbackwardsrandom()
{
	// first random draw: non-zero means motor A is the one held at speed 0
	const int hold_a = random()%2;

	motor_a_speed(hold_a ? 0 : MAX_SPEED/5);
	motor_c_speed(hold_a ? MAX_SPEED/5 : 0);

	motor_a_dir(rev);
	motor_c_dir(rev);

	// second random draw: extra waiting time on top of the fixed 500
	msleep(500 + random()%300);
}

// Action: motor A at speed 0, motor C at a fifth of MAX_SPEED, both set to
// forward. Sets the runningforward flag, then waits via msleep(500).
void turnleft(void)
{
	const int drive = MAX_SPEED/5;

	motor_a_speed(0);
	motor_c_speed(drive);

	motor_a_dir(fwd);
	motor_c_dir(fwd);

	runningforward = 1;
	msleep(500);
}

// Action: motor A at a fifth of MAX_SPEED, motor C at speed 0, both set to
// forward. Sets the runningforward flag, then waits via msleep(500).
void turnright(void)
{
	const int drive = MAX_SPEED/5;

	motor_a_speed(drive);
	motor_c_speed(0);

	motor_a_dir(fwd);
	motor_c_dir(fwd);

	runningforward = 1;
	msleep(500);
}


// populate the action table with all actions
void initAtable()
{
	Atable[0] = &movebackward;
	Atable[1] = &moveforward;
	Atable[2] = &turnleft;
	Atable[3] = &turnright;
	Atable[4] = &turnbackwardsrandom;
}

// Pick an action index at random: the value of random() reduced modulo
// nractions.
int selectRandomAction()
{
	int choice = random()%nractions;
	return choice;
}

// execute the action with given index in the action table
void executeAction(int action)
{
	void (*actionfunction)() = Atable[action];
	(*actionfunction)();
}


// According to the Qtable, what is the maximal value obtainable in the
// given state?
//   state:      row of Qtable to inspect (the caller must pass a valid index)
//   bestaction: out parameter, receives the index of the action holding the
//               maximum; on ties the lowest index wins
//   returns:    the maximal Q value of that row
// The running maximum starts from the row's first entry instead of 0.0, so
// a row containing only negative values yields its true maximum and the
// matching action rather than 0.0 paired with action 0.
float maxReward(int state, int* bestaction)
{
	int i;
	float maxvalue = Qtable[state][0];
	*bestaction = 0;
	for(i=1;i<nractions;i++)
	{
		if (Qtable[state][i] > maxvalue)
		{
			maxvalue = Qtable[state][i];
			*bestaction = i;
		}
	}
	return maxvalue;
}


// Determine the current state.
// Placeholder: the actual state detection still has to be written. Until
// then a defined value (state 0) is returned; returning an uninitialized
// local would be undefined behaviour and could produce an index outside
// the Q table.
int readCurrentState()
{
	int state = 0;
	// TODO: derive the state from the robot's inputs
	return state;
}

// Get the current reward.
// Placeholder: the reward computation still has to be written, so the
// reward is always 0 for now.
float getReward()
{
	// TODO: compute the actual reward
	return 0;
}

// Q-learning itself.
// Placeholder: the learning loop still has to be written; both parameters
// are currently unused and the Q table is left untouched.
void qlearn(int nrexplorations,float lambda)
{
	(void)nrexplorations;
	(void)lambda;
	// TODO: implement the learning loop
}


// Behave optimally for the state the robot is in: forever read the state,
// look up the action with the highest Q value for it, and execute that
// action. This function never returns.
void run()
{
	int chosen;

	for(;;)
	{
		const int state = readCurrentState();

		// only the selected action is needed here, not the value itself
		(void)maxReward(state, &chosen);
		executeAction(chosen);
	}
}


// Program entry: activate sensors 1 and 3, seed the random generator with
// a fixed value, fill the action table, learn, and then act on what was
// learned. Since run() loops forever the final return is not reached.
int main(int argc, char *argv[])
{
	const int explorations = 500;
	const float lambda = 0.9;

	(void)argc;
	(void)argv;

	// set up sensors, random generator and action table
	ds_active(&SENSOR_1);
	ds_active(&SENSOR_3);
	srandom(12345);
	initAtable();

	// learn the Q table
	qlearn(explorations, lambda);

	// and behave optimally after learning
	run();

	return 1;
}




