How to use:
1. Create a spherical prim and place the script below in the sphere.
2. Create a prim some distance from the SARSA sphere, but no more than 96m away (the script's sensor range). Make sure this prim is named "Goal".
3. Scatter some obstacle prims in between the SARSA sphere and the goal prim.
4. Say "start", sit back and watch the SARSA sphere figure its way towards the goal prim.
If the sphere wanders too far off course, say "stop" to prevent it wandering farther.
To reset what the script has learnt, say "unlearn" to wipe its memory (takes a few seconds).
// Based on work done by:
// G. Rummery and M. Niranjan, "On-line Q-learning using connectionist systems",
// Technical Report CUED/F-INFENG/TR 166,
// Cambridge University Engineering Department, 1994.
// Specify Target
string target_name = "Goal";
key target_key = "";
integer target_type = PASSIVE;
// Some temporary variables
vector direction;
float last_distance = 0.0;
integer collided = 0;
integer last_blocked = 0;
rotation ROT45;
// State function. Given information about the ball's direction,
// distance and whether it struck an obstacle, return the appropriate
// state to SARSA.
integer get_state(vector direction, float distance, integer blocked) {
integer base = 0;
if (blocked) base = 10;
return (integer)(distance / 10.0) + base;
}
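// Example: 35 m away and not blocked maps to state 3; the same distance
// while blocked maps to state 13. With the 96 m sensor range, states 0-9
// cover the unblocked distance bands and 10-19 the blocked ones.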
// Reward function. SARSA is positively rewarded for moving towards
// the target and penalized for moving away from the target or
// colliding with obstacles.
float get_reward(float distance, integer blocked) {
float r = 0.0; // Reward variable
if (last_distance < 0) {
r = distance - last_distance;
}
else {
r = last_distance - distance;
}
if (blocked) {
// Penalize collisions with obstacles
r = r - blocked - last_blocked;
}
else if (last_blocked) {
// Reward for avoiding an obstacle (not colliding again)
r = r + last_blocked;
}
// Limit the reward to prevent biases
if (r > 1.0) r = 1.0;
if (r < -2.0) r = -2.0;
last_distance = distance;
last_blocked = blocked;
return r;
}
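// Example: moving from 40 m to 38 m without a collision gives
// r = 40 - 38 = 2.0, which is clamped to 1.0. Colliding on that same step
// gives r = 2.0 - 1 - 0 = 1.0; a second collision in a row gives 2.0 - 1 - 1 = 0.0.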
// Move the ball in the direction instructed by SARSA
do_action(integer action) {
direction = direction / llVecMag(direction);
integer i;
for (i = 0; i < action; i++) {
direction *= ROT45;
}
vector impulse = 2.0 * llGetMass() * direction;
llApplyImpulse(impulse, 0);
}
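// Action k pushes the sphere along the sensed direction rotated by
// k * 45 degrees about the vertical axis; an impulse of 2 * mass changes
// the sphere's velocity by roughly 2 m/s.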
// SARSA variables
integer is_enabled = 1;
integer num_states = 20;
integer num_actions = 8;
// Q-values
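// Stored as a flattened num_states x num_actions table; the entry for
// state s and action a lives at index s * num_actions + a.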
list Q;
// Learning rate
float alpha = 0.1;
// Discount factor
float lambda = 0.9;
// Exploration rate
float epsilon = 0.2;
// Prior state
integer q_state = 0;
// Prior action
integer action = 0;
// Initialize the Q-values list with random numbers between 0 and 1.
initQ() {
integer s;
integer a;
Q = [];
for (s = 0; s < num_states; s++) {
for (a = 0; a < num_actions; a++) {
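// (Q = []) + Q + [...] is the usual LSL idiom for appending to a list
// while keeping peak memory usage low.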
Q = (Q = []) + Q + [ llFrand(1.0) ];
}
}
float memory = (float)llGetFreeMemory() * 100.0 / 16384.0;
llOwnerSay( (string)((integer)memory) + "% memory free" );
}
// Print Q-values for debugging purposes.
reportQ() {
integer s;
for (s = 0; s < num_states; s++) {
llOwnerSay("State " + (string)s + ": " + llDumpList2String(llList2List(Q, s * num_actions, s * num_actions + num_actions - 1), ", "

}
}
// Get the Q-value for a specific state-action pair
float getQ(integer s, integer a) {
return llList2Float(Q, s * num_actions + a);
}
// Update the Q-value for a specific state-action pair
setQ(integer s, integer a, float newQ) {
integer index = s * num_actions + a;
Q = (Q=[]) + llListReplaceList(Q, [newQ], index, index);
}
// e-greedy function that decides when SARSA takes the optimal action.
integer epsilon_greedy(integer s) {
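// With probability epsilon the sphere explores with a random action;
// otherwise it exploits the action with the highest Q-value.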
if (llFrand(1.0) < epsilon) return (integer)llFrand(num_actions); // random action in the range [0, num_actions)
integer best_action = 0;
float largest_reward = -1000.0;
integer i;
for(i = 0; i < num_actions; i++) {
float reward = getQ(s, i);
if (reward > largest_reward) {
best_action = i;
largest_reward = reward;
}
}
return best_action;
}
// Update SARSA Q-value for the current action
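// SARSA update rule: Q(s,a) <- Q(s,a) + alpha * (r + lambda * Q(s',a') - Q(s,a)),
// where (s,a) is the prior state-action pair and (s',a') the one just chosen.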
updateQ(integer prior_state, integer prior_action, float reward, integer posterior_state, integer posterior_action) {
// Debug
// llOwnerSay("SARSA = "+(string)prior_state+", "+(string)prior_action+", "+(string)reward+", "+(string)posterior_state+", "+(string)posterior_action);
float priorQ = getQ(prior_state, prior_action);
float posteriorQ = getQ(posterior_state, posterior_action);
float newQ = priorQ + alpha * (reward + lambda * posteriorQ - priorQ);
setQ(prior_state, prior_action, newQ);
}
// Call this function to activate SARSA's on-line policy learning
start_SARSA() {
is_enabled = 1;
llSensorRepeat(target_name, target_key, target_type, 96.0, PI, 5.0);
}
// Call this function to stop all SARSA learning.
stop_SARSA() {
is_enabled = 0;
llSensorRemove();
}
default {
state_entry() {
stop_SARSA();
initQ();
ROT45 = llEuler2Rot(<0.0, 0.0, 45.0> * DEG_TO_RAD);
llSetPrimitiveParams([PRIM_PHYSICS, TRUE]);
llListen( 0, "", llGetOwner(), "stop" );
llListen( 0, "", llGetOwner(), "start" );
llListen( 0, "", llGetOwner(), "unlearn" );
llListen( 0, "", llGetOwner(), "qstates" );
llSay(0, "Hi, I am SARSA the self-learning ball. Build obstacles and watch me learn how to avoid them.");
}
no_sensor() {
llOwnerSay("I cannot sense the goal! Stopping!"

stop_SARSA();
}
sensor(integer count) {
// Only use first sensor reading
direction = llGetPos() - llDetectedPos(0);
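// Note: direction points from the detected goal back towards the sphere;
// do_action() rotates this base vector, so the policy learns which
// 45-degree rotation moves it closer.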
float distance = llVecDist(llGetPos(), llDetectedPos(0));
// Update Q values
integer current_state = get_state(direction, distance, collided);
integer current_action = epsilon_greedy(current_state);
float reward = get_reward(distance, collided);
updateQ(q_state, action, reward, current_state, current_action);
do_action(current_action);
q_state = current_state;
action = current_action;
collided = 0;
}
collision(integer count) {
if (is_enabled) collided = 1;
// Did the ball collide with the goal target?
integer goal = 0;
if ((target_name != "") && (llDetectedName(0) == target_name)) goal = 1;
if ((target_key != "") && (llDetectedKey(0) == target_key)) goal = 1;
if (goal) {
collided = 0;
stop_SARSA();
llSay(0, "I found the goal!"

}
}
listen(integer channel, string name, key id, string message) {
if (message == "stop") {
stop_SARSA();
llSay(0, "Learning mode OFF");
}
else if (message == "start") {
start_SARSA();
llSay(0, "Learning mode ON");
}
else if (message == "unlearn") {
stop_SARSA();
initQ();
llSay(0, "Memory cleaned");
}
else if (message == "qstates") {
reportQ();
}
}
}