diff --git a/test/test-rl2.js b/test/test-rl2.js
new file mode 100644
index 0000000..b3a35ed
--- /dev/null
+++ b/test/test-rl2.js
@@ -0,0 +1,131 @@
+// Maze of Torment World
+// Dynamic Programming (DP)
+
+var height=7,width=7,start=0;
+var UPDATES=15
+// 0: free place, 1: start, 2: destination, -1: wall
+var f=0,s=1,d=2,w=-1
+var maze = [
+[s,f,w,d,w,f,f],
+[f,f,w,f,w,f,f],
+[f,f,w,f,f,f,f],
+[f,f,w,w,w,f,f],
+[f,f,f,f,f,f,f],
+[f,f,f,f,w,w,w],
+[f,w,f,f,f,f,f],
+]
+
+var states = []
+maze.forEach(function (row) {
+  states=states.concat(row)
+})
+var rewards = states.map(function (s) {
+  return s==w?-1:(s==d?1:0)
+})
+
+var actions = ['left','right','up','down']
+
+var env = {};
+
+env.steps = 0;
+env.iteration = 0;
+
+var way = []
+function reset (pr) {
+  if (pr) print(way.join('\n'))
+  way = maze.map(function (row) {
+    return row.map(function (col) { return col==s?1:(col==w?'w':0) })})
+  env.steps=0;
+}
+
+// required by learner
+env.getNumStates = function() { return height*width; }
+env.getMaxNumActions = function() { return actions.length; }
+env.nextState = function(state,action,pr) {
+  var nx, ny, nextstate;
+  var x = env.stox(state);
+  var y = env.stoy(state);
+  switch (states[state]) {
+    case f:
+    case s:
+      // free place to move around
+      switch (action) {
+        case 'left' : nx=x-1; ny=y; break;
+        case 'right' : nx=x+1; ny=y; break;
+        case 'up' : ny=y-1; nx=x; break;
+        case 'down' : ny=y+1; nx=x; break;
+      }
+      nextstate = ny*width+nx;
+      way[ny][nx]=1;
+      env.steps++;
+      break;
+    case w:
+      // cliff! oh no! Should not happend - see below
+      // print('Back to start...')
+      nextstate=start;
+      reset()
+      env.iteration++;
+      break;
+    case d:
+      // agent wins! teleport to start
+      if (pr) print('['+env.iteration+'] Found destination !!!!!!! steps='+env.steps)
+      reset(pr)
+      nextstate=start;
+      env.iteration++;
+      break;
+  }
+//print(state,action,nextstate)
+  return nextstate;
+}
+env.reward = function (state,action,nextstate) {
+  // reward of being in s, taking action a, and ending up in ns
+  var reward;
+  // If the destination was found, weight the reward with the number of steps
+  // return best reward for shortest path
+  if (states[state]==d) reward = rewards[state];
+  else reward = rewards[state];
+  return reward;
+}
+env.allowedActions = function(state) {
+  var x = env.stox(state), y = env.stoy(state);
+  var actions=[];
+  if (x>0) actions.push('left');
+  if (y>0) actions.push('up');
+  if (x