diff --git a/test/test-rl3.js b/test/test-rl3.js
new file mode 100644
index 0000000..ac914d9
--- /dev/null
+++ b/test/test-rl3.js
@@ -0,0 +1,171 @@
+// Maze of Torment World
+// Deep-Q Learning (DQN)
+
+var height=7, width=7, start, dest;
+// 0: free place, 1: start, 2: destination, -1: wall
+var f=0, s=1, d=2, w=-1;
+var maze = [
+[s,f,w,d,w,f,f],
+[f,f,w,f,w,f,f],
+[f,f,w,f,f,f,f],
+[f,f,w,w,w,f,f],
+[f,f,f,f,f,f,f],
+[f,f,f,f,w,w,w],
+[f,w,f,f,f,f,f],
+]
+
+// world states: the maze flattened row by row; state index = x + y*width
+var states = []
+maze.forEach(function (row,j) {
+  states = states.concat(row)
+  row.forEach(function (cell,i) {
+    if (cell==s) start = i+j*width;
+    if (cell==d) dest  = {x:i, y:j}
+  })
+})
+
+// path walked in the current episode
+var way = []
+function reset (pr) {
+  if (pr) print(way.join('\n'))
+  way = maze.map(function (row) {
+    return row.map(function (col) { return col==s?1:(col==w?'w':0) })})
+  env.steps=0;
+  env.good=0;
+  env.error=0;
+  env.iteration++;
+}
+var actions = ['left','right','up','down']
+
+// Agent sensor states (perception):
+// distances {N,S,W,E} to boundaries and walls,
+// plus the Euclidean distance to the destination
+var sensors = [0,0,0,0,0]
+
+var env = {};
+
+env.steps = 0;
+env.iteration = 0;
+env.error = 0;
+env.good = 0;
+env.laststate = null;
+
+// required by the learner
+env.getNumStates = function() { return sensors.length /*!!*/ }
+env.getMaxNumActions = function() { return actions.length; }
+
+// internals
+env.nextState = function (state,action) {
+  var nx, ny, nextstate;
+  var x = env.stox(state);
+  var y = env.stoy(state);
+  // free place to move around
+  switch (action) {
+    case 'left'  : nx=x-1; ny=y; break;
+    case 'right' : nx=x+1; ny=y; break;
+    case 'up'    : ny=y-1; nx=x; break;
+    case 'down'  : ny=y+1; nx=x; break;
+  }
+  nextstate = env.xytos(nx,ny);
+  // illegal move: outside the world or into a wall
+  if (nx<0 || ny<0 || nx >= width || ny >= height ||
+      states[nextstate]==w) {
+    nextstate = -1;
+    return nextstate;
+  }
+  way[ny][nx] = 1;
+  env.steps++;
+  return nextstate;
+}
+env.reward = function (state,action,nextstate) {
+  // reward of being in state s, taking action a, and ending up in ns
+  var reward;
+  var dist1 = Math.sqrt(Math.pow(dest.x-env.stox(nextstate),2)+
+                        Math.pow(dest.y-env.stoy(nextstate),2))
+  var dist2 = Math.sqrt(Math.pow(dest.x-env.stox(state),2)+
+                        Math.pow(dest.y-env.stoy(state),2))
+  if (nextstate==env.laststate)  reward = -10;              // avoid ping-pong
+  else if (nextstate==-1)        reward = -100;             // wall hit or outside world
+  else if (dist1 < 1)            reward = 100-env.steps/10; // destination found
+  else reward = (dist1-dist2)<0 ? dist1/10 : -dist1/10;     // on the way
+  env.laststate = state;  // remember where we came from (ping-pong check above)
+  return reward;
+}
+
+// Update sensors
+env.perception = function (state) {
+  var i,
+      dist = Math.sqrt(Math.pow(dest.x-env.stox(state),2)+
+                       Math.pow(dest.y-env.stoy(state),2)),
+      x = env.stox(state),
+      y = env.stoy(state),
+      sensors = [0,0,0,0,dist]; // N S W E dist
+  // Distances to obstacles
+  for(i=y; i>0; i--)      { if (states[env.xytos(x,i)]==w) break }
+  sensors[0] = y-i-1;
+  for(i=y; i<height; i++) { if (states[env.xytos(x,i)]==w) break }
+  sensors[1] = i-y-1;
+  for(i=x; i>0; i--)      { if (states[env.xytos(i,y)]==w) break }
+  sensors[2] = x-i-1;
+  for(i=x; i<width; i++)  { if (states[env.xytos(i,y)]==w) break }
+  sensors[3] = i-x-1;
+  return sensors;
+}
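
The hunk header promises 171 lines, but the recovered text stops inside env.perception: the coordinate helpers (env.stox, env.stoy, env.xytos) and the learner setup never appear. A minimal reconstruction sketch of the missing pieces follows, assuming the `i + j*width` state encoding used above and a reinforcejs-style RL.DQNAgent, whose getNumStates/getMaxNumActions interface this env already matches. Names and hyperparameters below are illustrative, not recovered from the original.

// Reconstruction sketch -- not part of the recovered diff.
// Assumes a reinforcejs-style DQNAgent is loaded and exposed as RL.DQNAgent.

// state <-> (x,y) helpers, implied by the `i + j*width` encoding above
env.stox  = function (st)  { return st % width };
env.stoy  = function (st)  { return Math.floor(st/width) };
env.xytos = function (x,y) { return x + y*width };

var agent = new RL.DQNAgent(env, {
  gamma: 0.9,              // discount factor
  epsilon: 0.2,            // epsilon-greedy exploration rate
  alpha: 0.005,            // learning rate
  experience_size: 10000,  // replay memory size
  num_hidden_units: 100    // hidden layer size of the Q network
});

reset();
var state = start;
for (var t = 0; t < 100000; t++) {
  var a    = agent.act(env.perception(state));      // sensor vector in, action index out
  var next = env.nextState(state, actions[a]);
  agent.learn(env.reward(state, actions[a], next)); // scalar reward for the last action
  if (next == -1) continue;                         // blocked: stay put, try again
  if (env.stox(next)==dest.x && env.stoy(next)==dest.y) {
    reset(true);                                    // goal reached: print path, restart
    state = start;
  } else state = next;
}

reinforcejs is only one candidate backend here; any learner that consumes the getNumStates/getMaxNumActions interface through an act/learn loop would slot in the same way.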
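
As a quick sanity check on the (reconstructed) sensor loops, the perception at the start cell can be worked out by hand from the maze above; note the quirk that standing on a world boundary reads as -1 while an adjacent wall reads as 0:

reset();
print(env.perception(start));
// -> [-1, 6, -1, 1, 3]
//    N = -1 (standing on the top boundary), S = 6 (no wall below in column 0),
//    W = -1 (left boundary), E = 1 (one free cell before the wall at (2,0)),
//    and Euclidean distance 3 to the destination at (3,0)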