// Maze of Torment World
// Deep-Q Learning (DQN)

var height=7,width=7,start,dest;

// Cell codes -- 0: free place, 1: start, 2: destination, -1: wall
var f=0,s=1,d=2,w=-1;

var maze = [
  [s,f,w,d,w,f,f],
  [f,f,w,f,w,f,f],
  [f,f,w,f,f,f,f],
  [f,f,w,w,w,f,f],
  [f,f,f,f,f,f,f],
  [f,f,f,f,w,w,w],
  [f,w,f,f,f,f,f],
];

// World states: the maze flattened row by row.
// `start` is stored as a flat index (x + y*width), `dest` as an {x,y} pair.
var states = [];
maze.forEach(function (row,j) {
  states=states.concat(row);
  row.forEach(function (cell,i) {
    if (cell===s) start=i+j*width;
    if (cell===d) dest={x:i,y:j};
  });
});

// Trace of visited cells for the current run: 1 = visited (or start), 'w' = wall, 0 = untouched
var way = [];

/**
 * Starts a new run: rebuilds the `way` trace from the maze, clears the
 * per-run counters and increments the iteration counter.
 * @param {boolean} [pr] - when truthy, the previous trace is printed first
 */
function reset (pr) {
  if (pr) print(way.join('\n'));
  way = maze.map(function (row) {
    return row.map(function (col) {
      return col===s?1:(col===w?'w':0);
    });
  });
  env.steps=0;
  env.good=0;
  env.error=0;
  env.iteration++;
}

var actions = ['left','right','up','down'];

// Agent sensor states (perception)
// Four obstacle distances (N,S,W,E) followed by the distance to the destination
var sensors = [0,0,0,0,0];

var env = {};
env.steps = 0;
env.iteration = 0;
env.error = 0;
env.good = 0;
env.last = 0;

// required by learner
env.getNumStates = function() { return sensors.length; /*!!*/ };
env.getMaxNumActions = function() { return actions.length; };

// internals

/**
 * Computes the state reached by taking `action` in `state`.
 * A successful move is marked in `way` and counted in `env.steps`.
 * @param {number} state - current flat state index
 * @param {string} action - one of 'left', 'right', 'up', 'down'
 * @returns {number} the new flat state index, or -1 if the move would leave
 *   the world or run into a wall
 */
env.nextState = function(state,action) {
  var nx, ny, nextstate;
  var x = env.stox(state);
  var y = env.stoy(state);
  // free place to move around
  switch (action) {
    case 'left'  : nx=x-1; ny=y; break;
    case 'right' : nx=x+1; ny=y; break;
    case 'up'    : ny=y-1; nx=x; break;
    case 'down'  : ny=y+1; nx=x; break;
  }
  nextstate = env.xytos(nx,ny);
  // The bounds test comes first: an out-of-range nx/ny can still map to a valid flat index
  if (nx<0 || ny<0 || nx >= width || ny >= height || states[nextstate]===w) {
    return -1;
  }
  way[ny][nx]=1;
  env.steps++;
  return nextstate;
};

/**
 * Reward of being in `state`, taking `action`, and ending up in `nextstate`.
 * Also records `nextstate` in `env.laststate` for the next call.
 * @param {number} state - flat state index before the move
 * @param {string} action - the action taken (not used in the computation)
 * @param {number} nextstate - flat state index after the move, or -1 for a failed move
 * @returns {number} -100 for a wall hit or leaving the world, -10 when `nextstate`
 *   repeats the previous call's `nextstate`, 100-steps/10 at the destination,
 *   otherwise +dist/10 when the move got closer and -dist/10 when it did not
 */
env.reward = function (state,action,nextstate) {
  var reward, dist1, dist2;
  if (nextstate===-1) {
    // Wall hit or outside world. Checked first so that consecutive failed moves
    // (-1 followed by -1) are not mistaken for the repeat case below.
    reward = -100;
  } else if (nextstate===env.laststate) {
    reward = -10; // avoid ping-pong
  } else {
    // Euclidean distances to the destination after (dist1) and before (dist2) the move
    dist1=Math.sqrt(Math.pow(dest.x-env.stox(nextstate),2)+
                    Math.pow(dest.y-env.stoy(nextstate),2));
    dist2=Math.sqrt(Math.pow(dest.x-env.stox(state),2)+
                    Math.pow(dest.y-env.stoy(state),2));
    if (dist1 < 1) reward = 100-env.steps/10;           // destination found
    else reward = (dist1-dist2)<0?dist1/10:-dist1/10;   // on the way
  }
  env.laststate=nextstate;
  return reward;
};

// NOTE(review): the env.perception definition that follows is cut off and partly garbled in
// the available text (loop headers are missing characters and the body has no end). It is kept
// verbatim on the next line, as a comment, instead of being guessed at -- restore it from the
// original file before use.
// Update sensors env.perception = function (state) { var i, dist=Math.sqrt(Math.pow(dest.x-env.stox(state),2)+ Math.pow(dest.y-env.stoy(state),2)), x = env.stox(state), y = env.stoy(state), sensors = [0,0,0,0,dist]; // N S W E // Distances to obstacles for(i=y;i>0;i--) { if (states[env.xytos(x,i)]==w) break } sensors[0]=y-i-1; for(i=y;i0;i--) { if (states[env.xytos(i,y)]==w) break } sensors[2]=x-i-1; for(i=x;i