// Maze of Torment World // Temporal Difference Learning (TD) var height=7,width=7,start=0; // 0: free place, 1: start, 2: destination, -1: wall var f=0,s=1,d=2,w=-1 var maze = [ [s,f,w,d,w,f,f], [f,f,w,f,w,f,f], [f,f,w,f,f,f,f], [f,f,w,w,w,f,f], [f,f,f,f,f,f,f], [f,f,f,f,w,w,w], [f,w,f,f,f,f,f], ] var states = [] maze.forEach(function (row) { states=states.concat(row) }) var way = [] function reset (pr) { if (pr) print(way.join('\n')) way = maze.map(function (row) { return row.map(function (col) { return col==s?1:(col==w?'w':0) })}) env.steps=0; } var actions = ['left','right','up','down'] var env = {}; env.steps = 0; env.iteration = 0; // required by learner env.getNumStates = function() { return height*width; } env.getMaxNumActions = function() { return actions.length; } env.nextState = function(state,action) { var nx, ny, nextstate; var x = env.stox(state); var y = env.stoy(state); switch (states[state]) { case f: case s: // free place to move around switch (action) { case 'left' : nx=x-1; ny=y; break; case 'right' : nx=x+1; ny=y; break; case 'up' : ny=y-1; nx=x; break; case 'down' : ny=y+1; nx=x; break; } nextstate = ny*width+nx; way[ny][nx]=1; env.steps++; break; case w: // cliff! oh no! Should not happend - see below // print('Back to start...') nextstate=start; reset(false) env.iteration++; break; case d: // agent wins! teleport to start print('['+env.iteration+'] Found destination !!!!!!! steps='+env.steps) reset(true); nextstate=start; env.iteration++; break; } //print(state,action,nextstate) return nextstate; } env.reward = function (state,action,nextstate) { // reward of being in s, taking action a, and ending up in ns var reward; // If the destination was found, weight the reward with the number of steps // return best reward for shortest path if (states[state]==d) reward = 1.0-(env.steps/100) else if (states[state]==w) reward = -1; else reward = 0; return reward; } env.allowedActions = function(state) { var x = env.stox(state), y = env.stoy(state); var actions=[]; if (x>0) actions.push('left'); if (y>0) actions.push('up'); if (x