From a6e752e33c7977cbc86d1cb73d5a38e531373389 Mon Sep 17 00:00:00 2001
From: sbosse
Date: Tue, 27 Aug 2024 00:15:44 +0200
Subject: [PATCH] Tue 27 Aug 00:14:56 CEST 2024

---
 test/test-rl1.js | 134 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 134 insertions(+)
 create mode 100644 test/test-rl1.js

diff --git a/test/test-rl1.js b/test/test-rl1.js
new file mode 100644
index 0000000..f2c4fd8
--- /dev/null
+++ b/test/test-rl1.js
@@ -0,0 +1,134 @@
+// Maze of Torment World
+// Temporal Difference Learning (TD)
+
+var height=7,width=7,start=0;
+// 0: free place, 1: start, 2: destination, -1: wall
+var f=0,s=1,d=2,w=-1
+var maze = [
+[s,f,w,d,w,f,f],
+[f,f,w,f,w,f,f],
+[f,f,w,f,f,f,f],
+[f,f,w,w,w,f,f],
+[f,f,f,f,f,f,f],
+[f,f,f,f,w,w,w],
+[f,w,f,f,f,f,f],
+]
+
+var states = []
+maze.forEach(function (row) {
+  states=states.concat(row)
+})
+
+var way = []
+function reset (pr) {
+  if (pr) print(way.join('\n'))
+  way = maze.map(function (row) {
+    return row.map(function (col) { return col==s?1:(col==w?'w':0) })})
+  env.steps=0;
+}
+var actions = ['left','right','up','down']
+
+var env = {};
+
+env.steps = 0;
+env.iteration = 0;
+
+// required by learner
+env.getNumStates = function() { return height*width; }
+env.getMaxNumActions = function() { return actions.length; }
+env.nextState = function(state,action) {
+  var nx, ny, nextstate;
+  var x = env.stox(state);
+  var y = env.stoy(state);
+  switch (states[state]) {
+    case f:
+    case s:
+      // free place to move around
+      switch (action) {
+        case 'left'  : nx=x-1; ny=y; break;
+        case 'right' : nx=x+1; ny=y; break;
+        case 'up'    : ny=y-1; nx=x; break;
+        case 'down'  : ny=y+1; nx=x; break;
+      }
+      nextstate = ny*width+nx;
+      way[ny][nx]=1;
+      env.steps++;
+      break;
+    case w:
+      // cliff! oh no! Should not happend - see below
+      // print('Back to start...')
+      nextstate=start;
+      reset(false)
+      env.iteration++;
+      break;
+    case d:
+      // agent wins! teleport to start
+      print('['+env.iteration+'] Found destination !!!!!!! steps='+env.steps)
+      reset(true);
+      nextstate=start;
+      env.iteration++;
+      break;
+  }
+  //print(state,action,nextstate)
+  return nextstate;
+}
+env.reward = function (state,action,nextstate) {
+  // reward of being in s, taking action a, and ending up in ns
+  var reward;
+  // If the destination was found, weight the reward with the number of steps
+  // return best reward for shortest path
+  if (states[state]==d) reward = 1.0-(env.steps/100)
+  else if (states[state]==w) reward = -1;
+  else reward = 0;
+  return reward;
+}
+env.allowedActions = function(state) {
+  var x = env.stox(state), y = env.stoy(state);
+  var actions=[];
+  if (x>0) actions.push('left');
+  if (y>0) actions.push('up');
+  if (x