// (file stats: 132 lines, 3.2 KiB, JavaScript)
// Maze of Torment World
// Dynamic Programming (DP)

// Grid dimensions and the linear index of the start cell.
var height=7,width=7,start=0;

// Number of planning sweeps run over the model before replay.
var UPDATES=15

// Cell codes — 0: free place, 1: start, 2: destination, -1: wall
var f=0,s=1,d=2,w=-1

// 7x7 maze layout, row-major from the top-left corner.
var maze = [
  [s,f,w,d,w,f,f],
  [f,f,w,f,w,f,f],
  [f,f,w,f,f,f,f],
  [f,f,w,w,w,f,f],
  [f,f,f,f,f,f,f],
  [f,f,f,f,w,w,w],
  [f,w,f,f,f,f,f],
]

// Flatten the 2D maze into one linear state array (row-major),
// so state index = y*width + x.
var states = []
for (var r = 0; r < maze.length; r++) {
  states = states.concat(maze[r])
}

// Per-state reward table: walls pay -1, the destination pays +1,
// everything else pays 0.
var rewards = states.map(function (cell) {
  if (cell==w) return -1
  if (cell==d) return 1
  return 0
})
// The four discrete moves available to the agent.
var actions = ['left','right','up','down']

// Environment object handed to the learner; it carries the episode
// counters alongside the required callback interface.
var env = {};
env.steps = 0;     // moves taken in the current episode
env.iteration = 0; // completed episodes so far

// Visited-cell matrix, rebuilt by reset(), used to print the walked path.
var way = []
// Start a fresh episode: optionally print the path walked so far,
// rebuild the path matrix from the maze (1 = start cell, 'w' = wall,
// 0 = unvisited), and clear the step counter.
function reset (pr) {
  if (pr) print(way.join('\n'))
  way = maze.map(function (row) {
    return row.map(function (cell) {
      if (cell==s) return 1
      if (cell==w) return 'w'
      return 0
    })
  })
  env.steps=0;
}
// required by learner
// Total number of discrete states — one per maze cell.
env.getNumStates = function() { return height*width; }
// Upper bound on the number of actions available in any state.
env.getMaxNumActions = function() { return actions.length; }
// Transition function required by the learner: given a linear state
// index and an action name, return the successor state index.
// Free/start cells apply the move; landing the agent's *current*
// state on a wall restarts the episode; the destination restarts it
// too, but counts as a win (optionally printed when pr is truthy).
env.nextState = function(state,action,pr) {
  var nx, ny, nextstate;
  var x = env.stox(state);
  var y = env.stoy(state);
  switch (states[state]) {
    case f:
    case s:
      // free place to move around
      if (action=='left')       { nx=x-1; ny=y; }
      else if (action=='right') { nx=x+1; ny=y; }
      else if (action=='up')    { nx=x;   ny=y-1; }
      else if (action=='down')  { nx=x;   ny=y+1; }
      nextstate = ny*width+nx;
      way[ny][nx]=1;  // mark the cell as visited for path printing
      env.steps++;
      break;
    case w:
      // cliff! oh no! Should not happen in a learned policy — walls
      // are regular states the learner can still wander into; send
      // the agent back to the start and begin a new episode.
      nextstate=start;
      reset()
      env.iteration++;
      break;
    case d:
      // agent wins! teleport to start
      if (pr) print('['+env.iteration+'] Found destination !!!!!!! steps='+env.steps)
      reset(pr)
      nextstate=start;
      env.iteration++;
      break;
  }
  return nextstate;
}
env.reward = function (state,action,nextstate) {
|
||
|
// reward of being in s, taking action a, and ending up in ns
|
||
|
var reward;
|
||
|
// If the destination was found, weight the reward with the number of steps
|
||
|
// return best reward for shortest path
|
||
|
if (states[state]==d) reward = rewards[state];
|
||
|
else reward = rewards[state];
|
||
|
return reward;
|
||
|
}
|
||
|
// List the moves that keep the agent inside the grid from `state`.
// Walls are not filtered out here — stepping into one is handled
// (and punished) by nextState/reward.
env.allowedActions = function(state) {
  var x = env.stox(state);
  var y = env.stoy(state);
  var valid = [];
  if (x > 0)        valid.push('left');
  if (y > 0)        valid.push('up');
  if (x < width-1)  valid.push('right');
  if (y < height-1) valid.push('down');
  return valid
}
// utils
// Column (x) of a linear state index in the row-major grid.
env.stox = function (st) { return st % width }
// Row (y) of a linear state index in the row-major grid.
env.stoy = function (st) { return Math.floor(st / width) }
// create the DP agent (tabular dynamic-programming planner — note:
// kind is DPAgent, not a DQN)
var model = ml.learn({
  algorithm : ml.ML.RL,
  kind : ml.ML.DPAgent,
  actions : actions,

  gamma : 0.9, // discount factor, [0, 1)

  environment : env
});

print(model)
print(toJSON(model).length+' Bytes')

reset()
var state = start; // upper left corner

// Planning phase: run UPDATES value-iteration sweeps over the model.
for(var i=0;i<UPDATES;i++) ml.update(model)
print('Required '+env.iteration+' iterations')
reset()

// Replay phase: follow the learned policy step by step (no further
// learning — ml.update is not called here), printing the walked path
// whenever an episode ends inside nextState.
var timer = setInterval(function(){ // start the replay loop
  var action = ml.action(model,state); // state is an integer index
  var ns = env.nextState(state,action,true);
  state = ns
}, 100);