Tue 27 Aug 00:14:56 CEST 2024
This commit is contained in:
		
							parent
							
								
									2bb30de385
								
							
						
					
					
						commit
						00ee12d73c
					
				
							
								
								
									
										171
									
								
								test/test-rl3.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										171
									
								
								test/test-rl3.js
									
									
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,171 @@ | |||
// Maze of Torment World
// Deep-Q Learning (DQN)

// Cell codes: 0: free place, 1: start, 2: destination, -1: wall
var f = 0, s = 1, d = 2, w = -1;

// The grid is indexed as maze[row][column]; row 0 is the top row.
var maze = [
  [s, f, w, d, w, f, f],
  [f, f, w, f, w, f, f],
  [f, f, w, f, f, f, f],
  [f, f, w, w, w, f, f],
  [f, f, f, f, f, f, f],
  [f, f, f, f, w, w, w],
  [f, w, f, f, f, f, f]
];

// Dimensions are taken from the grid itself so that editing the maze cannot
// leave them stale (7 x 7 for the grid above).
var height = maze.length;
var width = maze[0].length;

// start: flat state index (x + y * width) of the start cell
// dest:  {x, y} coordinates of the destination cell
var start, dest;

// world states: the grid flattened row by row, so that
// states[x + y * width] === maze[y][x]
var states = [];
maze.forEach(function (row, j) {
  row.forEach(function (cell, i) {
    states.push(cell);
    if (cell === s) start = i + j * width;
    if (cell === d) dest = { x: i, y: j };
  });
});
| 
 | ||||
// Trace of the current episode, same shape as the maze:
// 1 = start cell or visited cell, 'w' = wall, 0 = not visited yet.
var way = [];

/**
 * Starts a new episode: optionally prints the trace of the episode that just
 * ended, rebuilds an empty trace from the maze, clears the per-episode
 * counters on env and increments env.iteration.
 *
 * @param {boolean} [pr] when truthy, print the old trace (one row per line)
 *                       before it is discarded
 */
function reset(pr) {
  if (pr) print(way.join('\n'));
  way = maze.map(function (row) {
    return row.map(function (cell) {
      if (cell === s) return 1;
      return cell === w ? 'w' : 0;
    });
  });
  env.steps = 0;
  env.good = 0;
  env.error = 0;
  env.iteration++;
}
// Moves the agent can choose from.
var actions = ['left', 'right', 'up', 'down'];

// Agent sensor states (perception):
// distances {N,S,W,E} to the nearest wall or world boundary, followed by the
// distance to the destination (five values in total).
var sensors = [0, 0, 0, 0, 0];
| 
 | ||||
| var env = {}; | ||||
| 
 | ||||
| env.steps = 0; | ||||
| env.iteration = 0; | ||||
| env.error = 0; | ||||
| env.good = 0; | ||||
| env.last = 0; | ||||
| 
 | ||||
| // required by learner
 | ||||
| env.getNumStates      = function() { return sensors.length /*!!*/ } | ||||
| env.getMaxNumActions  = function() { return actions.length; } | ||||
| 
 | ||||
| // internals
 | ||||
| env.nextState = function(state,action) { | ||||
|   var nx, ny, nextstate; | ||||
|   var x = env.stox(state); | ||||
|   var y = env.stoy(state); | ||||
|   // free place to move around
 | ||||
|   switch (action) { | ||||
|     case 'left'  : nx=x-1; ny=y; break; | ||||
|     case 'right' : nx=x+1; ny=y; break; | ||||
|     case 'up'    : ny=y-1; nx=x; break; | ||||
|     case 'down'  : ny=y+1; nx=x; break; | ||||
|   } | ||||
|   nextstate = env.xytos(nx,ny); | ||||
|   if (nx<0 || ny<0 || nx >= width || ny >= height || | ||||
|       states[nextstate]==w) { | ||||
|     nextstate=-1; | ||||
|     return nextstate; | ||||
|   } | ||||
|   way[ny][nx]=1; | ||||
|   env.steps++; | ||||
|   return nextstate; | ||||
| } | ||||
| env.reward = function (state,action,nextstate) { | ||||
|   // reward of being in s, taking action a, and ending up in ns
 | ||||
|   var reward; | ||||
|   var dist1=Math.sqrt(Math.pow(dest.x-env.stox(nextstate),2)+ | ||||
|                       Math.pow(dest.y-env.stoy(nextstate),2)) | ||||
|   var dist2=Math.sqrt(Math.pow(dest.x-env.stox(state),2)+ | ||||
|                       Math.pow(dest.y-env.stoy(state),2)) | ||||
|   if (nextstate==env.laststate) reward = -10; // avoid ping-pong
 | ||||
|   else if (nextstate==-1) reward = -100; // wall hit or outside world
 | ||||
|   else if (dist1 < 1) reward = 100-env.steps/10; // destination found
 | ||||
|   else reward = (dist1-dist2)<0?dist1/10:-dist1/10; // on the way
 | ||||
|   env.laststate=nextstate; | ||||
|   return reward; | ||||
| } | ||||
| 
 | ||||
| // Update sensors
 | ||||
| env.perception = function (state) { | ||||
|   var i, | ||||
|       dist=Math.sqrt(Math.pow(dest.x-env.stox(state),2)+ | ||||
|                      Math.pow(dest.y-env.stoy(state),2)), | ||||
|       x = env.stox(state), | ||||
|       y = env.stoy(state), | ||||
|       sensors = [0,0,0,0,dist]; // N S W E
 | ||||
|   // Distances to obstacles
 | ||||
|   for(i=y;i>0;i--) { if (states[env.xytos(x,i)]==w) break } | ||||
|   sensors[0]=y-i-1; | ||||
|   for(i=y;i<height;i++) { if (states[env.xytos(x,i)]==w) break } | ||||
|   sensors[1]=i-y-1; | ||||
|   for(i=x;i>0;i--) { if (states[env.xytos(i,y)]==w) break } | ||||
|   sensors[2]=x-i-1; | ||||
|   for(i=x;i<width;i++) { if (states[env.xytos(i,y)]==w) break } | ||||
|   sensors[3]=i-x-1; | ||||
|   return sensors | ||||
| } | ||||
| // utils
 | ||||
| env.stox = function (s)    { return s % width } | ||||
| env.stoy = function (s)    { return Math.floor(s / width) } | ||||
| env.xytos = function (x,y) { return x+y*width } | ||||
| 
 | ||||
// Build the initial (empty) trace before learning starts. Note that reset()
// also increments env.iteration, so the first episode is reported as 1.
reset();

// create the DQN agent
var model = ml.learn({
  algorithm   : ml.ML.RL,
  kind        : ml.ML.DQNAgent,
  actions     : actions,

  // specs
  update : 'qlearn', // qlearn | sarsa
  gamma : 0.9, // discount factor, [0, 1)
  epsilon : 0.2, // initial epsilon for epsilon-greedy policy, [0, 1)
  alpha : 0.005, // value function learning rate
  experience_add_every : 5, // number of time steps before we add another experience to replay memory
  experience_size : 10000, // size of experience
  learning_steps_per_iteration : 5,
  tderror_clamp : 1.0, // for robustness
  num_hidden_units : 100, // number of neurons in hidden layer

  environment : env
});

// Dump the freshly created model and the size of its JSON form.
print(model);
print(toJSON(model).length+' Bytes');
| 
 | ||||
var state = start;  // world state. upper left corner

// The agent searches for the destination by trial and error.
// If the destination was found, it jumps back to the start.
// NOTE(review): the meaning of the first argument of later() and of the
// callback's return value is defined by the hosting runtime — presumably an
// interval and a "keep running" flag; confirm against its documentation.
later(1, function (task) { // start the learning loop
  sensors = env.perception(state);
  var action = ml.action(model, sensors); // sensors is a vector
  //... execute action in environment and get the reward
  var ns = env.nextState(state, action);
  var reward = env.reward(state, action, ns);

  // Count the move before anything is reported or reset, so that the final
  // move of an episode shows up in that episode's numbers and is not carried
  // over into the next one.
  if (ns === -1) env.error++;
  else env.good++;

  if (states[ns] === d) {
    // destination found
    print('iteration='+env.iteration,', reward='+reward,' action: steps='+env.good,'error='+env.error+' tderror='+
          model.tderror);
    ns = start;
    reset(true);
  }

  ml.update(model, reward);
  // A rejected move leaves the agent where it is.
  // (Alternative policy: restart from `start` after a rejected move.)
  state = ns === -1 ? state : ns;

  if (reward > 10) {
    // Only the destination reward exceeds 10: store the model and stop.
    save('/tmp/rl.json', model);
    print('continue with test-rl4.js ...');
    kill(task);
  }
  return true;
});
| 
 | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user