/**
 * This applet demonstrates a simple game. It isn't designed to be general or reusable.
<p>
 * Copyright (C) 2006  <A HREF="http://www.cs.ubc.ca/spider/poole/">David Poole</A>.
<p>
 * This program gives the core of the simulation. The GUI is in <A HREF="SGameGUI.java">SGameGUI.java</A>. The environment code is in <A HREF="SGameEnv.java">SGameEnv.java</A>. The controller is in <A HREF="SGameController.java">SGameController.java</A>.
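<p>
 * A minimal sketch of how this controller might be driven (the random
 * action selection and the step count are only illustrative and are not
 * part of this class):
 * <pre>
 *   SGameAdvController controller = new SGameAdvController();
 *   java.util.Random rng = new java.util.Random();
 *   for (int step = 0; step &lt; 1000; step++) {
 *       int action = rng.nextInt(4);   // 0=up, 1=right, 2=down, 3=left
 *       controller.dostep(action);
 *   }
 * </pre>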
<p>
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
<p>
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
<p>
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


 * @author David Poole  poole@cs.ubc.ca
 * @version 0.41 2007-09-09 */

public class SGameAdvController extends SGameQController
{

	/**
	 * Construct a new controller with an adversarial environment.
	 */
	SGameAdvController() {
		environment = new SGameAdvEnv();
		title = "Q-learning with Adversary";
	}

	/**
	 * Does one step.
	 *
	 * Carries out the action in the environment, with the adversary first
	 * choosing the prize position, and then updates the Q-value for the
	 * state-action pair that was just experienced.
	 *
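	 * <p>
	 * The update applied after each step is the one-step Q-learning rule,
	 * restating what the code below computes:
	 * <pre>
	 *   Q[s,a] := (1-alpha)*Q[s,a] + alpha*(reward + discount*value(s'))
	 * </pre>
	 * where s' is the state reached after the step and alpha is either
	 * fixed or 1/visits[s][a].
	 *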
	 * <p>
	 * The actions are
	 * <ul>
	 * <li> 0 is up
	 * <li> 1 is right
	 * <li> 2 is down
	 * <li> 3 is left
	 * </ul>
	 * @param action  the action that the agent does
	 */
	public void dostep(int action) {
		int oldX = environment.currX;
		int oldY = environment.currY;
		int oldPrize = environment.prize;
		boolean oldDamaged = environment.damaged;
		int oldState = state(oldX, oldY, oldPrize, oldDamaged);

		// the adversary chooses the prize position that gives the
		// minimum Q-value for the agent's action in the current state
		int adversary = 0;
		double minVal = qvalues[state(oldX, oldY, adversary, oldDamaged)][action];
		for (int i = 1; i < 4; i++)
			if (qvalues[state(oldX, oldY, i, oldDamaged)][action] < minVal) {
				adversary = i;
				minVal = qvalues[state(oldX, oldY, i, oldDamaged)][action];
			}

		// carry out the step in the adversarial environment
		double reward = ((SGameAdvEnv) environment).dostep(action, adversary);

		int newState = state(environment.currX, environment.currY,
				environment.prize, environment.damaged);

		// one-step Q-learning update:
		// Q[s,a] := (1-alpha)*Q[s,a] + alpha*(reward + discount*value(s'))
		double newVal = value(newState);
		double newDatum = reward + discount * newVal;
		visits[oldState][action]++;
		if (!alphaFixed)
			alpha = 1.0 / visits[oldState][action];

		qvalues[oldState][action] =
				(1 - alpha) * qvalues[oldState][action]
				+ alpha * newDatum;
	}


}

