/**
 * This applet demonstrates a simple game. It isn't designed to be general or reusable.
<p>
 * Copyright (C) 2006  <A HREF="http://www.cs.ubc.ca/spider/poole/">David Poole</A>.
<p>
 * This program gives the core of the simulation. The GUI is in <A HREF="SGameGUI.java">SGameGUI.java</A>. The environment code is in <A HREF="SGameEnv.java">SGameEnv.java</A>. The controller is in <A HREF="SGameController.java">SGameController.java</A>.
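<p>
 * A minimal sketch of how this controller might be driven (the random
 * action selection and the step count are only illustrative and are not
 * part of this class):
 * <pre>
 *   SGameAdvController controller = new SGameAdvController();
 *   java.util.Random rng = new java.util.Random();
 *   for (int step = 0; step &lt; 1000; step++) {
 *       int action = rng.nextInt(4);   // 0=up, 1=right, 2=down, 3=left
 *       controller.dostep(action);
 *   }
 * </pre>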
<p>
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
<p>
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
<p>
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


 * @author David Poole  poole@cs.ubc.ca
 * @version 0.41 2007-09-09 */

public class SGameAdvController extends SGameQController
{

	/**
	 * Construct a new controller with an adversarial environment.
	 */
	SGameAdvController() {
		environment = new SGameAdvEnv();
		title = "Q-learning with Adversary";
	}

	/**
	 * Does one step.
	 *
	 * Carries out the action in the environment, with the adversary first
	 * choosing the prize position, and then updates the Q-value for the
	 * state-action pair that was just experienced.
	 *
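	 * <p>
	 * The update applied after each step is the one-step Q-learning rule,
	 * restating what the code below computes:
	 * <pre>
	 *   Q[s,a] := (1-alpha)*Q[s,a] + alpha*(reward + discount*value(s'))
	 * </pre>
	 * where s' is the state reached after the step and alpha is either
	 * fixed or 1/visits[s][a].
	 *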
	 * <p>
	 * The actions are
	 * <ul>
	 * <li> 0 is up
	 * <li> 1 is right
	 * <li> 2 is down
	 * <li> 3 is left
	 * </ul>
	 * @param action  the action that the agent does
	 */
	public void dostep(int action) {
		int oldX = environment.currX;
		int oldY = environment.currY;
		int oldPrize = environment.prize;
		boolean oldDamaged = environment.damaged;
		int oldState = state(oldX, oldY, oldPrize, oldDamaged);

		// the adversary chooses the prize position that gives the
		// minimum Q-value for the agent's action in the current state
		int adversary = 0;
		double minVal = qvalues[state(oldX, oldY, adversary, oldDamaged)][action];
		for (int i = 1; i < 4; i++)
			if (qvalues[state(oldX, oldY, i, oldDamaged)][action] < minVal) {
				adversary = i;
				minVal = qvalues[state(oldX, oldY, i, oldDamaged)][action];
			}

		// carry out the step in the adversarial environment
		double reward = ((SGameAdvEnv) environment).dostep(action, adversary);

		int newState = state(environment.currX, environment.currY,
				environment.prize, environment.damaged);

		// one-step Q-learning update:
		// Q[s,a] := (1-alpha)*Q[s,a] + alpha*(reward + discount*value(s'))
		double newVal = value(newState);
		double newDatum = reward + discount * newVal;
		visits[oldState][action]++;
		if (!alphaFixed)
			alpha = 1.0 / visits[oldState][action];

		qvalues[oldState][action] =
				(1 - alpha) * qvalues[oldState][action]
				+ alpha * newDatum;
	}


}

