
/**
 * This applet demonstrates a simple game. It isn't designed to be general or reusable.
<p>
 * Copyright (C) 2006  <A HREF="http://www.cs.ubc.ca/spider/poole/">David Poole</A>.
<p>
 * This program gives the core of the simulation. The GUI is in <A HREF="SGameGUI.java">SGameGUI.java</A>. The environment code is at <A HREF="SGameEnv.java">SGameEnv.java</A>. The controller is at <A HREF="SGameController.java">SGameController.java</A>.
<p>
 This program is free software; you can redistribute it and/or
 modify it under the terms of the GNU General Public License
 as published by the Free Software Foundation; either version 2
 of the License, or (at your option) any later version.
<p>
 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
<p>
 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.


 * @author David Poole  poole@cs.ubc.ca
 * @version 0.4 2006-12-04 */

public class SGameEnv
{
    // Direction codes accepted by dostep.
    private static final int UP = 0;
    private static final int RIGHT = 1;
    private static final int DOWN = 2;
    private static final int LEFT = 3;

    /** Value of the prize field meaning that no prize is on the board. */
    private static final int NO_PRIZE = 4;

    /** Number of columns of the grid. */
    public final int xDim = 5;
    /** Number of rows of the grid. */
    public final int yDim = 5;  //these should be the same!!!
                         //(Exercise: fix this restriction (look at SGameGUI too))

    /** Number of steps taken since the last reset. */
    public int numberOfSteps=0;
    /** Sum of the rewards received since the last reset. */
    public double totalReward=0.0;

    /** Lowest value totalReward has had since the last reset. */
    public double minReward=0.0;
    /** Step number at which minReward was reached. */
    public int minStep=0;
    /** Most recent step at which totalReward went from negative to positive. */
    public int zeroCrossing=0;

    public int currX = (int) (Math.random() * xDim);  // current X position
    public int currY = (int) (Math.random() * yDim);  // current Y position

    /** When true, the step count and total reward are printed every 100 steps. */
    public boolean tracing=false;

    // Monsters: mXY is true while a monster is present at column X, row Y.
    // The initial presence uses probability 0.5; every later step re-draws
    // it with the corresponding *appearsProb value below.
    public boolean m21 = Math.random() < 0.5;
    public boolean m42 = Math.random() < 0.5;
    public boolean m03 = Math.random() < 0.5;
    public boolean m13 = Math.random() < 0.5;
    public boolean m33 = Math.random() < 0.5;

    // Probability, per step, that each monster is present.
    double m21appearsProb = 0.4;
    double m42appearsProb = 0.4;
    double m03appearsProb = 0.4;
    double m13appearsProb = 0.4;
    double m33appearsProb = 0.4;

    /** Prize Location.
        <ul>
        <li> 4 means there is no prize
        <li> 0 means prize is at the top left
        <li> 1 means the prize is at the top right
        <li> 2 means the prize is at the bottom left
        <li> 3 means the prize is at the bottom right
        </ul>
    */
    public int prize = NO_PRIZE;
    /** Probability, per step, that a prize appears when none is present. */
    double prizeAppearsProb = 0.3;
    /** Reward for reaching the corner that holds the prize. */
    double prizeReward = 10;

    // Damage
    /** True once a monster has caught the agent and it has not been repaired. */
    public boolean damaged = false;

    /** Reward (a penalty) for meeting a monster while already damaged. */
    double rewardMonsterWhenDamaged = -10;

    // Rewards
    /** Reward (a penalty) for bumping into a wall or the edge of the grid. */
    public double crashReward = -1.0;



    /**
     * resets the step counter, the reward statistics (total, minimum,
     * minimum step, zero crossing) and the damage flag.
     * The agent position, the monsters and the prize are left as they are.
     */
    public void doreset()
    {
        numberOfSteps = 0;
        totalReward = 0.0;
        minReward = 0.0;
        minStep = 0;
        zeroCrossing = 0;
        damaged = false;
    }

    /**
     * does one step.
     *
     <p>
     The actions are
     <ul>
     <li> 0 is up
     <li> 1 is right
     <li> 2 is down
     <li> 3 is left
     </ul>
     <p>
     The requested action is carried out with probability 0.6; otherwise one
     of the four directions is picked uniformly at random (it may coincide
     with the requested one). Moving into an internal wall or off the grid
     leaves the agent where it is and costs crashReward.
     * @param action  the action that the agent does
     * @return reward received on this step
     * @throws IllegalArgumentException if action is not one of 0, 1, 2, 3;
     *         the environment is left untouched in that case
     */
    public double dostep(int action)  {
        // Reject bad actions before any state changes. Previously such an
        // action could silently teleport the agent to (0,0).
        if (action < UP || action > LEFT)
            throw new IllegalArgumentException(
                "action must be 0 (up), 1 (right), 2 (down) or 3 (left), but was " + action);

        // Determine monster appearances
        m21 = Math.random() < m21appearsProb;
        m42 = Math.random() < m42appearsProb;
        m03 = Math.random() < m03appearsProb;
        m13 = Math.random() < m13appearsProb;
        m33 = Math.random() < m33appearsProb;

        // Determine if prize appears
        if (prize == NO_PRIZE                      // no prize was previously present
            && Math.random() < prizeAppearsProb)   // a prize appears
            prize = (int) (Math.random() * 4);     // the corner is chosen

        // Determine actual direction: draws 0..3 override the requested action
        int rand = (int) (Math.random() * 10);
        int actualDirection = (rand < 4) ? rand : action;

        // Determine where the agent ends up (plus crash reward)
        double reward = 0.0;
        int newX = currX;
        int newY = currY;
        if (isBlocked(actualDirection)) {
            reward = crashReward;
        }
        else {
            switch (actualDirection) {
            case UP:    newY = currY - 1; break;
            case RIGHT: newX = currX + 1; break;
            case DOWN:  newY = currY + 1; break;
            case LEFT:  newX = currX - 1; break;
            default:    break;  // unreachable: actualDirection is always 0..3
            }
        }

        // Determine if monster got the agent
        if (monsterAt(newX, newY)) {
            if (damaged)
                reward += rewardMonsterWhenDamaged;
            else
                damaged = true;
        }

        // Determine if agent gets repaired (the repair cell is column 1, row 0)
        if (damaged && newX == 1 && newY == 0)
            damaged = false;

        // Determine if agent gets the prize:
        // prize%2 selects the left/right column, prize/2 the top/bottom row
        if (prize < NO_PRIZE
            && newX == (prize % 2) * (xDim - 1)
            && newY == (prize / 2) * (yDim - 1)) {
            reward += prizeReward;
            prize = NO_PRIZE;
        }

        // Update the statistics
        numberOfSteps++;
        totalReward += reward;
        if (totalReward < minReward) {
            minReward = totalReward;
            minStep = numberOfSteps;
        }
        // reward > totalReward means the total was negative before this step
        if (totalReward > 0 && reward > totalReward)
            zeroCrossing = numberOfSteps;

        if (tracing && numberOfSteps % 100 == 0)
            System.out.println(numberOfSteps+"  "+totalReward);

        currX = newX;
        currY = newY;
        return reward;
    }

    /** Tells whether moving in the given direction from the current cell hits an internal wall or the grid edge. */
    private boolean isBlocked(int direction) {
        switch (direction) {
        case UP:
            return currY == 0;
        case RIGHT:
            return currX == xDim - 1
                || (currY == 0 && currX < 2)     // internal walls in row 0
                || (currY == 1 && currX == 0);   // internal wall in row 1
        case DOWN:
            return currY == yDim - 1;
        case LEFT:
            return currX == 0
                || (currY == 0 && currX < 3)     // internal walls in row 0
                || (currY == 1 && currX == 1);   // internal wall in row 1
        default:
            return false;
        }
    }

    /** Tells whether a monster is currently present at column x, row y. */
    private boolean monsterAt(int x, int y) {
        return (x == 2 && y == 1 && m21)
            || (x == 4 && y == 2 && m42)
            || (x == 0 && y == 3 && m03)
            || (x == 1 && y == 3 && m13)
            || (x == 3 && y == 3 && m33);
    }

}
