"""Simple Reflex Agent -- That learns!

Your tasks:

(1) Change the reward method of the Environment class to reflect
    what you think the reward structure should be.

(2) Change the learn method of the ReflexAgent class to learn from
    the percept/action/reward cycle.

(3) a piece of software engineering: change perceive of the ReflexAgent
    class to have the percept list *not* grow without bound.

In the interest of getting this out quickly, I haven't done extensive
testing.

Have fun with this. Please work on your own, except for questions about
Python. I'll grade this on a mild curve -- if you do the bare minimum, and it
all works, you'll probably get a B, where 'minimum' is defined by the
working programs that actually get turned in.
"""

# Module metadata: author, version, date, copyright and license strings.
# NOTE(review): the "$Revision: ...$" and "$Date: ...$" values look like
# version-control keyword fields -- confirm before editing them by hand.
__author__ = "Will Fitzgerald (wfitzg@kzoo.edu)"
__version__ = "$Revision: 1.1$"
__date__ = "$Date: 2003/01/05 $"
__copyright__ = "Copyright (c) 2003 Will Fitzgerald"
__license__ = "Python"

import random

class ActionTable(dict):
    """Dictionary subclass used to map a percept to an action.

    It adds nothing to the built-in dict; it only gives the table a
    descriptive type name.
    """

## -- replace the reward method with your own reward system ...
class Environment(object):
    """World the agent acts in; it hands out a reward for each action."""

    def reward(self, agent, percept, action):
        """Report the (agent, percept, action) triple and return its reward.

        This placeholder implementation rewards everything equally.

        Parameters:
            agent   -- the agent that acted.
            percept -- what the agent perceived.
            action  -- what the agent did in response.

        Returns:
            The integer 1, whatever the arguments are.
        """
        # A single pre-formatted string keeps the output identical to the
        # original comma-separated print statement (hence the two spaces
        # after "Rewarding") while also parsing where print is a function.
        print("Rewarding  %s %s %s" % (agent, percept, action))
        return 1
    
class ReflexAgent(object):
    """Simple reflex agent that maps percepts to actions via a learned table.

    Each call to react() records the percept, picks the learned action for
    it (or a random one from the action list when nothing has been learned),
    performs the action, asks the environment for a reward, records the
    action and the reward, and finally calls learn().

    Every print call is given one pre-formatted string so the output matches
    a comma-separated print statement while the class also parses on
    interpreters where print is a function.
    """

    # Upper bound on the number of entries kept in the percept list.  react()
    # records three entries per cycle (percept, action, reward), so this
    # keeps the most recent 100 complete cycles.
    MAX_PERCEPTS = 300

    # -- the actions the agent can take
    def flee(self, enemy):
        """Announce that the agent is fleeing from enemy."""
        print("I'm fleeing %s" % (enemy,))

    def fight(self, enemy):
        """Announce that the agent is fighting enemy."""
        print("I'm fighting %s" % (enemy,))

    def feed(self, food):
        """Announce that the agent is eating food."""
        print("I'm eating %s" % (food,))

    def reproduce(self, mate):
        """Announce that the agent is reproducing; mate is not used."""
        print("kiss, kiss")

    def __init__(self, environment=None):
        """Create an agent with an empty action table and percept list.

        Parameters:
            environment -- object whose reward(agent, percept, action)
                method is called by react().  When omitted (or None) a new
                Environment is created for this agent, so agents no longer
                share one default instance built at class-definition time.
        """
        if environment is None:
            environment = Environment()
        self.__actionTable = ActionTable()
        self.__actionList = [self.flee, self.fight, self.feed, self.reproduce]
        self.__perceptList = []
        self.__environment = environment

    # -- accessors / learned actions
    def getLearnedActionFor(self, percept):
        """Print and return the action learned for percept, or None.

        percept is used as a dictionary key, so it must be hashable.
        """
        action = self.__actionTable.get(percept, None)
        print(action)
        return action

    def setLearnedActionFor(self, percept, action):
        """Record action as the learned response to percept."""
        # The original indexed a non-existent ".self" attribute of the table
        # and raised AttributeError on every call.
        self.__actionTable[percept] = action

    # NOTE(review): both accessors need arguments that a property cannot
    # supply, so reading or assigning this attribute raises TypeError.  It is
    # kept only so the attribute still exists; call the get/set methods.
    learnedActionFor = property(getLearnedActionFor, setLearnedActionFor)

    # -- accessors / action list
    def getActionList(self):
        """Return the list of callables the agent can choose from."""
        return self.__actionList

    def setActionList(self, actionList):
        """Replace the list of callables the agent can choose from."""
        self.__actionList = actionList

    actionList = property(getActionList, setActionList)

    # -- accessors / percept list
    def getPerceptList(self):
        """Return the list of recorded percepts, actions and rewards."""
        return self.__perceptList

    def setPerceptList(self, perceptList):
        """Replace the list of recorded percepts, actions and rewards."""
        self.__perceptList = perceptList

    perceptList = property(getPerceptList, setPerceptList)

    # -- accessors / environment
    def getEnv(self):
        """Return the environment that rewards this agent."""
        return self.__environment

    def setEnv(self, environment):
        """Replace the environment that rewards this agent."""
        self.__environment = environment

    environment = property(getEnv, setEnv)

    # -- perceiving adds to the percept list and drops the oldest entries
    # -- once the list is longer than MAX_PERCEPTS.
    def perceive(self, percept):
        """Append percept to the percept list, keeping the list bounded."""
        history = self.perceptList
        history.append(percept)
        overflow = len(history) - self.MAX_PERCEPTS
        if overflow > 0:
            # Trim in place so references to the list held elsewhere stay
            # valid.
            del history[:overflow]

    ## -- you might want to change the action selection part of this method.
    def react(self, percept):
        """Run one percept / action / reward / learn cycle for percept.

        The percept, the chosen action and the reward returned by the
        environment are appended to the percept list, in that order.
        """
        self.perceive(percept)
        action = self.getLearnedActionFor(percept)
        if action is None:
            action = self.random_action()
        action(percept)
        self.perceive(action)
        reward = self.environment.reward(self, percept, action)
        self.perceive(reward)
        self.learn()

    def random_action(self):
        """Print and return a randomly chosen entry of the action list."""
        act = random.choice(self.actionList)
        print("Randomly chose %s" % (act,))
        return act

    ## -- here's where learning takes place ... currently learns everything ...
    def learn(self):
        """Print the last three percept-list entries and fill the table.

        This placeholder ignores the reward and always installs the same
        four percept -> action entries.
        """
        print("Last three percepts:  %s" % (self.perceptList[-3:],))
        self.__actionTable["bigenemy"] = self.flee
        self.__actionTable["smallenemy"] = self.fight
        self.__actionTable["food"] = self.feed
        self.__actionTable["mate"] = self.reproduce