"""Simple Reflex Agent -- That learns! Your tasks: (1) Change the reward method of the Environment class to reflect what you think the reward structure should be. (2) Change the learn method of the ReflexAgent class to learn from the percept/action/reward cycle. (3) a piece of software engineering: change perceive of the ReflexAgent class to have the percept list *not* grow without bound. In the interest of getting this out quickly, I haven't done extensive testing. Have fun with this. Please work on your own, except for questions about Python. I'll grade this on a mild curve -- if you do the bare mininum, and it all works, you'll probably get a B, where 'minimum' is defined by the what working programs I do get turned in. """ __author__ = "Will Fitzgerald (wfitzg@kzoo.edu)" __version__ = "$Revision: 1.1$" __date__ = "$Date: 2003/01/05 $" __copyright__ = "Copyright (c) 2003 Will Fitzgerald" __license__ = "Python" import random class ActionTable(dict): pass ## -- replace the reward method with your own reward system ... 
class Environment(object):
    """The world an agent acts in; it scores each percept/action pair."""

    def reward(self, agent, percept, action):
        """Return the reward *agent* earns for answering *percept* with *action*.

        This is the placeholder reward structure: every action earns 1,
        whatever the percept was.
        """
        # A single pre-formatted string prints the same text under the
        # Python 2 print statement and the Python 3 print function.
        print("Rewarding  %s %s %s" % (agent, percept, action))
        return 1


class ReflexAgent(object):
    """Reflex agent: looks up a learned action for a percept, else picks one at random.

    Every percept, the action taken and the reward received are recorded, in
    that order, on the percept list, which ``learn`` then inspects.
    """

    # Upper bound on the number of entries kept in the percept list.
    # ``learn`` reads the last three entries, so keep this at 3 or more.
    maxPercepts = 99

    def __init__(self, environment=None):
        """Create an agent living in *environment*.

        A fresh ``Environment`` is created when none is given, so agents no
        longer share one default instance built at class-definition time.
        """
        if environment is None:
            environment = Environment()
        self.__actionTable = ActionTable()
        self.__actionList = [self.flee, self.fight, self.feed, self.reproduce]
        self.__perceptList = []
        self.__environment = environment

    # -- the actions the agent can take

    def flee(self, enemy):
        """Announce fleeing from *enemy*."""
        print("I'm fleeing %s" % (enemy,))

    def fight(self, enemy):
        """Announce fighting *enemy*."""
        print("I'm fighting %s" % (enemy,))

    def feed(self, food):
        """Announce eating *food*."""
        print("I'm eating %s" % (food,))

    def reproduce(self, mate):
        """Announce reproducing; *mate* is accepted but not used."""
        print("kiss, kiss")

    # -- accessors / learned actions

    def getLearnedActionFor(self, percept):
        """Print and return the action learned for *percept*, or None if unknown."""
        action = self.__actionTable.get(percept)
        print(action)
        return action

    def setLearnedActionFor(self, percept, action):
        """Record *action* as the learned response to *percept*."""
        # The original indexed a non-existent ``.self`` attribute of the table
        # and therefore raised AttributeError on every call.
        self.__actionTable[percept] = action

    def _getActionTable(self):
        """Return the percept -> action table itself (not a copy)."""
        return self.__actionTable

    # A property cannot pass a percept to its accessors, so wiring it to the
    # two methods above made every read or assignment raise TypeError.  It now
    # exposes the table, so ``agent.learnedActionFor[percept]`` reads and writes.
    learnedActionFor = property(_getActionTable)

    # -- accessors / action list

    def getActionList(self):
        """Return the list of callables the agent may choose from."""
        return self.__actionList

    def setActionList(self, actionList):
        """Replace the list of callables the agent may choose from."""
        self.__actionList = actionList

    actionList = property(getActionList, setActionList)

    # -- accessors / percept list

    def getPerceptList(self):
        """Return the recorded history of percepts, actions and rewards."""
        return self.__perceptList

    def setPerceptList(self, perceptList):
        """Replace the recorded history."""
        self.__perceptList = perceptList

    perceptList = property(getPerceptList, setPerceptList)

    # -- accessors / environment

    def getEnv(self):
        """Return the environment that rewards this agent."""
        return self.__environment

    def setEnv(self, environment):
        """Replace the environment that rewards this agent."""
        self.__environment = environment

    environment = property(getEnv, setEnv)

    # -- perceiving adds to the percept list and drops the oldest entries so
    # -- the list never holds more than maxPercepts items.

    def perceive(self, percept):
        """Append *percept* to the history, discarding the oldest overflow."""
        history = self.perceptList
        history.append(percept)
        excess = len(history) - self.maxPercepts
        if excess > 0:
            # Trim in place so references to the list held elsewhere stay valid.
            del history[:excess]

    ## -- you might want to change the action selection part of this method.

    def react(self, percept):
        """Run one percept -> action -> reward -> learn cycle for *percept*."""
        self.perceive(percept)
        action = self.getLearnedActionFor(percept)
        if action is None:
            action = self.random_action()
        action(percept)
        self.perceive(action)
        reward = self.environment.reward(self, percept, action)
        self.perceive(reward)
        self.learn()

    def random_action(self):
        """Pick, announce and return one entry of the action list at random."""
        act = random.choice(self.actionList)
        print("Randomly chose %s" % (act,))
        return act

    ## -- here's where learning takes place ... currently learns everything ...

    def learn(self):
        """Print the last three history entries, then fill in the whole table.

        Placeholder learning: the four known percepts are mapped to their
        actions on every call, regardless of what was just experienced.
        """
        print("Last three percepts:  %s" % (self.perceptList[-3:],))
        self.__actionTable["bigenemy"] = self.flee
        self.__actionTable["smallenemy"] = self.fight
        self.__actionTable["food"] = self.feed
        self.__actionTable["mate"] = self.reproduce