Qualia  0.2
QLearningAgent.h
Go to the documentation of this file.
1 /*
2  * QLearningAgent.h
3  *
4  * An agent that behaves according to an action-value, TD-lambda reinforcement
5  * learning algorithm.
6  *
7  * This file is part of Qualia https://github.com/sofian/qualia
8  *
9  * (c) 2011 Sofian Audry -- info(@)sofianaudry(.)com
10  *
11  * This program is free software: you can redistribute it and/or modify
12  * it under the terms of the GNU General Public License as published by
13  * the Free Software Foundation, either version 3 of the License, or
14  * (at your option) any later version.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19  * GNU General Public License for more details.
20  *
21  * You should have received a copy of the GNU General Public License
22  * along with this program. If not, see <http://www.gnu.org/licenses/>.
23  */
24 #ifndef QLEARNINGAGENT_H_
25 #define QLEARNINGAGENT_H_
26 
27 #include <qualia/core/Agent.h>
28 #include <qualia/util/random.h>
29 #include <qualia/rl/QFunction.h>
30 #include <qualia/rl/TDTrainer.h>
31 
32 #include "RLObservation.h"
33 #include "Policy.h"
34 
35 #include <string.h>
36 
42 class QLearningAgent : public Agent { // Agent subclass; per the file header, an action-value, TD-lambda reinforcement learning agent.
43  // NOTE(review): this listing's own line numbers skip in places (e.g. 49->51, 52->55, 64->68, 76->78), so some declarations are not visible here.
44 public:
45  // Configurable parameters /////
46 
47  // NOTICE: These parameters can be changed during the course of learning, although the user must be
48  // aware of the consequences on learning (which might be severely hampered).
49 
51  bool isLearning; // Public flag; presumably turns learning updates on/off -- TODO confirm against the implementation.
52 
55  // NOTE(review): listing lines 53-54 are not shown; any member declared there is missing from this view.
58  // NOTE(review): listing lines 56-57 are not shown; any member declared there is missing from this view.
61  // NOTE(review): listing lines 59-60 are not shown; any member declared there is missing from this view.
62  // Internal use ////////////////
63 
64  // TODO: if we ever make subclasses of Action we will need to change this...
68  // NOTE(review): listing lines 65-67 (what the TODO above refers to) are not shown in this view.
69  // Parameters.
70 
71  // Shortcut values.
72  unsigned int observationDim; // == lastObservation.dim()
73 
74  // Interface ///////////////////
75 
76  // Constructor/destructor.
     // NOTE(review): the opening line of the constructor declaration (listing line 77) is not shown;
     // only the tail of its parameter list is visible below. Any parameter before `policy` is unknown from here.
78  Policy* policy, // Pointer to the Policy object; ownership is not established by this header -- verify.
79  unsigned int observationDim, // Same name as the observationDim member above; presumably initializes it -- confirm.
80  ActionProperties* actionProperties, // Pointer to the action properties; semantics not visible here.
81  float lambda, float gamma, bool offPolicy = false); // lambda/gamma: presumably the TD-lambda trace decay and discount factor -- TODO confirm. offPolicy defaults to false.
82  virtual ~QLearningAgent(); // Virtual destructor.
83 
84  // Public methods.
     // NOTE(review): init/start/step/end presumably follow the Agent base-class lifecycle
     // (initialization, first observation, subsequent observations, end of episode) -- confirm against Agent.h.
85  virtual void init(); // Takes no arguments, returns nothing.
86  virtual Action* start(const Observation* observation); // Takes a read-only observation, returns a pointer to an Action.
87  virtual Action* step(const Observation* observation); // Takes a read-only observation, returns a pointer to an Action.
88  virtual void end(const Observation* observation); // Takes a read-only observation, returns nothing.
89 
90  virtual void save(XFile* file); // Takes an XFile*; presumably writes the agent's state to it -- verify in the implementation.
91  virtual void load(XFile* file); // Takes an XFile*; presumably restores state written by save() -- verify in the implementation.
92 };
93 
94 #endif /* QLEARNINGAGENT_H_ */