Qualia  0.2
TDTrainer.h
Go to the documentation of this file.
1 /*
2  * TDTrainer.h
3  *
4  * (c) 2013 Sofian Audry -- info(@)sofianaudry(.)com
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifndef TDTRAINER_H_
21 #define TDTRAINER_H_
22 
23 #include <qualia/core/common.h>
24 #include <qualia/core/Action.h>
26 #include <qualia/rl/QFunction.h>
28 
// TDTrainer: a Trainer subclass that is built around a QFunction and is fed
// (lastObservation, lastAction, observation, action) tuples through step().
// NOTE(review): the name suggests temporal-difference learning with eligibility
// traces (TD(lambda)); the update rule itself is not visible in this header --
// confirm against the implementation file.
32 class TDTrainer : public Trainer {
33 public:
36 
37  // Configurable parameters /////
38 
39  // NOTICE: These parameters can be changed during the course of learning, although the user must be
40  // aware of the consequences on learning (which might be severely hampered).
41 
 // Publicly writable learning parameter, set from the constructor argument of the same name.
 // NOTE(review): presumably the discount factor applied to future rewards -- confirm.
47  float gamma;
48 
 // Publicly writable learning parameter, set from the constructor argument of the same name.
 // NOTE(review): presumably the eligibility-trace decay of TD(lambda) -- confirm.
55  float lambda;
56 
 // Publicly writable flag; the constructor defaults it to false.
 // NOTE(review): presumably switches between off-policy and on-policy updates -- confirm
 // which update each value selects in the implementation.
64  bool offPolicy;
65 
 // Dimensions kept by the trainer. observationDim has a matching constructor argument;
 // actionDim has none, so it is presumably derived from actionProperties -- TODO confirm.
67  unsigned int observationDim;
68  unsigned int actionDim;
69 
 // Builds a trainer for the given QFunction.
 // CAUTION: the argument order is (lambda, gamma) whereas the fields above are declared
 // (gamma, lambda); both are float, so swapped arguments compile silently.
 // offPolicy is optional and defaults to false.
 // NOTE(review): ownership of qFunction and actionProperties is not expressed by the
 // signature (raw pointers) -- verify who is responsible for freeing them.
70  TDTrainer(QFunction* qFunction,
71  unsigned int observationDim, ActionProperties* actionProperties,
72  float lambda, float gamma, bool offPolicy=false);
 // Virtual destructor so that instances can be destroyed through a base-class pointer.
73  virtual ~TDTrainer();
74 
 // Virtual initialisation hook taking no arguments.
 // NOTE(review): presumably resets the trainer's internal learning state -- confirm.
75  virtual void init();
76 
 // Processes one transition: the previous observation/action pair followed by the current
 // observation/action pair. All four arguments are pointers to const, so the declaration
 // does not allow step() to modify them.
 // NOTE(review): presumably performs a single training update of the QFunction -- confirm.
78  virtual void step(const RLObservation* lastObservation, const Action* lastAction,
79  const RLObservation* observation, const Action* action);
80 };
81 
82 #endif /* TDTRAINER_H_ */