Qualia  0.2
QFunction.h
Go to the documentation of this file.
1 /*
2  * QFunction.h
3  *
4  * (c) 2013 Sofian Audry -- info(@)sofianaudry(.)com
5  *
6  * This program is free software: you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License as published by
8  * the Free Software Foundation, either version 3 of the License, or
9  * (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program. If not, see <http://www.gnu.org/licenses/>.
18  */
19 
20 #ifndef QFUNCTION_H_
21 #define QFUNCTION_H_
22 
23 #include <qualia/core/common.h>
24 #include <qualia/core/Action.h>
27 
28 #include <qualia/util/map.h>
29 
// QFunction: a GradientFunction that wraps another GradientFunction (held in
// `function`) and adds state-action value queries (getValue / getMaxAction).
// Every inline virtual method below simply delegates to `function`.
// NOTE(review): no constructor is visible in this listing and the embedded
// listing numbers skip values inside the class, so some declarations are
// probably missing from this view -- check the real header before editing.
30 class QFunction: public GradientFunction {
31 public:
    // Wrapped function that all forwarding methods below delegate to.
    // NOTE(review): ownership of this pointer cannot be determined from here --
    // confirm against the destructor's definition.
32  GradientFunction* function;
33 
34  // Buffer for the neural network inputs.
35  // TODO: possibly change (a bit inefficient memory).
    // NOTE(review): the buffer declaration these two comments refer to does not
    // appear in this listing.
37 
    // NOTE(review): presumably the dimension of an observation -- confirm
    // where this member is assigned.
38  unsigned int observationDim;
40 
    // Virtual destructor; declared only, no body is given in this header view.
42  virtual ~QFunction();
43 
    // --- Forwarding wrappers -------------------------------------------------
    // Each method below passes its arguments straight to the same-named method
    // of `function` and returns its result unchanged. None of them checks
    // `function` for null before dereferencing it.
    // NOTE(review): presumably these override the GradientFunction interface;
    // the base class is not visible here.
44  virtual void init() { function->init(); }
45  virtual unsigned int nInputs() const { return function->nInputs(); }
46  virtual unsigned int nOutputs() const { return function->nOutputs(); }
47  virtual unsigned int nParams() const { return function->nParams(); }
48 
49  virtual void setInput(int i, real x) { function->setInput(i, x); }
50  virtual real getOutput(int i) const { return function->getOutput(i); }
51 
52  virtual void propagate() { function->propagate(); }
53  virtual void backpropagate(real* outputError) { function->backpropagate(outputError); }
54  virtual void update() { function->update(); }
55 
56  virtual void save(XFile* file) { function->save(file); }
57  virtual void load(XFile* file) { function->load(file); }
58 
59  // Returns Q(s,a), i.e. the state-action value function (calls the approximator function).
60  real getValue(const Observation* observation, const Action* action);
61 
62  // Computes dst = argmax_a Q(observation, a) and, optionally, maxQ = max_a Q(observation, a).
    // maxQ defaults to 0 (a null pointer).
    // NOTE(review): presumably a null maxQ means the maximum value is not
    // written back -- confirm in the definition.
63  void getMaxAction(Action* dst, const Observation* observation, real *maxQ = 0);
64 };
65 
66 #endif /* QFUNCTION_H_ */