LossFunction.h
// @(#)root/tmva $Id$
// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Jan Therhaag

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : LossFunction                                                          *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      LossFunction and associated classes                                       *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Andreas Hoecker  <Andreas.Hocker@cern.ch>     - CERN, Switzerland         *
 *      Joerg Stelzer    <Joerg.Stelzer@cern.ch>      - CERN, Switzerland         *
 *      Peter Speckmayer <Peter.Speckmayer@cern.ch>   - CERN, Switzerland         *
 *      Jan Therhaag     <Jan.Therhaag@cern.ch>       - U of Bonn, Germany        *
 *      Helge Voss       <Helge.Voss@cern.ch>         - MPI-K Heidelberg, Germany *
 *                                                                                *
 * Copyright (c) 2005-2011:                                                       *
 *      CERN, Switzerland                                                         *
 *      U. of Victoria, Canada                                                    *
 *      MPI-K Heidelberg, Germany                                                 *
 *      U. of Bonn, Germany                                                       *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without            *
 * modification, are permitted according to the terms listed in LICENSE          *
 * (http://tmva.sourceforge.net/license.txt)                                      *
 **********************************************************************************/

#ifndef ROOT_TMVA_LossFunction
#define ROOT_TMVA_LossFunction

//#include <iosfwd>
#include <vector>
#include <map>
#include "TMVA/Event.h"

#include "TMVA/Types.h"

// multithreading only if the compilation flag is turned on
#ifdef R__USE_IMT
#include <ROOT/TThreadExecutor.hxx>
#include <memory>
#include "TSystem.h"
#endif

namespace TMVA {

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Data structure used by LossFunction and LossFunctionBDT to calculate errors, targets, etc.
   ///////////////////////////////////////////////////////////////////////////////////////////////

   class LossFunctionEventInfo{

   public:
      LossFunctionEventInfo(){
         trueValue = 0.;
         predictedValue = 0.;
         weight = 0.;
      };
      LossFunctionEventInfo(Double_t trueValue_, Double_t predictedValue_, Double_t weight_){
         trueValue = trueValue_;
         predictedValue = predictedValue_;
         weight = weight_;
      }
      ~LossFunctionEventInfo(){};

      Double_t trueValue;       // true target value for the event
      Double_t predictedValue;  // current predicted value for the event
      Double_t weight;          // event weight
   };


   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Loss function interface defining the base class for general error calculations in
   // regression/classification
   ///////////////////////////////////////////////////////////////////////////////////////////////

   class LossFunction {

   public:

      // constructors
      LossFunction(){
         #ifdef R__USE_IMT
         fNumPoolThreads = GetNumThreadsInPool();
         #endif
      };
      virtual ~LossFunction(){};

      // abstract methods that need to be implemented
      virtual Double_t CalculateLoss(LossFunctionEventInfo& e) = 0;
      virtual Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs) = 0;
      virtual Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs) = 0;

      virtual TString Name() = 0;
      virtual Int_t Id() = 0;

   protected:
      // #### only use multithreading if the compilation flag is turned on
      #ifdef R__USE_IMT
      UInt_t fNumPoolThreads = 1;

      // #### number of threads in the pool
      UInt_t GetNumThreadsInPool(){
         return ROOT::GetImplicitMTPoolSize();
      };
      #endif
   };
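
   // A minimal usage sketch of this interface (illustrative only, not part of the header): any
   // concrete loss function, e.g. the HuberLossFunction declared further below, evaluates a
   // vector of LossFunctionEventInfo objects. The quantile value 0.7 is just an example.
   //
   //    std::vector<TMVA::LossFunctionEventInfo> evs;
   //    evs.emplace_back(/*trueValue=*/1.0, /*predictedValue=*/0.8, /*weight=*/1.0);
   //    evs.emplace_back(/*trueValue=*/2.0, /*predictedValue=*/2.5, /*weight=*/0.5);
   //
   //    TMVA::HuberLossFunction huber(0.7);           // quantile defining the transition point
   //    huber.Init(evs);                              // set up transition point and sum of weights
   //    Double_t total = huber.CalculateNetLoss(evs);
   //    Double_t mean  = huber.CalculateMeanLoss(evs);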

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Loss function interface for boosted decision trees. Inherits from LossFunction.
   ///////////////////////////////////////////////////////////////////////////////////////////////

   /* Must inherit LossFunction with the virtual keyword so that we only have to implement
    * the LossFunction interface once.
    *
    *                 LossFunction
    *                  /        \
    *   SomeLossFunction        LossFunctionBDT
    *                  \        /
    *                   \      /
    *              SomeLossFunctionBDT
    *
    * Without the virtual keyword the two intermediate classes would each carry their own
    * LossFunction subobject, and SomeLossFunctionBDT would have to implement the virtual
    * functions of LossFunction twice, once for each subobject. See the diagram below.
    *
    *    LossFunction            LossFunction
    *         |                       |
    *   SomeLossFunction        LossFunctionBDT
    *                  \        /
    *                   \      /
    *              SomeLossFunctionBDT
    *
    * Multiple inheritance is often frowned upon. To avoid it, we could make LossFunctionBDT
    * separate from LossFunction, but it really is a type of loss function. We could also fold
    * LossFunction into LossFunctionBDT. In either of those scenarios, comparing the loss across
    * different regression methods becomes more convoluted. Multiple inheritance seems justified
    * here; it usually is not a big deal with interfaces, and it results in the simplest code in
    * this case. A minimal sketch of the virtual-inheritance pattern follows this comment.
    */
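
   // A minimal sketch of the virtual-inheritance diamond described above (the names Base, Left,
   // Right and Join are illustrative only and are not part of TMVA):
   //
   //    struct Base {
   //       virtual ~Base() {}
   //       virtual int Id() = 0;
   //    };
   //    struct Left  : public virtual Base { };   // plays the role of SomeLossFunction
   //    struct Right : public virtual Base { };   // plays the role of LossFunctionBDT
   //    struct Join  : public Left, public Right {
   //       int Id() { return 42; }                // the interface is implemented exactly once
   //    };
   //
   // With virtual inheritance, Join contains a single Base subobject, so a Join can be passed
   // unambiguously wherever a Base& is expected. Without the virtual keyword there would be two
   // distinct Base subobjects, and converting a Join* to a Base* would be ambiguous.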

   class LossFunctionBDT : public virtual LossFunction{

   public:

      // constructors
      LossFunctionBDT(){};
      virtual ~LossFunctionBDT(){};

      // abstract methods that need to be implemented
      virtual void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights) = 0;
      virtual void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap) = 0;
      virtual Double_t Target(LossFunctionEventInfo& e) = 0;
      virtual Double_t Fit(std::vector<LossFunctionEventInfo>& evs) = 0;

   };
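
   // A rough sketch of how a BDT regression trainer might drive this interface during boosting
   // (illustrative pseudo-code only; names like nTrees, eventSample and leaves are placeholders,
   // and the real driver lives in the BDT method implementation, not in this header):
   //
   //    lossBDT->Init(evinfomap, boostWeights);              // once, before the boosting loop
   //    for (UInt_t itree = 0; itree < nTrees; ++itree) {
   //       lossBDT->SetTargets(eventSample, evinfomap);      // recompute per-event targets
   //       // ... grow a regression tree on those targets, assigning events to leaves ...
   //       for (auto& leaf : leaves)
   //          leaf.response = lossBDT->Fit(leaf.events);     // best constant response per leaf
   //    }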

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Huber loss function for regression error calculations
   ///////////////////////////////////////////////////////////////////////////////////////////////

   class HuberLossFunction : public virtual LossFunction{

   public:
      HuberLossFunction();
      HuberLossFunction(Double_t quantile);
      ~HuberLossFunction();

      // The LossFunction methods
      Double_t CalculateLoss(LossFunctionEventInfo& e);
      Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs);
      Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs);

      // We go ahead and implement the simple ones
      TString Name(){ return TString("Huber"); };
      Int_t Id(){ return 0; };

      // Functions needed beyond the interface
      void Init(std::vector<LossFunctionEventInfo>& evs);
      Double_t CalculateQuantile(std::vector<LossFunctionEventInfo>& evs, Double_t whichQuantile, Double_t sumOfWeights, bool abs);
      Double_t CalculateSumOfWeights(std::vector<LossFunctionEventInfo>& evs);
      void SetTransitionPoint(std::vector<LossFunctionEventInfo>& evs);
      void SetSumOfWeights(std::vector<LossFunctionEventInfo>& evs);

   protected:
      Double_t fQuantile;         // quantile of the |residual| distribution defining the transition point
      Double_t fTransitionPoint;  // residual size at which the loss switches from quadratic to linear
      Double_t fSumOfWeights;     // cached sum of event weights
   };
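
   // For reference, the per-event Huber loss has the standard piecewise form sketched below
   // (illustrative only; the actual implementation lives in LossFunction.cxx, where the
   // transition point is taken from a quantile of the weighted |residual| distribution):
   //
   //    Double_t huberLoss(Double_t residual, Double_t transitionPoint, Double_t weight) {
   //       Double_t a = std::abs(residual);
   //       if (a <= transitionPoint)
   //          return weight * 0.5 * a * a;                                 // quadratic core
   //       return weight * transitionPoint * (a - 0.5 * transitionPoint);  // linear tails
   //    }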

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Huber loss function with boosted decision tree functionality
   ///////////////////////////////////////////////////////////////////////////////////////////////

   // The BDT loss function implements the LossFunctionBDT interface and inherits the
   // HuberLossFunction functionality.
   class HuberLossFunctionBDT : public LossFunctionBDT, public HuberLossFunction{

   public:
      HuberLossFunctionBDT();
      HuberLossFunctionBDT(Double_t quantile):HuberLossFunction(quantile){};
      ~HuberLossFunctionBDT(){};

      // The LossFunctionBDT methods
      void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights);
      void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap);
      Double_t Target(LossFunctionEventInfo& e);
      Double_t Fit(std::vector<LossFunctionEventInfo>& evs);

   private:
      // some data fields
   };

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // LeastSquares loss function for regression error calculations
   ///////////////////////////////////////////////////////////////////////////////////////////////

   class LeastSquaresLossFunction : public virtual LossFunction{

   public:
      LeastSquaresLossFunction(){};
      ~LeastSquaresLossFunction(){};

      // The LossFunction methods
      Double_t CalculateLoss(LossFunctionEventInfo& e);
      Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs);
      Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs);

      // We go ahead and implement the simple ones
      TString Name(){ return TString("LeastSquares"); };
      Int_t Id(){ return 1; };
   };
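
   // For reference, the per-event least squares loss is the weighted squared residual
   // (illustrative sketch only; the actual implementation lives in LossFunction.cxx):
   //
   //    Double_t leastSquaresLoss(Double_t trueValue, Double_t predictedValue, Double_t weight) {
   //       Double_t residual = trueValue - predictedValue;
   //       return weight * residual * residual;
   //    }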

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Least Squares loss function with boosted decision tree functionality
   ///////////////////////////////////////////////////////////////////////////////////////////////

   // The BDT loss function implements the LossFunctionBDT interface and inherits the
   // LeastSquaresLossFunction functionality.
   class LeastSquaresLossFunctionBDT : public LossFunctionBDT, public LeastSquaresLossFunction{

   public:
      LeastSquaresLossFunctionBDT(){};
      ~LeastSquaresLossFunctionBDT(){};

      // The LossFunctionBDT methods
      void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights);
      void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap);
      Double_t Target(LossFunctionEventInfo& e);
      Double_t Fit(std::vector<LossFunctionEventInfo>& evs);
   };

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Absolute Deviation loss function for regression error calculations
   ///////////////////////////////////////////////////////////////////////////////////////////////

   class AbsoluteDeviationLossFunction : public virtual LossFunction{

   public:
      AbsoluteDeviationLossFunction(){};
      ~AbsoluteDeviationLossFunction(){};

      // The LossFunction methods
      Double_t CalculateLoss(LossFunctionEventInfo& e);
      Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs);
      Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs);

      // We go ahead and implement the simple ones
      TString Name(){ return TString("AbsoluteDeviation"); };
      Int_t Id(){ return 2; };
   };
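
   // For reference, the per-event absolute deviation loss is the weighted |residual|
   // (illustrative sketch only; the actual implementation lives in LossFunction.cxx):
   //
   //    Double_t absoluteDeviationLoss(Double_t trueValue, Double_t predictedValue, Double_t weight) {
   //       return weight * std::abs(trueValue - predictedValue);
   //    }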

   ///////////////////////////////////////////////////////////////////////////////////////////////
   // Absolute Deviation loss function with boosted decision tree functionality
   ///////////////////////////////////////////////////////////////////////////////////////////////

   // The BDT loss function implements the LossFunctionBDT interface and inherits the
   // AbsoluteDeviationLossFunction functionality.
   class AbsoluteDeviationLossFunctionBDT : public LossFunctionBDT, public AbsoluteDeviationLossFunction{

   public:
      AbsoluteDeviationLossFunctionBDT(){};
      ~AbsoluteDeviationLossFunctionBDT(){};

      // The LossFunctionBDT methods
      void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights);
      void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap);
      Double_t Target(LossFunctionEventInfo& e);
      Double_t Fit(std::vector<LossFunctionEventInfo>& evs);
   };
}

#endif