Logo ROOT  
Reference Guide
LossFunction.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Jan Therhaag
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : LossFunction *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * LossFunction and associated classes *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * *
20  * Copyright (c) 2005-2011: *
21  * CERN, Switzerland *
22  * U. of Victoria, Canada *
23  * MPI-K Heidelberg, Germany *
24  * U. of Bonn, Germany *
25  * *
26  * Redistribution and use in source and binary forms, with or without *
27  * modification, are permitted according to the terms listed in LICENSE *
28  * (http://mva.sourceforge.net/license.txt) *
29  **********************************************************************************/
30 
31 #ifndef ROOT_TMVA_LossFunction
32 #define ROOT_TMVA_LossFunction
33 
34 //#include <iosfwd>
35 #include <vector>
36 #include <map>
37 #include "TMVA/Event.h"
38 
39 #include "TMVA/Types.h"
40 
41 
42 namespace TMVA {
43 
44  ///////////////////////////////////////////////////////////////////////////////////////////////
45  // Data Structure used by LossFunction and LossFunctionBDT to calculate errors, targets, etc
46  ///////////////////////////////////////////////////////////////////////////////////////////////
47 
48  class LossFunctionEventInfo{ // Per-event record: holds a true value, a predicted value and a weight.
49 
50  public: // NOTE(review): listing line 51 is missing from this extract; lines 52-55 presumably form the body of the default constructor LossFunctionEventInfo() -- confirm against the original header.
52  trueValue = 0.; // zero the three fields
53  predictedValue = 0.;
54  weight = 0.;
55  };
56  LossFunctionEventInfo(Double_t trueValue_, Double_t predictedValue_, Double_t weight_){ // value constructor: copies each argument into the field of the same name
57  trueValue = trueValue_;
58  predictedValue = predictedValue_;
59  weight = weight_;
60  }
62  // NOTE(review): listing lines 61 and 63-65 are missing here; presumably the destructor and the Double_t trueValue / predictedValue / weight member declarations named in the index below the listing -- confirm.
66  };
67 
68 
69  ///////////////////////////////////////////////////////////////////////////////////////////////
70  // Loss Function interface defining base class for general error calculations in
71  // regression/classification
72  ///////////////////////////////////////////////////////////////////////////////////////////////
73 
74  class LossFunction { // Abstract base class: the loss and identification methods below are pure virtual.
75 
76  public:
77 
78  // constructors
79  LossFunction(){};
80  virtual ~LossFunction(){}; // virtual destructor for this polymorphic base
81 
82  // abstract methods that need to be implemented (NOTE(review): listing line 83 is missing from this extract; the index below the listing names a pure virtual CalculateLoss(LossFunctionEventInfo &e) -- confirm)
84  virtual Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs) = 0; // net loss for a collection of events
85  virtual Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs) = 0; // mean loss for a collection of events
86 
87  virtual TString Name() = 0; // label of the loss function; the implementations in this file return "Huber", "LeastSquares", "AbsoluteDeviation"
88  virtual Int_t Id() = 0; // integer identifier; the implementations in this file return 0, 1 and 2
89  };
90 
91  ///////////////////////////////////////////////////////////////////////////////////////////////
92  // Loss Function interface for boosted decision trees. Inherits from LossFunction
93  ///////////////////////////////////////////////////////////////////////////////////////////////
94 
95  /* Must inherit LossFunction with the virtual keyword so that we only have to implement
96  * the LossFunction interface once.
97  *
98  * LossFunction
99  * / \
100  *SomeLossFunction LossFunctionBDT
101  * \ /
102  * \ /
103  * SomeLossFunctionBDT
104  *
105  * Without the virtual keyword the two would point to their own LossFunction objects
106  * and SomeLossFunctionBDT would have to implement the virtual functions of LossFunction twice, once
107  * for each object. See diagram below.
108  *
109  * LossFunction LossFunction
110  * | |
111  *SomeLossFunction LossFunctionBDT
112  * \ /
113  * \ /
114  * SomeLossFunctionBDT
115  *
116  * Multiple inheritance is often frowned upon. To avoid this, we could make LossFunctionBDT separate
117  * from LossFunction but it really is a type of loss function.
118  * We could also put LossFunction into LossFunctionBDT. In either of these scenarios, if you are doing
119  * different regression methods and want to compare the Loss this makes it more convoluted.
120  * I think that multiple inheritance seems justified in this case, but we could change it if it's a problem.
121  * Usually it isn't a big deal with interfaces and this results in the simplest code in this case.
122  */
123 
124  class LossFunctionBDT : public virtual LossFunction{ // Abstract BDT extension of LossFunction; inherited virtually so that concrete *BDT classes share a single LossFunction base.
125 
126  public:
127 
128  // constructors
129  LossFunctionBDT(){};
130  virtual ~LossFunctionBDT(){};
131 
132  // abstract methods that need to be implemented
133  virtual void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights) = 0; // implementations: initialize the targets and prepare for the regression
134  virtual void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap) = 0; // implementations: set the targets for a collection of events
135  virtual Double_t Target(LossFunctionEventInfo& e) = 0; // implementations: target for a single event
136  virtual Double_t Fit(std::vector<LossFunctionEventInfo>& evs) = 0; // implementations: fit value for a terminal node, based on the events in that node
137 
138  };
139 
140  ///////////////////////////////////////////////////////////////////////////////////////////////
141  // Huber loss function for regression error calculations
142  ///////////////////////////////////////////////////////////////////////////////////////////////
143 
144  class HuberLossFunction : public virtual LossFunction{ // Huber loss for regression; derives virtually from LossFunction.
145 
146  public: // NOTE(review): listing lines 147 and 149 are missing from this extract; presumably the default constructor and destructor that the index below the listing names -- confirm.
148  HuberLossFunction(Double_t quantile); // construct from a quantile; defined out of line
150 
151  // The LossFunction methods (NOTE(review): listing line 152 is missing; presumably CalculateLoss(LossFunctionEventInfo &e) -- confirm)
153  Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs); // net loss for a collection of events
154  Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs); // mean loss for a collection of events
155 
156  // We go ahead and implement the simple ones
157  TString Name(){ return TString("Huber"); };
158  Int_t Id(){ return 0; } ;
159 
160  // Functions needed beyond the interface
161  void Init(std::vector<LossFunctionEventInfo>& evs); // figures out the residual separating the "core" from the "tails" (per the index brief)
162  Double_t CalculateQuantile(std::vector<LossFunctionEventInfo>& evs, Double_t whichQuantile, Double_t sumOfWeights, bool abs); // determine the quantile for a given input
163  Double_t CalculateSumOfWeights(const std::vector<LossFunctionEventInfo>& evs); // sum of weights for the events in the vector
164  void SetTransitionPoint(std::vector<LossFunctionEventInfo>& evs); // determine the transition point using fQuantile and fSumOfWeights (per the index brief)
165  void SetSumOfWeights(std::vector<LossFunctionEventInfo>& evs); // set the sum of weights from a collection of events
166 
167  protected: // NOTE(review): listing lines 168-170 are missing; presumably the Double_t fQuantile / fTransitionPoint / fSumOfWeights members named in the index -- confirm.
171  };
172 
173  ///////////////////////////////////////////////////////////////////////////////////////////////
174  // Huber loss function with boosted decision tree functionality
175  ///////////////////////////////////////////////////////////////////////////////////////////////
176 
177  // The bdt loss function implements the LossFunctionBDT interface and inherits the HuberLossFunction
178  // functionality.
180  // NOTE(review): listing line 179 (the class header) is missing from this extract; presumably "class HuberLossFunctionBDT : public LossFunctionBDT, public HuberLossFunction{", by analogy with the other *BDT classes -- confirm.
181  public: // NOTE(review): listing lines 182-184 are missing; presumably the constructor(s) and destructor named in the index below the listing -- confirm.
185 
186  // The LossFunctionBDT methods
187  void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights); // initialize the targets and prepare for the regression
188  void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap); // set the targets for a collection of events
189  Double_t Target(LossFunctionEventInfo& e); // target for a single event
190  Double_t Fit(std::vector<LossFunctionEventInfo>& evs); // fit value for the terminal node, based on the events in that node
191 
192  private:
193  // some data fields
194  };
195 
196  ///////////////////////////////////////////////////////////////////////////////////////////////
197  // LeastSquares loss function for regression error calculations
198  ///////////////////////////////////////////////////////////////////////////////////////////////
199 
200  class LeastSquaresLossFunction : public virtual LossFunction{ // Least-squares loss for regression; derives virtually from LossFunction.
201 
202  public: // NOTE(review): listing lines 203-204 are missing from this extract; presumably the constructor and destructor named in the index below the listing -- confirm.
205 
206  // The LossFunction methods (NOTE(review): listing line 207 is missing; presumably CalculateLoss(LossFunctionEventInfo &e) -- confirm)
208  Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs); // net loss for a collection of events
209  Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs); // mean loss for a collection of events
210 
211  // We go ahead and implement the simple ones
212  TString Name(){ return TString("LeastSquares"); };
213  Int_t Id(){ return 1; } ;
214  };
215 
216  ///////////////////////////////////////////////////////////////////////////////////////////////
217  // Least Squares loss function with boosted decision tree functionality
218  ///////////////////////////////////////////////////////////////////////////////////////////////
219 
220  // The bdt loss function implements the LossFunctionBDT interface and inherits the LeastSquaresLossFunction
221  // functionality.
222  class LeastSquaresLossFunctionBDT : public LossFunctionBDT, public LeastSquaresLossFunction{ // Combines the BDT interface with the least-squares loss implementation.
223 
224  public: // NOTE(review): listing lines 225-226 are missing from this extract; presumably the constructor and destructor named in the index below the listing -- confirm.
227 
228  // The LossFunctionBDT methods
229  void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights); // initialize the targets and prepare for the regression
230  void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap); // set the targets for a collection of events (NOTE(review): listing line 231 is missing after this line; presumably Target(LossFunctionEventInfo &e) -- confirm)
232  Double_t Fit(std::vector<LossFunctionEventInfo>& evs); // fit value for the terminal node, based on the events in that node
233  };
234 
235  ///////////////////////////////////////////////////////////////////////////////////////////////
236  // Absolute Deviation loss function for regression error calculations
237  ///////////////////////////////////////////////////////////////////////////////////////////////
238 
239  class AbsoluteDeviationLossFunction : public virtual LossFunction{ // Absolute-deviation loss for regression; derives virtually from LossFunction.
240 
241  public: // NOTE(review): listing lines 242-243 are missing from this extract; presumably the constructor and destructor named in the index below the listing -- confirm.
244 
245  // The LossFunction methods (NOTE(review): listing line 246 is missing; presumably CalculateLoss(LossFunctionEventInfo &e) -- confirm)
247  Double_t CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs); // net loss for a collection of events
248  Double_t CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs); // mean loss for a collection of events
249 
250  // We go ahead and implement the simple ones
251  TString Name(){ return TString("AbsoluteDeviation"); };
252  Int_t Id(){ return 2; } ;
253  };
254 
255  ///////////////////////////////////////////////////////////////////////////////////////////////
256  // Absolute Deviation loss function with boosted decision tree functionality
257  ///////////////////////////////////////////////////////////////////////////////////////////////
258 
259  // The bdt loss function implements the LossFunctionBDT interface and inherits the AbsoluteDeviationLossFunction
260  // functionality.
261  class AbsoluteDeviationLossFunctionBDT : public LossFunctionBDT, public AbsoluteDeviationLossFunction{ // Combines the BDT interface with the absolute-deviation loss implementation.
262 
263  public: // NOTE(review): listing lines 264-265 are missing from this extract; presumably the constructor and destructor named in the index below the listing -- confirm.
266 
267  // The LossFunctionBDT methods
268  void Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights); // initialize the targets and prepare for the regression
269  void SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap); // set the targets for a collection of events (NOTE(review): listing line 270 is missing after this line; presumably Target(LossFunctionEventInfo &e) -- confirm)
271  Double_t Fit(std::vector<LossFunctionEventInfo>& evs); // fit value for the terminal node, based on the events in that node
272  };
273 }
274 
275 #endif
TMVA::HuberLossFunction::CalculateMeanLoss
Double_t CalculateMeanLoss(std::vector< LossFunctionEventInfo > &evs)
huber, determine the mean loss for a collection of events
Definition: LossFunction.cxx:220
e
#define e(i)
Definition: RSha256.hxx:121
TMVA::HuberLossFunction::HuberLossFunction
HuberLossFunction()
huber constructor
Definition: LossFunction.cxx:56
TMVA::LossFunction::~LossFunction
virtual ~LossFunction()
Definition: LossFunction.h:105
TMVA::LossFunctionEventInfo::weight
Double_t weight
Definition: LossFunction.h:115
TMVA::LeastSquaresLossFunction::CalculateLoss
Double_t CalculateLoss(LossFunctionEventInfo &e)
least squares, determine the loss for a single event
Definition: LossFunction.cxx:368
TMVA::HuberLossFunction::Init
void Init(std::vector< LossFunctionEventInfo > &evs)
figure out the residual that determines the separation between the "core" and the "tails" of the resi...
Definition: LossFunction.cxx:80
TMVA::AbsoluteDeviationLossFunction::CalculateNetLoss
Double_t CalculateNetLoss(std::vector< LossFunctionEventInfo > &evs)
absolute deviation, determine the net loss for a collection of events
Definition: LossFunction.cxx:505
TMVA::AbsoluteDeviationLossFunctionBDT::Target
Double_t Target(LossFunctionEventInfo &e)
absolute deviation BDT, set the target for a single event
Definition: LossFunction.cxx:580
TMVA::LossFunction::LossFunction
LossFunction()
Definition: LossFunction.h:104
TMVA::LeastSquaresLossFunctionBDT::~LeastSquaresLossFunctionBDT
~LeastSquaresLossFunctionBDT()
Definition: LossFunction.h:251
TMVA::LossFunction::Id
virtual Int_t Id()=0
TMVA::LossFunction
Definition: LossFunction.h:99
TMVA::LossFunctionBDT::~LossFunctionBDT
virtual ~LossFunctionBDT()
Definition: LossFunction.h:155
TMVA::LossFunctionEventInfo::predictedValue
Double_t predictedValue
Definition: LossFunction.h:114
TMVA::LeastSquaresLossFunction::~LeastSquaresLossFunction
~LeastSquaresLossFunction()
Definition: LossFunction.h:229
TMVA::HuberLossFunction::CalculateSumOfWeights
Double_t CalculateSumOfWeights(const std::vector< LossFunctionEventInfo > &evs)
huber, calculate the sum of weights for the events in the vector
Definition: LossFunction.cxx:92
TMVA::HuberLossFunction::CalculateNetLoss
Double_t CalculateNetLoss(std::vector< LossFunctionEventInfo > &evs)
huber, determine the net loss for a collection of events
Definition: LossFunction.cxx:202
TMVA::LeastSquaresLossFunction::Name
TString Name()
Definition: LossFunction.h:237
TMVA::LossFunctionBDT::SetTargets
virtual void SetTargets(std::vector< const TMVA::Event * > &evs, std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap)=0
TMVA::LossFunctionEventInfo::~LossFunctionEventInfo
~LossFunctionEventInfo()
Definition: LossFunction.h:111
TMVA::HuberLossFunctionBDT::Fit
Double_t Fit(std::vector< LossFunctionEventInfo > &evs)
huber BDT, determine the fit value for the terminal node based upon the events in the terminal node
Definition: LossFunction.cxx:334
TMVA::HuberLossFunctionBDT::HuberLossFunctionBDT
HuberLossFunctionBDT()
Definition: LossFunction.cxx:240
TMVA::LeastSquaresLossFunctionBDT::SetTargets
void SetTargets(std::vector< const TMVA::Event * > &evs, std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap)
least squares BDT, set the targets for a collection of events
Definition: LossFunction.cxx:437
TMVA::HuberLossFunction::SetTransitionPoint
void SetTransitionPoint(std::vector< LossFunctionEventInfo > &evs)
huber, determine the transition point using the values for fQuantile and fSumOfWeights which presumab...
Definition: LossFunction.cxx:147
TMVA::HuberLossFunction::CalculateLoss
Double_t CalculateLoss(LossFunctionEventInfo &e)
huber, determine the loss for a single event
Definition: LossFunction.cxx:181
TString
Definition: TString.h:136
TMVA::LeastSquaresLossFunction::Id
Int_t Id()
Definition: LossFunction.h:238
TMVA::LeastSquaresLossFunction
Definition: LossFunction.h:225
TMVA::LossFunctionBDT
Definition: LossFunction.h:149
TMVA::AbsoluteDeviationLossFunction
Definition: LossFunction.h:264
TMVA::HuberLossFunction::fQuantile
Double_t fQuantile
Definition: LossFunction.h:193
TMVA::LossFunction::CalculateNetLoss
virtual Double_t CalculateNetLoss(std::vector< LossFunctionEventInfo > &evs)=0
TMVA::HuberLossFunctionBDT::SetTargets
void SetTargets(std::vector< const TMVA::Event * > &evs, std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap)
huber BDT, set the targets for a collection of events
Definition: LossFunction.cxx:271
TMVA::LeastSquaresLossFunctionBDT::Init
void Init(std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap, std::vector< double > &boostWeights)
least squares BDT, initialize the targets and prepare for the regression
Definition: LossFunction.cxx:413
TMVA::LossFunction::Name
virtual TString Name()=0
TMVA::LeastSquaresLossFunction::LeastSquaresLossFunction
LeastSquaresLossFunction()
Definition: LossFunction.h:228
TMVA::LossFunction::CalculateMeanLoss
virtual Double_t CalculateMeanLoss(std::vector< LossFunctionEventInfo > &evs)=0
TMVA::AbsoluteDeviationLossFunctionBDT::SetTargets
void SetTargets(std::vector< const TMVA::Event * > &evs, std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap)
absolute deviation BDT, set the targets for a collection of events
Definition: LossFunction.cxx:559
TMVA::AbsoluteDeviationLossFunction::CalculateMeanLoss
Double_t CalculateMeanLoss(std::vector< LossFunctionEventInfo > &evs)
absolute deviation, determine the mean loss for a collection of events
Definition: LossFunction.cxx:516
TMVA::HuberLossFunction::Name
TString Name()
Definition: LossFunction.h:182
TMVA::HuberLossFunctionBDT::Init
void Init(std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap, std::vector< double > &boostWeights)
huber BDT, initialize the targets and prepare for the regression
Definition: LossFunction.cxx:246
TMVA::LossFunction::CalculateLoss
virtual Double_t CalculateLoss(LossFunctionEventInfo &e)=0
Event.h
TMVA::AbsoluteDeviationLossFunction::~AbsoluteDeviationLossFunction
~AbsoluteDeviationLossFunction()
Definition: LossFunction.h:268
TMVA::HuberLossFunction::Id
Int_t Id()
Definition: LossFunction.h:183
Types.h
TMVA::LeastSquaresLossFunction::CalculateNetLoss
Double_t CalculateNetLoss(std::vector< LossFunctionEventInfo > &evs)
least squares, determine the net loss for a collection of events
Definition: LossFunction.cxx:378
TMVA::LossFunctionEventInfo
Definition: LossFunction.h:73
TMVA::HuberLossFunction::~HuberLossFunction
~HuberLossFunction()
huber destructor
Definition: LossFunction.cxx:72
TMVA::AbsoluteDeviationLossFunctionBDT::AbsoluteDeviationLossFunctionBDT
AbsoluteDeviationLossFunctionBDT()
Definition: LossFunction.h:289
Double_t
double Double_t
Definition: RtypesCore.h:59
TMVA::HuberLossFunctionBDT::Target
Double_t Target(LossFunctionEventInfo &e)
huber BDT, set the target for a single event
Definition: LossFunction.cxx:323
TMVA::LossFunctionBDT::Target
virtual Double_t Target(LossFunctionEventInfo &e)=0
TMVA::AbsoluteDeviationLossFunction::AbsoluteDeviationLossFunction
AbsoluteDeviationLossFunction()
Definition: LossFunction.h:267
TMVA::LossFunctionBDT::Init
virtual void Init(std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap, std::vector< double > &boostWeights)=0
TMVA::LossFunctionBDT::Fit
virtual Double_t Fit(std::vector< LossFunctionEventInfo > &evs)=0
TMVA::HuberLossFunction
Definition: LossFunction.h:169
TMVA::AbsoluteDeviationLossFunction::CalculateLoss
Double_t CalculateLoss(LossFunctionEventInfo &e)
absolute deviation, determine the loss for a single event
Definition: LossFunction.cxx:497
TMVA::LeastSquaresLossFunctionBDT::Target
Double_t Target(LossFunctionEventInfo &e)
least squares BDT, set the target for a single event
Definition: LossFunction.cxx:459
TMVA::AbsoluteDeviationLossFunction::Id
Int_t Id()
Definition: LossFunction.h:277
TMVA::HuberLossFunctionBDT
Definition: LossFunction.h:204
TMVA::LossFunctionEventInfo::trueValue
Double_t trueValue
Definition: LossFunction.h:111
TMVA::HuberLossFunctionBDT::~HuberLossFunctionBDT
~HuberLossFunctionBDT()
Definition: LossFunction.h:209
TMVA::HuberLossFunction::fTransitionPoint
Double_t fTransitionPoint
Definition: LossFunction.h:194
TMVA::AbsoluteDeviationLossFunctionBDT::~AbsoluteDeviationLossFunctionBDT
~AbsoluteDeviationLossFunctionBDT()
Definition: LossFunction.h:290
TMVA::LossFunctionEventInfo::LossFunctionEventInfo
LossFunctionEventInfo()
Definition: LossFunction.h:101
TMVA::LossFunctionBDT::LossFunctionBDT
LossFunctionBDT()
Definition: LossFunction.h:154
TMVA::AbsoluteDeviationLossFunction::Name
TString Name()
Definition: LossFunction.h:276
TMVA::AbsoluteDeviationLossFunctionBDT::Init
void Init(std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap, std::vector< double > &boostWeights)
absolute deviation BDT, initialize the targets and prepare for the regression
Definition: LossFunction.cxx:536
TMVA::HuberLossFunction::CalculateQuantile
Double_t CalculateQuantile(std::vector< LossFunctionEventInfo > &evs, Double_t whichQuantile, Double_t sumOfWeights, bool abs)
huber, determine the quantile for a given input
Definition: LossFunction.cxx:117
TMVA::LeastSquaresLossFunction::CalculateMeanLoss
Double_t CalculateMeanLoss(std::vector< LossFunctionEventInfo > &evs)
least squares, determine the mean loss for a collection of events
Definition: LossFunction.cxx:390
TMVA::LeastSquaresLossFunctionBDT::Fit
Double_t Fit(std::vector< LossFunctionEventInfo > &evs)
least squares BDT, determine the fit value for the terminal node based upon the events in the terminal node
Definition: LossFunction.cxx:470
TMVA::HuberLossFunction::fSumOfWeights
Double_t fSumOfWeights
Definition: LossFunction.h:195
TMVA::HuberLossFunction::SetSumOfWeights
void SetSumOfWeights(std::vector< LossFunctionEventInfo > &evs)
huber, set the sum of weights given a collection of events
Definition: LossFunction.cxx:174
TMVA::LeastSquaresLossFunctionBDT::LeastSquaresLossFunctionBDT
LeastSquaresLossFunctionBDT()
Definition: LossFunction.h:250
TMVA::AbsoluteDeviationLossFunctionBDT::Fit
Double_t Fit(std::vector< LossFunctionEventInfo > &evs)
absolute deviation BDT, determine the fit value for the terminal node based upon the events in the te...
Definition: LossFunction.cxx:591
TMVA
create variable transformations
Definition: GeneticMinimizer.h:22
int