Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
LossFunction.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Jan Therhaag
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : LossFunction *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
16 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19 * *
20 * Copyright (c) 2005-2011: *
21 * CERN, Switzerland *
22 * U. of Victoria, Canada *
23 * MPI-K Heidelberg, Germany *
24 * U. of Bonn, Germany *
25 * *
26 * Redistribution and use in source and binary forms, with or without *
27 * modification, are permitted according to the terms listed in LICENSE *
28 * (http://mva.sourceforge.net/license.txt) *
29 **********************************************************************************/
30
31/*! \class TMVA::HuberLossFunction
32\ingroup TMVA
33
34Huber Loss Function.
35
36*/
37
38#include "TMVA/LossFunction.h"
39#include "TMVA/Config.h"
40
41#include "TMVA/MsgLogger.h"
42
43#include "Rtypes.h"
44#include "TMath.h"
45#include <iostream>
46
47// multithreading only if the compilation flag is turned on
48#ifdef R__USE_IMT
50#include "ROOT/TSeq.hxx"
51#endif
52
53////////////////////////////////////////////////////////////////////////////////
54/// huber constructor
55
57 fTransitionPoint = -9999;
58 fSumOfWeights = -9999;
59 fQuantile = 0.7; // the quantile value determines the bulk of the data, e.g. 0.7 defines
60 // the core as the first 70% and the tails as the last 30%
61}
62
64 fSumOfWeights = -9999;
65 fTransitionPoint = -9999;
66 fQuantile = quantile;
67}
68
69////////////////////////////////////////////////////////////////////////////////
70/// huber destructor
71
73
74}
75
76////////////////////////////////////////////////////////////////////////////////
77/// figure out the residual that determines the separation between the
78/// "core" and the "tails" of the residuals distribution
79
80void TMVA::HuberLossFunction::Init(std::vector<LossFunctionEventInfo>& evs){
81
82 // Calculate the residual that separates the core and the tails
83 SetSumOfWeights(evs);
84 SetTransitionPoint(evs);
85}
86
87////////////////////////////////////////////////////////////////////////////////
88/// huber, calculate the sum of weights for the events in the vector
89
90// Multithreaded version of HuberLossFunction::CalculateSumOfWeights
91#ifdef R__USE_IMT
92Double_t TMVA::HuberLossFunction::CalculateSumOfWeights(const std::vector<LossFunctionEventInfo>& evs){
93 // need a lambda function to pass to TThreadExecutor::MapReduce
94 auto mapFunc = [&evs](UInt_t i) { return evs[i].weight; };
95 auto redFunc = [](const std::vector<Double_t> &a) { return std::accumulate(a.begin(), a.end(), 0.0); };
96
98 mapFunc, ROOT::TSeqU(evs.size()), redFunc, TMVA::Config::Instance().GetThreadExecutor().GetPoolSize());
99}
100
101// Standard version of HuberLossFunction::CalculateSumOfWeights
102#else
103Double_t TMVA::HuberLossFunction::CalculateSumOfWeights(const std::vector<LossFunctionEventInfo>& evs){
104
105 // Calculate the sum of the weights
106 Double_t sumOfWeights = 0;
107 for(UInt_t i = 0; i<evs.size(); i++)
108 sumOfWeights+=evs[i].weight;
109
110 return sumOfWeights;
111}
112#endif
113
114////////////////////////////////////////////////////////////////////////////////
115/// huber, determine the quantile for a given input
116
117Double_t TMVA::HuberLossFunction::CalculateQuantile(std::vector<LossFunctionEventInfo>& evs, Double_t whichQuantile, Double_t sumOfWeights, bool abs){
118
119 // use a lambda function to tell the vector how to sort the LossFunctionEventInfo data structures
120 // (sort them in ascending order of residual magnitude) if abs is true
121 // otherwise sort them in ascending order of residual
122 if(abs)
123 std::sort(evs.begin(), evs.end(), [](LossFunctionEventInfo a, LossFunctionEventInfo b){
124 return TMath::Abs(a.trueValue-a.predictedValue) < TMath::Abs(b.trueValue-b.predictedValue); });
125 else
126 std::sort(evs.begin(), evs.end(), [](LossFunctionEventInfo a, LossFunctionEventInfo b){
127 return (a.trueValue-a.predictedValue) < (b.trueValue-b.predictedValue); });
128 UInt_t i = 0;
129 Double_t temp = 0.0;
130 while(i<evs.size()-1 && temp <= sumOfWeights*whichQuantile){
131 temp += evs[i].weight;
132 i++;
133 }
134 // edge cases
135 // Output warning for low return values
136 if(whichQuantile == 0) i=0; // assume 0th quantile to mean the 0th entry in the ordered series
137
138 // usual returns
139 if(abs) return TMath::Abs(evs[i].trueValue-evs[i].predictedValue);
140 else return evs[i].trueValue-evs[i].predictedValue;
141}
142
143////////////////////////////////////////////////////////////////////////////////
144/// huber, determine the transition point using the values for fQuantile and fSumOfWeights
145/// which presumably have already been set
146
147void TMVA::HuberLossFunction::SetTransitionPoint(std::vector<LossFunctionEventInfo>& evs){
148 fTransitionPoint = CalculateQuantile(evs, fQuantile, fSumOfWeights, true);
149
150 // if the transition point corresponding to the quantile is 0 then the loss function will not function
151 // the quantile was chosen too low. Let's use the first nonzero residual as the transition point instead.
152 if(fTransitionPoint == 0){
153 // evs should already be sorted according to the magnitude of the residuals, since CalculateQuantile does this
154 for(UInt_t i=0; i<evs.size(); i++){
155 Double_t residual = TMath::Abs(evs[i].trueValue - evs[i].predictedValue);
156 if(residual != 0){
157 fTransitionPoint = residual;
158 break;
159 }
160 }
161 }
162
163 // Let the user know that the transition point is zero and the loss function won't work properly
164 if(fTransitionPoint == 0){
165 //std::cout << "The residual transition point for the Huber loss function corresponding to quantile, " << fQuantile << ", is zero."
166 //<< " This implies that all of the residuals are zero and the events have been predicted perfectly. Perhaps the regression is too complex"
167 //<< " for the amount of data." << std::endl;
168 }
169}
170
171////////////////////////////////////////////////////////////////////////////////
172/// huber, set the sum of weights given a collection of events
173
174void TMVA::HuberLossFunction::SetSumOfWeights(std::vector<LossFunctionEventInfo>& evs){
175 fSumOfWeights = CalculateSumOfWeights(evs);
176}
177
178////////////////////////////////////////////////////////////////////////////////
179/// huber, determine the loss for a single event
180
182 // If the huber loss function is uninitialized then assume a group of one
183 // and initialize the transition point and weights for this single event
184 if(fSumOfWeights == -9999){
185 std::vector<LossFunctionEventInfo> evs{e};
186 SetSumOfWeights(evs);
187 SetTransitionPoint(evs);
188 }
189
190 Double_t residual = TMath::Abs(e.trueValue - e.predictedValue);
191 Double_t loss = 0;
192 // Quadratic loss in terms of the residual for small residuals
193 if(residual <= fTransitionPoint) loss = 0.5*residual*residual;
194 // Linear loss for large residuals, so that the tails don't dominate the net loss calculation
195 else loss = fQuantile*residual - 0.5*fQuantile*fQuantile;
196 return e.weight*loss;
197}
198
199////////////////////////////////////////////////////////////////////////////////
200/// huber, determine the net loss for a collection of events
201
202Double_t TMVA::HuberLossFunction::CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs){
203 // Initialize the Huber Loss Function so that we can calculate the loss.
204 // The loss for each event depends on the other events in the group
205 // that define the cutoff quantile (fTransitionPoint).
206 SetSumOfWeights(evs);
207 SetTransitionPoint(evs);
208
209 Double_t netloss = 0;
210 for(UInt_t i=0; i<evs.size(); i++)
211 netloss+=CalculateLoss(evs[i]);
212 return netloss;
213 // should get a function to return the average loss as well
214 // return netloss/fSumOfWeights
215}
216
217////////////////////////////////////////////////////////////////////////////////
218/// huber, determine the mean loss for a collection of events
219
220Double_t TMVA::HuberLossFunction::CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs){
221 // Initialize the Huber Loss Function so that we can calculate the loss.
222 // The loss for each event depends on the other events in the group
223 // that define the cutoff quantile (fTransitionPoint).
224 SetSumOfWeights(evs);
225 SetTransitionPoint(evs);
226
227 Double_t netloss = 0;
228 for(UInt_t i=0; i<evs.size(); i++)
229 netloss+=CalculateLoss(evs[i]);
230 return netloss/fSumOfWeights;
231}
232
233/*! \class TMVA::HuberLossFunctionBDT
234\ingroup TMVA
235
236Huber BDT Loss Function.
237
238*/
239
241}
242
243////////////////////////////////////////////////////////////////////////////////
244/// huber BDT, initialize the targets and prepare for the regression
245
246void TMVA::HuberLossFunctionBDT::Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights){
247// Run this once before building the forest. Set initial prediction to weightedMedian.
248
249 std::vector<LossFunctionEventInfo> evinfovec(evinfomap.size());
250 for (auto &e: evinfomap){
251 evinfovec.push_back(LossFunctionEventInfo(e.second.trueValue, e.second.predictedValue, e.first->GetWeight()));
252 }
253
254 // Calculates fSumOfWeights and fTransitionPoint with the current residuals
255 SetSumOfWeights(evinfovec);
256 Double_t weightedMedian = CalculateQuantile(evinfovec, 0.5, fSumOfWeights, false);
257
258 //Store the weighted median as a first boosweight for later use
259 boostWeights.push_back(weightedMedian);
260 for (auto &e: evinfomap ) {
261 // set the initial prediction for all events to the median
262 e.second.predictedValue += weightedMedian;
263 }
264}
265
266////////////////////////////////////////////////////////////////////////////////
267/// huber BDT, set the targets for a collection of events
268
269// Multithreaded version of HuberLossFunctionBDT::SetTargets
270#ifdef R__USE_IMT
271void TMVA::HuberLossFunctionBDT::SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap){
272
273 std::vector<LossFunctionEventInfo> eventvec(evs.size());
274
275 // first we need to copy the events from evs into eventvec since we require a vector of LossFunctionEventInfo
276 // for SetSumOfWeights and SetTransitionPoint. We use TThreadExecutor to implement the copy in parallel
277 // need a lambda function to pass to TThreadExecutor::Map
278 auto fcopy = [&eventvec, &evs, &evinfomap](UInt_t i) {
279 eventvec[i] = LossFunctionEventInfo(evinfomap[evs[i]].trueValue, evinfomap[evs[i]].predictedValue, evs[i]->GetWeight());
280 };
281
283
284 // Recalculate the residual that separates the "core" of the data and the "tails"
285 // This residual is the quantile given by fQuantile, defaulted to 0.7
286 // the quantile corresponding to 0.5 would be the usual median
287 SetSumOfWeights(eventvec); // This was already set in init, but may change if there is subsampling for each tree
288 SetTransitionPoint(eventvec);
289
290 // ok now set the targets in parallel
291 // need a lambda function to pass to TThreadExecutor::Map
292 auto f = [this, &evinfomap](const TMVA::Event* ev) {
293 const_cast<TMVA::Event*>(ev)->SetTarget(0, Target(evinfomap[ev]));
294 };
295
296 TMVA::Config::Instance().GetThreadExecutor().Foreach(f, evs, TMVA::Config::Instance().GetThreadExecutor().GetPoolSize());
297}
298
299// Standard version of HuberLossFunctionBDT::SetTargets
300#else
301void TMVA::HuberLossFunctionBDT::SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap){
302
303 std::vector<LossFunctionEventInfo> eventvec(evs.size());
304 for (std::vector<const TMVA::Event*>::const_iterator e=evs.begin(); e!=evs.end();e++){
305 eventvec.push_back(LossFunctionEventInfo(evinfomap[*e].trueValue, evinfomap[*e].predictedValue, (*e)->GetWeight()));
306 }
307
308 // Recalculate the residual that separates the "core" of the data and the "tails"
309 // This residual is the quantile given by fQuantile, defaulted to 0.7
310 // the quantile corresponding to 0.5 would be the usual median
311 SetSumOfWeights(eventvec); // This was already set in init, but may change if there is subsampling for each tree
312 SetTransitionPoint(eventvec);
313
314 for (std::vector<const TMVA::Event*>::const_iterator e=evs.begin(); e!=evs.end();e++) {
315 const_cast<TMVA::Event*>(*e)->SetTarget(0,Target(evinfomap[*e]));
316 }
317}
318#endif
319
320////////////////////////////////////////////////////////////////////////////////
321/// huber BDT, set the target for a single event
322
324 Double_t residual = e.trueValue - e.predictedValue;
325 // The weight/target relationships are taken care of in the tmva decision tree operations so we don't need to worry about that here
326 if(TMath::Abs(residual) <= fTransitionPoint) return residual;
327 else return fTransitionPoint*(residual<0?-1.0:1.0);
328}
329
330////////////////////////////////////////////////////////////////////////////////
331/// huber BDT, determine the fit value for the terminal node based upon the
332/// events in the terminal node
333
334Double_t TMVA::HuberLossFunctionBDT::Fit(std::vector<LossFunctionEventInfo>& evs){
335// The fit in the terminal node for huber is basically the median of the residuals.
336// Then you add the average difference from the median to that.
337// The tails are discounted. If a residual is in the tails then we just use the
338// cutoff residual that sets the "core" and the "tails" instead of the large residual.
339// So we get something between least squares (mean as fit) and absolute deviation (median as fit).
340 Double_t sumOfWeights = CalculateSumOfWeights(evs);
341 Double_t shift=0,diff= 0;
342 Double_t residualMedian = CalculateQuantile(evs,0.5,sumOfWeights, false);
343 for(UInt_t j=0;j<evs.size();j++){
344 Double_t residual = evs[j].trueValue - evs[j].predictedValue;
345 diff = residual-residualMedian;
346 // if we are using weights then I'm not sure why this isn't weighted
347 shift+=1.0/evs.size()*((diff<0)?-1.0:1.0)*TMath::Min(fTransitionPoint,fabs(diff));
348 // I think this should be
349 // shift+=evs[j].weight/sumOfWeights*((diff<0)?-1.0:1.0)*TMath::Min(fTransitionPoint,fabs(diff));
350 // not sure why it was originally coded like this
351 }
352 return (residualMedian + shift);
353
354}
355
356/*! \class TMVA::LeastSquaresLossFunction
357\ingroup TMVA
358
359Least Squares Loss Function.
360
361*/
362
363// Constructor and destructor are in header file. They don't do anything.
364
365////////////////////////////////////////////////////////////////////////////////
366/// least squares , determine the loss for a single event
367
369 Double_t residual = (e.trueValue - e.predictedValue);
370 Double_t loss = 0;
371 loss = residual*residual;
372 return e.weight*loss;
373}
374
375////////////////////////////////////////////////////////////////////////////////
376/// least squares , determine the net loss for a collection of events
377
378Double_t TMVA::LeastSquaresLossFunction::CalculateNetLoss(std::vector<LossFunctionEventInfo>& evs){
379 Double_t netloss = 0;
380 for(UInt_t i=0; i<evs.size(); i++)
381 netloss+=CalculateLoss(evs[i]);
382 return netloss;
383 // should get a function to return the average loss as well
384 // return netloss/fSumOfWeights
385}
386
387////////////////////////////////////////////////////////////////////////////////
388/// least squares , determine the mean loss for a collection of events
389
390Double_t TMVA::LeastSquaresLossFunction::CalculateMeanLoss(std::vector<LossFunctionEventInfo>& evs){
391 Double_t netloss = 0;
392 Double_t sumOfWeights = 0;
393 for(UInt_t i=0; i<evs.size(); i++){
394 sumOfWeights+=evs[i].weight;
395 netloss+=CalculateLoss(evs[i]);
396 }
397 // return the weighted mean
398 return netloss/sumOfWeights;
399}
400
401/*! \class TMVA::LeastSquaresLossFunctionBDT
402\ingroup TMVA
403
404Least Squares BDT Loss Function.
405
406*/
407
408// Constructor and destructor defined in header. They don't do anything.
409
410////////////////////////////////////////////////////////////////////////////////
411/// least squares BDT, initialize the targets and prepare for the regression
412
413void TMVA::LeastSquaresLossFunctionBDT::Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights){
414// Run this once before building the forest. Set initial prediction to the weightedMean
415
416 std::vector<LossFunctionEventInfo> evinfovec(evinfomap.size());
417 for (auto &e: evinfomap){
418 evinfovec.push_back(LossFunctionEventInfo(e.second.trueValue, e.second.predictedValue, e.first->GetWeight()));
419 }
420
421 // Initial prediction for least squares is the weighted mean
422 Double_t weightedMean = Fit(evinfovec);
423
424 //Store the weighted median as a first boosweight for later use
425 boostWeights.push_back(weightedMean);
426 for (auto &e: evinfomap ) {
427 // set the initial prediction for all events to the median
428 e.second.predictedValue += weightedMean;
429 }
430}
431
432////////////////////////////////////////////////////////////////////////////////
433/// least squares BDT, set the targets for a collection of events
434
435// Multithreaded version of LeastSquaresLossFunctionBDT::SetTargets
436#ifdef R__USE_IMT
437void TMVA::LeastSquaresLossFunctionBDT::SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap) {
438
439 // need a lambda function to pass to TThreadExecutor::Map
440 auto f = [this, &evinfomap](const TMVA::Event* ev) {
441 const_cast<TMVA::Event*>(ev)->SetTarget(0, Target(evinfomap[ev]));
442 };
443
444 TMVA::Config::Instance().GetThreadExecutor().Foreach(f, evs, TMVA::Config::Instance().GetThreadExecutor().GetPoolSize());
445}
446// Standard version of LeastSquaresLossFunctionBDT::SetTargets
447#else
448void TMVA::LeastSquaresLossFunctionBDT::SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap){
449
450 for (std::vector<const TMVA::Event*>::const_iterator e=evs.begin(); e!=evs.end();e++) {
451 const_cast<TMVA::Event*>(*e)->SetTarget(0,Target(evinfomap[*e]));
452 }
453}
454#endif
455
456////////////////////////////////////////////////////////////////////////////////
457/// least squares BDT, set the target for a single event
458
460 Double_t residual = e.trueValue - e.predictedValue;
461 // The weight/target relationships are taken care of in the tmva decision tree operations. We don't need to worry about that here
462 // and we return the residual instead of the weight*residual.
463 return residual;
464}
465
466////////////////////////////////////////////////////////////////////////////////
467/// huber BDT, determine the fit value for the terminal node based upon the
468/// events in the terminal node
469
470Double_t TMVA::LeastSquaresLossFunctionBDT::Fit(std::vector<LossFunctionEventInfo>& evs){
471// The fit in the terminal node for least squares is the weighted average of the residuals.
472 Double_t sumOfWeights = 0;
473 Double_t weightedResidualSum = 0;
474 for(UInt_t j=0;j<evs.size();j++){
475 sumOfWeights += evs[j].weight;
476 Double_t residual = evs[j].trueValue - evs[j].predictedValue;
477 weightedResidualSum += evs[j].weight*residual;
478 }
479 Double_t weightedMean = weightedResidualSum/sumOfWeights;
480
481 // return the weighted mean
482 return weightedMean;
483}
484
485/*! \class TMVA::AbsoluteDeviationLossFunction
486\ingroup TMVA
487
488Absolute Deviation Loss Function.
489
490*/
491
492// Constructors in the header. They don't do anything.
493
494////////////////////////////////////////////////////////////////////////////////
495/// absolute deviation, determine the loss for a single event
496
498 Double_t residual = e.trueValue - e.predictedValue;
499 return e.weight*TMath::Abs(residual);
500}
501
502////////////////////////////////////////////////////////////////////////////////
503/// absolute deviation, determine the net loss for a collection of events
504
506
507 Double_t netloss = 0;
508 for(UInt_t i=0; i<evs.size(); i++)
509 netloss+=CalculateLoss(evs[i]);
510 return netloss;
511}
512
513////////////////////////////////////////////////////////////////////////////////
514/// absolute deviation, determine the mean loss for a collection of events
515
517 Double_t sumOfWeights = 0;
518 Double_t netloss = 0;
519 for(UInt_t i=0; i<evs.size(); i++){
520 sumOfWeights+=evs[i].weight;
521 netloss+=CalculateLoss(evs[i]);
522 }
523 return netloss/sumOfWeights;
524}
525
526/*! \class TMVA::AbsoluteDeviationLossFunctionBDT
527\ingroup TMVA
528
529Absolute Deviation BDT Loss Function.
530
531*/
532
533////////////////////////////////////////////////////////////////////////////////
534/// absolute deviation BDT, initialize the targets and prepare for the regression
535
536void TMVA::AbsoluteDeviationLossFunctionBDT::Init(std::map<const TMVA::Event*, LossFunctionEventInfo>& evinfomap, std::vector<double>& boostWeights){
537// Run this once before building the forest. Set initial prediction to weightedMedian.
538
539 std::vector<LossFunctionEventInfo> evinfovec(evinfomap.size());
540 for (auto &e: evinfomap){
541 evinfovec.push_back(LossFunctionEventInfo(e.second.trueValue, e.second.predictedValue, e.first->GetWeight()));
542 }
543
544 Double_t weightedMedian = Fit(evinfovec);
545
546 //Store the weighted median as a first boostweight for later use
547 boostWeights.push_back(weightedMedian);
548 for (auto &e: evinfomap ) {
549 // set the initial prediction for all events to the median
550 e.second.predictedValue += weightedMedian;
551 }
552}
553
554////////////////////////////////////////////////////////////////////////////////
555/// absolute deviation BDT, set the targets for a collection of events
556
557// Multithreaded version of AbsoluteDeviationLossFunctionBDT::SetTargets
558#ifdef R__USE_IMT
559void TMVA::AbsoluteDeviationLossFunctionBDT::SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap){
560 // need a lambda function to pass to TThreadExecutor::Map
561 auto f = [this, &evinfomap](const TMVA::Event* ev) {
562 const_cast<TMVA::Event*>(ev)->SetTarget(0, Target(evinfomap[ev]));
563 };
564
565 TMVA::Config::Instance().GetThreadExecutor().Foreach(f, evs, TMVA::Config::Instance().GetThreadExecutor().GetPoolSize());
566}
567// Standard version of AbsoluteDeviationLossFunctionBDT::SetTargets
568#else
569void TMVA::AbsoluteDeviationLossFunctionBDT::SetTargets(std::vector<const TMVA::Event*>& evs, std::map< const TMVA::Event*, LossFunctionEventInfo >& evinfomap){
570
571 for (std::vector<const TMVA::Event*>::const_iterator e=evs.begin(); e!=evs.end();e++) {
572 const_cast<TMVA::Event*>(*e)->SetTarget(0,Target(evinfomap[*e]));
573 }
574}
575#endif
576
577////////////////////////////////////////////////////////////////////////////////
578/// absolute deviation BDT, set the target for a single event
579
581// The target is the sign of the residual.
582 Double_t residual = e.trueValue - e.predictedValue;
583 // The weight/target relationships are taken care of in the tmva decision tree operations so we don't need to worry about that here
584 return (residual<0?-1.0:1.0);
585}
586
587////////////////////////////////////////////////////////////////////////////////
588/// absolute deviation BDT, determine the fit value for the terminal node based upon the
589/// events in the terminal node
590
591Double_t TMVA::AbsoluteDeviationLossFunctionBDT::Fit(std::vector<LossFunctionEventInfo>& evs){
592// For Absolute Deviation, the fit in each terminal node is the weighted residual median.
593
594 // use a lambda function to tell the vector how to sort the LossFunctionEventInfo data structures
595 // sort in ascending order of residual value
596 std::sort(evs.begin(), evs.end(), [](LossFunctionEventInfo a, LossFunctionEventInfo b){
597 return (a.trueValue-a.predictedValue) < (b.trueValue-b.predictedValue); });
598
599 // calculate the sum of weights, used in the weighted median calculation
600 Double_t sumOfWeights = 0;
601 for(UInt_t j=0; j<evs.size(); j++)
602 sumOfWeights+=evs[j].weight;
603
604 // get the index of the weighted median
605 UInt_t i = 0;
606 Double_t temp = 0.0;
607 while(i<evs.size() && temp <= sumOfWeights*0.5){
608 temp += evs[i].weight;
609 i++;
610 }
611 if (i >= evs.size()) return 0.; // prevent uncontrolled memory access in return value calculation
612
613 // return the median residual
614 return evs[i].trueValue-evs[i].predictedValue;
615}
#define b(i)
Definition RSha256.hxx:100
#define f(i)
Definition RSha256.hxx:104
#define a(i)
Definition RSha256.hxx:99
#define e(i)
Definition RSha256.hxx:103
unsigned int UInt_t
Definition RtypesCore.h:46
double Double_t
Definition RtypesCore.h:59
A pseudo container class which is a generator of indices.
Definition TSeq.hxx:67
Double_t Fit(std::vector< LossFunctionEventInfo > &evs)
absolute deviation BDT, determine the fit value for the terminal node based upon the events in the te...
void SetTargets(std::vector< const TMVA::Event * > &evs, std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap)
absolute deviation BDT, set the targets for a collection of events
void Init(std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap, std::vector< double > &boostWeights)
absolute deviation BDT, initialize the targets and prepare for the regression
Double_t Target(LossFunctionEventInfo &e)
absolute deviation BDT, set the target for a single event
Double_t CalculateNetLoss(std::vector< LossFunctionEventInfo > &evs)
absolute deviation, determine the net loss for a collection of events
Double_t CalculateMeanLoss(std::vector< LossFunctionEventInfo > &evs)
absolute deviation, determine the mean loss for a collection of events
Double_t CalculateLoss(LossFunctionEventInfo &e)
absolute deviation, determine the loss for a single event
Executor & GetThreadExecutor()
Get executor class for multi-thread usage In case when MT is not enabled will return a serial executo...
Definition Config.h:81
static Config & Instance()
static function: returns TMVA instance
Definition Config.cxx:98
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition Event.cxx:367
void Foreach(Function func, unsigned int nTimes, unsigned nChunks=0)
wrap TExecutor::Foreach
Definition Executor.h:117
auto MapReduce(F func, ROOT::TSeq< INTEGER > args, R redfunc) -> InvokeResult_t< F, INTEGER >
Wrap TExecutor::MapReduce functions.
Definition Executor.h:154
unsigned int GetPoolSize() const
Definition Executor.h:106
Double_t Target(LossFunctionEventInfo &e)
huber BDT, set the target for a single event
Double_t Fit(std::vector< LossFunctionEventInfo > &evs)
huber BDT, determine the fit value for the terminal node based upon the events in the terminal node
void Init(std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap, std::vector< double > &boostWeights)
huber BDT, initialize the targets and prepare for the regression
void SetTargets(std::vector< const TMVA::Event * > &evs, std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap)
huber BDT, set the targets for a collection of events
HuberLossFunction()
huber constructor
void SetSumOfWeights(std::vector< LossFunctionEventInfo > &evs)
huber, set the sum of weights given a collection of events
void SetTransitionPoint(std::vector< LossFunctionEventInfo > &evs)
huber, determine the transition point using the values for fQuantile and fSumOfWeights which presumab...
Double_t CalculateNetLoss(std::vector< LossFunctionEventInfo > &evs)
huber, determine the net loss for a collection of events
Double_t CalculateLoss(LossFunctionEventInfo &e)
huber, determine the loss for a single event
~HuberLossFunction()
huber destructor
Double_t CalculateSumOfWeights(const std::vector< LossFunctionEventInfo > &evs)
huber, calculate the sum of weights for the events in the vector
Double_t CalculateMeanLoss(std::vector< LossFunctionEventInfo > &evs)
huber, determine the mean loss for a collection of events
void Init(std::vector< LossFunctionEventInfo > &evs)
figure out the residual that determines the separation between the "core" and the "tails" of the resi...
Double_t CalculateQuantile(std::vector< LossFunctionEventInfo > &evs, Double_t whichQuantile, Double_t sumOfWeights, bool abs)
huber, determine the quantile for a given input
void SetTargets(std::vector< const TMVA::Event * > &evs, std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap)
least squares BDT, set the targets for a collection of events
Double_t Target(LossFunctionEventInfo &e)
least squares BDT, set the target for a single event
Double_t Fit(std::vector< LossFunctionEventInfo > &evs)
huber BDT, determine the fit value for the terminal node based upon the events in the terminal node
void Init(std::map< const TMVA::Event *, LossFunctionEventInfo > &evinfomap, std::vector< double > &boostWeights)
least squares BDT, initialize the targets and prepare for the regression
Double_t CalculateNetLoss(std::vector< LossFunctionEventInfo > &evs)
least squares , determine the net loss for a collection of events
Double_t CalculateMeanLoss(std::vector< LossFunctionEventInfo > &evs)
least squares , determine the mean loss for a collection of events
Double_t CalculateLoss(LossFunctionEventInfo &e)
least squares , determine the loss for a single event
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Definition TMathBase.h:198
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition TMathBase.h:123