Logo ROOT  
Reference Guide
MethodPDERS.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Yair Mahalalel, Joerg Stelzer, Helge Voss, Kai Voss
3
4/***********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodPDERS *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation *
12 * *
13 * Authors (alphabetical): *
14 * Krzysztof Danielowski <danielow@cern.ch> - IFJ PAN & AGH, Poland *
15 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
16 * Kamil Kraszewski <kalq@cern.ch> - IFJ PAN & UJ, Poland *
17 * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Poland *
18 * Yair Mahalalel <Yair.Mahalalel@cern.ch> - CERN, Switzerland *
19 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21 * *
22 * Copyright (c) 2005: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * *
27 * Redistribution and use in source and binary forms, with or without *
28 * modification, are permitted according to the terms listed in LICENSE *
29 * (http://tmva.sourceforge.net/LICENSE) *
30 ***********************************************************************************/
31
32/*! \class TMVA::MethodPDERS
33\ingroup TMVA
34
35This is a generalization of the above Likelihood methods to \f$ N_{var} \f$
36dimensions, where \f$ N_{var} \f$ is the number of input variables
37used in the MVA. If the multi-dimensional probability density functions
38(PDFs) for signal and background were known, this method contains the entire
39physical information, and is therefore optimal. Usually, kernel estimation
40methods are used to approximate the PDFs using the events from the
41training sample.
42
43A very simple probability density estimator (PDE) has been suggested
44in [hep-ex/0211019](http://arxiv.org/abs/hep-ex/0211019). The
45PDE for a given test event is obtained from counting the (normalized)
46number of signal and background (training) events that occur in the
47"vicinity" of the test event. The volume that describes "vicinity" is
48user-defined. A [search method based on binary-trees](http://arxiv.org/abs/hep-ex/0211019)
49is used to effectively reduce the
50selection time for the range search. Three different volume definitions
51are optional:
52
53 - *MinMax:* the volume is defined in each dimension with respect
54 to the full variable range found in the training sample.
55 - *RMS:* the volume is defined in each dimensions with respect
56 to the RMS estimated from the training sample.
57 - *Adaptive:* a volume element is defined in each dimensions with
58 respect to the RMS estimated from the training sample. The overall
59 scale of the volume element is then determined for each event so
60 that the total number of events confined in the volume be within
61 a user-defined range.
62
63The adaptive range search is used by default.
64*/
65
66#include "TMVA/MethodPDERS.h"
67
68#include "TMVA/BinaryTree.h"
70#include "TMVA/Configurable.h"
72#include "TMVA/Event.h"
73#include "TMVA/IMethod.h"
74#include "TMVA/MethodBase.h"
75#include "TMVA/MsgLogger.h"
76#include "TMVA/RootFinder.h"
77#include "TMVA/Tools.h"
79#include "TMVA/Types.h"
80
81#include "ThreadLocalStorage.h"
82#include "TBuffer.h"
83#include "TFile.h"
84#include "TObjString.h"
85#include "TMath.h"
86
87#include <assert.h>
88#include <algorithm>
89
90namespace TMVA {
91 const Bool_t MethodPDERS_UseFindRoot = kFALSE;
92};
93
94
95REGISTER_METHOD(PDERS)
96
98
99////////////////////////////////////////////////////////////////////////////////
100/// standard constructor for the PDERS method
101
103 const TString& methodTitle,
104 DataSetInfo& theData,
105 const TString& theOption) :
106 MethodBase( jobName, Types::kPDERS, methodTitle, theData, theOption),
107 fFcnCall(0),
108 fVRangeMode(kAdaptive),
109 fKernelEstimator(kBox),
110 fDelta(0),
111 fShift(0),
112 fScaleS(0),
113 fScaleB(0),
114 fDeltaFrac(0),
115 fGaussSigma(0),
116 fGaussSigmaNorm(0),
117 fNRegOut(0),
118 fNEventsMin(0),
119 fNEventsMax(0),
120 fMaxVIterations(0),
121 fInitialScale(0),
122 fInitializedVolumeEle(0),
123 fkNNMin(0),
124 fkNNMax(0),
125 fMax_distance(0),
126 fPrinted(0),
127 fNormTree(0)
128{
129 fHelpVolume = NULL;
130 fBinaryTree = NULL;
131}
132
133////////////////////////////////////////////////////////////////////////////////
134/// construct MethodPDERS through from file
135
137 const TString& theWeightFile) :
138 MethodBase( Types::kPDERS, theData, theWeightFile),
139 fFcnCall(0),
140 fVRangeMode(kAdaptive),
141 fKernelEstimator(kBox),
142 fDelta(0),
143 fShift(0),
144 fScaleS(0),
145 fScaleB(0),
146 fDeltaFrac(0),
147 fGaussSigma(0),
148 fGaussSigmaNorm(0),
149 fNRegOut(0),
150 fNEventsMin(0),
151 fNEventsMax(0),
152 fMaxVIterations(0),
153 fInitialScale(0),
154 fInitializedVolumeEle(0),
155 fkNNMin(0),
156 fkNNMax(0),
157 fMax_distance(0),
158 fPrinted(0),
159 fNormTree(0)
160{
161 fHelpVolume = NULL;
162 fBinaryTree = NULL;
163}
164
165////////////////////////////////////////////////////////////////////////////////
166/// PDERS can handle classification with 2 classes and regression with one or more regression-targets
167
169{
170 if (type == Types::kClassification && numberClasses == 2) return kTRUE;
171 if (type == Types::kRegression) return kTRUE;
172 return kFALSE;
173}
174
175////////////////////////////////////////////////////////////////////////////////
176/// default initialisation routine called by all constructors
177
179{
180 fBinaryTree = NULL;
181
182 UpdateThis();
183
184 // default options
185 fDeltaFrac = 3.0;
186 fVRangeMode = kAdaptive;
187 fKernelEstimator = kBox;
188
189 // special options for Adaptive mode
190 fNEventsMin = 100;
191 fNEventsMax = 200;
192 fMaxVIterations = 150;
193 fInitialScale = 0.99;
194 fGaussSigma = 0.1;
195 fNormTree = kFALSE;
196
197 fkNNMin = Int_t(fNEventsMin);
198 fkNNMax = Int_t(fNEventsMax);
199
200 fInitializedVolumeEle = kFALSE;
201 fAverageRMS.clear();
202
203 // the minimum requirement to declare an event signal-like
204 SetSignalReferenceCut( 0.0 );
205}
206
207////////////////////////////////////////////////////////////////////////////////
208/// destructor
209
211{
212 if (fDelta) delete fDelta;
213 if (fShift) delete fShift;
214
215 if (NULL != fBinaryTree) delete fBinaryTree;
216}
217
218////////////////////////////////////////////////////////////////////////////////
219/// define the options (their key words) that can be set in the option string.
220///
221/// know options:
222/// - VolumeRangeMode <string> Method to determine volume range
223/// available values are:
224/// - MinMax
225/// - Unscaled
226/// - RMS
227/// - kNN
228/// - Adaptive <default>
229///
230/// - KernelEstimator <string> Kernel estimation function
231/// available values are:
232/// - Box <default>
233/// - Sphere
234/// - Teepee
235/// - Gauss
236/// - Sinc3
237/// - Sinc5
238/// - Sinc7
239/// - Sinc9
240/// - Sinc11
241/// - Lanczos2
242/// - Lanczos3
243/// - Lanczos5
244/// - Lanczos8
245/// - Trim
246///
247/// - DeltaFrac <float> Ratio of #EventsMin/#EventsMax for MinMax and RMS volume range
248/// - NEventsMin <int> Minimum number of events for adaptive volume range
249/// - NEventsMax <int> Maximum number of events for adaptive volume range
250/// - MaxVIterations <int> Maximum number of iterations for adaptive volume range
251/// - InitialScale <float> Initial scale for adaptive volume range
252/// - GaussSigma <float> Width with respect to the volume size of Gaussian kernel estimator
253
255{
256 DeclareOptionRef(fVolumeRange="Adaptive", "VolumeRangeMode", "Method to determine volume size");
257 AddPreDefVal(TString("Unscaled"));
258 AddPreDefVal(TString("MinMax"));
259 AddPreDefVal(TString("RMS"));
260 AddPreDefVal(TString("Adaptive"));
261 AddPreDefVal(TString("kNN"));
262
263 DeclareOptionRef(fKernelString="Box", "KernelEstimator", "Kernel estimation function");
264 AddPreDefVal(TString("Box"));
265 AddPreDefVal(TString("Sphere"));
266 AddPreDefVal(TString("Teepee"));
267 AddPreDefVal(TString("Gauss"));
268 AddPreDefVal(TString("Sinc3"));
269 AddPreDefVal(TString("Sinc5"));
270 AddPreDefVal(TString("Sinc7"));
271 AddPreDefVal(TString("Sinc9"));
272 AddPreDefVal(TString("Sinc11"));
273 AddPreDefVal(TString("Lanczos2"));
274 AddPreDefVal(TString("Lanczos3"));
275 AddPreDefVal(TString("Lanczos5"));
276 AddPreDefVal(TString("Lanczos8"));
277 AddPreDefVal(TString("Trim"));
278
279 DeclareOptionRef(fDeltaFrac , "DeltaFrac", "nEventsMin/Max for minmax and rms volume range");
280 DeclareOptionRef(fNEventsMin , "NEventsMin", "nEventsMin for adaptive volume range");
281 DeclareOptionRef(fNEventsMax , "NEventsMax", "nEventsMax for adaptive volume range");
282 DeclareOptionRef(fMaxVIterations, "MaxVIterations", "MaxVIterations for adaptive volume range");
283 DeclareOptionRef(fInitialScale , "InitialScale", "InitialScale for adaptive volume range");
284 DeclareOptionRef(fGaussSigma , "GaussSigma", "Width (wrt volume size) of Gaussian kernel estimator");
285 DeclareOptionRef(fNormTree , "NormTree", "Normalize binary search tree");
286}
287
288////////////////////////////////////////////////////////////////////////////////
289/// process the options specified by the user
290
292{
293 if (IgnoreEventsWithNegWeightsInTraining()) {
294 Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
295 << GetMethodTypeName()
296 << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
297 << Endl;
298 }
299
300 fGaussSigmaNorm = fGaussSigma; // * TMath::Sqrt( Double_t(GetNvar()) );
301
302 fVRangeMode = MethodPDERS::kUnsupported;
303
304 if (fVolumeRange == "MinMax" ) fVRangeMode = kMinMax;
305 else if (fVolumeRange == "RMS" ) fVRangeMode = kRMS;
306 else if (fVolumeRange == "Adaptive" ) fVRangeMode = kAdaptive;
307 else if (fVolumeRange == "Unscaled" ) fVRangeMode = kUnscaled;
308 else if (fVolumeRange == "kNN" ) fVRangeMode = kkNN;
309 else {
310 Log() << kFATAL << "VolumeRangeMode parameter '" << fVolumeRange << "' unknown" << Endl;
311 }
312
313 if (fKernelString == "Box" ) fKernelEstimator = kBox;
314 else if (fKernelString == "Sphere" ) fKernelEstimator = kSphere;
315 else if (fKernelString == "Teepee" ) fKernelEstimator = kTeepee;
316 else if (fKernelString == "Gauss" ) fKernelEstimator = kGauss;
317 else if (fKernelString == "Sinc3" ) fKernelEstimator = kSinc3;
318 else if (fKernelString == "Sinc5" ) fKernelEstimator = kSinc5;
319 else if (fKernelString == "Sinc7" ) fKernelEstimator = kSinc7;
320 else if (fKernelString == "Sinc9" ) fKernelEstimator = kSinc9;
321 else if (fKernelString == "Sinc11" ) fKernelEstimator = kSinc11;
322 else if (fKernelString == "Lanczos2" ) fKernelEstimator = kLanczos2;
323 else if (fKernelString == "Lanczos3" ) fKernelEstimator = kLanczos3;
324 else if (fKernelString == "Lanczos5" ) fKernelEstimator = kLanczos5;
325 else if (fKernelString == "Lanczos8" ) fKernelEstimator = kLanczos8;
326 else if (fKernelString == "Trim" ) fKernelEstimator = kTrim;
327 else {
328 Log() << kFATAL << "KernelEstimator parameter '" << fKernelString << "' unknown" << Endl;
329 }
330
331 // TODO: Add parameter validation
332
333 Log() << kVERBOSE << "interpreted option string: vRangeMethod: '"
334 << (const char*)((fVRangeMode == kMinMax) ? "MinMax" :
335 (fVRangeMode == kUnscaled) ? "Unscaled" :
336 (fVRangeMode == kRMS ) ? "RMS" : "Adaptive") << "'" << Endl;
337 if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
338 Log() << kVERBOSE << "deltaFrac: " << fDeltaFrac << Endl;
339 else
340 Log() << kVERBOSE << "nEventsMin/Max, maxVIterations, initialScale: "
341 << fNEventsMin << " " << fNEventsMax
342 << " " << fMaxVIterations << " " << fInitialScale << Endl;
343 Log() << kVERBOSE << "KernelEstimator = " << fKernelString << Endl;
344}
345
346////////////////////////////////////////////////////////////////////////////////
347/// this is a dummy training: the preparation work to do is the construction
348/// of the binary tree as a pointer chain. It is easier to directly save the
349/// trainingTree in the weight file, and to rebuild the binary tree in the
350/// test phase from scratch
351
353{
354 if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with PDERS; "
355 << "please remove the option from the configuration string, or "
356 << "use \"!Normalise\""
357 << Endl;
358
359 CreateBinarySearchTree( Types::kTraining );
360
361 CalcAverages();
362 SetVolumeElement();
363
364 fInitializedVolumeEle = kTRUE;
365 ExitFromTraining();
366}
367
368////////////////////////////////////////////////////////////////////////////////
369/// init the size of a volume element using a defined fraction of the
370/// volume containing the entire events
371
373{
374 if (fInitializedVolumeEle == kFALSE) {
375 fInitializedVolumeEle = kTRUE;
376
377 // binary trees must exist
378 assert( fBinaryTree );
379
380 CalcAverages();
381 SetVolumeElement();
382 }
383
384 // cannot determine error
385 NoErrorCalc(err, errUpper);
386
387 return this->CRScalc( *GetEvent() );
388}
389
390////////////////////////////////////////////////////////////////////////////////
391
392const std::vector< Float_t >& TMVA::MethodPDERS::GetRegressionValues()
393{
394 if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>;
395 fRegressionReturnVal->clear();
396 // init the size of a volume element using a defined fraction of the
397 // volume containing the entire events
398 if (fInitializedVolumeEle == kFALSE) {
399 fInitializedVolumeEle = kTRUE;
400
401 // binary trees must exist
402 assert( fBinaryTree );
403
404 CalcAverages();
405
406 SetVolumeElement();
407 }
408
409 const Event* ev = GetEvent();
410 this->RRScalc( *ev, fRegressionReturnVal );
411
412 Event * evT = new Event(*ev);
413 UInt_t ivar = 0;
414 for (std::vector<Float_t>::iterator it = fRegressionReturnVal->begin(); it != fRegressionReturnVal->end(); ++it ) {
415 evT->SetTarget(ivar,(*it));
416 ivar++;
417 }
418
419 const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
420 fRegressionReturnVal->clear();
421
422 for (ivar = 0; ivar<evT2->GetNTargets(); ivar++) {
423 fRegressionReturnVal->push_back(evT2->GetTarget(ivar));
424 }
425
426 delete evT;
427
428
429 return (*fRegressionReturnVal);
430}
431
432////////////////////////////////////////////////////////////////////////////////
433/// compute also average RMS values required for adaptive Gaussian
434
436{
437 if (fVRangeMode == kAdaptive || fVRangeMode == kRMS || fVRangeMode == kkNN ) {
438 fAverageRMS.clear();
439 fBinaryTree->CalcStatistics();
440
441 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
442 if (!DoRegression()){ //why there are separate rms for signal and background?
443 Float_t rmsS = fBinaryTree->RMS(Types::kSignal, ivar);
444 Float_t rmsB = fBinaryTree->RMS(Types::kBackground, ivar);
445 fAverageRMS.push_back( (rmsS + rmsB)*0.5 );
446 } else {
447 Float_t rms = fBinaryTree->RMS( ivar );
448 fAverageRMS.push_back( rms );
449 }
450 }
451 }
452}
453
454////////////////////////////////////////////////////////////////////////////////
455/// create binary search trees for signal and background
456
458{
459 if (NULL != fBinaryTree) delete fBinaryTree;
460 fBinaryTree = new BinarySearchTree();
461 if (fNormTree) {
462 fBinaryTree->SetNormalize( kTRUE );
463 }
464
465 fBinaryTree->Fill( GetEventCollection(type) );
466
467 if (fNormTree) {
468 fBinaryTree->NormalizeTree();
469 }
470
471 if (!DoRegression()) {
472 // these are the signal and background scales for the weights
473 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
474 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
475
476 Log() << kVERBOSE << "Signal and background scales: " << fScaleS << " " << fScaleB << Endl;
477 }
478}
479
480////////////////////////////////////////////////////////////////////////////////
481/// defines volume dimensions
482
484 if (GetNvar()==0) {
485 Log() << kFATAL << "GetNvar() == 0" << Endl;
486 return;
487 }
488
489 // init relative scales
490 fkNNMin = Int_t(fNEventsMin);
491 fkNNMax = Int_t(fNEventsMax);
492
493 if (fDelta) delete fDelta;
494 if (fShift) delete fShift;
495 fDelta = new std::vector<Float_t>( GetNvar() );
496 fShift = new std::vector<Float_t>( GetNvar() );
497
498 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
499 switch (fVRangeMode) {
500
501 case kRMS:
502 case kkNN:
503 case kAdaptive:
504 // sanity check
505 if (fAverageRMS.size() != GetNvar())
506 Log() << kFATAL << "<SetVolumeElement> RMS not computed: " << fAverageRMS.size() << Endl;
507 (*fDelta)[ivar] = fAverageRMS[ivar]*fDeltaFrac;
508 Log() << kVERBOSE << "delta of var[" << (*fInputVars)[ivar]
509 << "\t]: " << fAverageRMS[ivar]
510 << "\t | comp with |max - min|: " << (GetXmax( ivar ) - GetXmin( ivar ))
511 << Endl;
512 break;
513 case kMinMax:
514 (*fDelta)[ivar] = (GetXmax( ivar ) - GetXmin( ivar ))*fDeltaFrac;
515 break;
516 case kUnscaled:
517 (*fDelta)[ivar] = fDeltaFrac;
518 break;
519 default:
520 Log() << kFATAL << "<SetVolumeElement> unknown range-set mode: "
521 << fVRangeMode << Endl;
522 }
523 (*fShift)[ivar] = 0.5; // volume is centered around test value
524 }
525
526}
527
528////////////////////////////////////////////////////////////////////////////////
529/// Interface to RootFinder
530
532{
533 return ThisPDERS()->GetVolumeContentForRoot( scale );
534}
535
536////////////////////////////////////////////////////////////////////////////////
537/// count number of events in rescaled volume
538
540{
541 Volume v( *fHelpVolume );
542 v.ScaleInterval( scale );
543
544 Double_t count = GetBinaryTree()->SearchVolume( &v );
545
546 v.Delete();
547 return count;
548}
549
551 std::vector<const BinarySearchTreeNode*>& events,
552 Volume *volume )
553{
554 Float_t count = 0;
555
556 // -------------------------------------------------------------------------
557 //
558 // ==== test of volume search =====
559 //
560 // #define TMVA::MethodPDERS__countByHand__Debug__
561
562#ifdef TMVA_MethodPDERS__countByHand__Debug__
563
564 // starting values
565 count = fBinaryTree->SearchVolume( volume );
566
567 Int_t iS = 0, iB = 0;
568 UInt_t nvar = GetNvar();
569 for (UInt_t ievt_=0; ievt_<Data()->GetNTrainingEvents(); ievt_++) {
570 const Event * ev = GetTrainingEvent(ievt_);
571 Bool_t inV;
572 for (Int_t ivar=0; ivar<nvar; ivar++) {
573 Float_t x = ev->GetValue(ivar);
574 inV = (x > (*volume->Lower)[ivar] && x <= (*volume->Upper)[ivar]);
575 if (!inV) break;
576 }
577 if (inV) {
578 in++;
579 }
580 }
581 Log() << kVERBOSE << "debug: my test: " << in << Endl;// <- ***********tree
582 Log() << kVERBOSE << "debug: binTree: " << count << Endl << Endl;// <- ***********tree
583
584#endif
585
586 // -------------------------------------------------------------------------
587
588 if (fVRangeMode == kRMS || fVRangeMode == kMinMax || fVRangeMode == kUnscaled) { // Constant volume
589
590 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
591 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
592 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
593 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
594 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
595 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
596 }
597 Volume* svolume = new Volume( lb, ub );
598 // starting values
599
600 fBinaryTree->SearchVolume( svolume, &events );
601 }
602 else if (fVRangeMode == kAdaptive) { // adaptive volume
603
604 // -----------------------------------------------------------------------
605
606 // TODO: optimize, perhaps multi stage with broadening limits,
607 // or a different root finding method entirely,
608 if (MethodPDERS_UseFindRoot) {
609
610 // that won't need to search through large volume, where the bottle neck probably is
611
612 fHelpVolume = volume;
613
614 UpdateThis(); // necessary update of static pointer
615 RootFinder rootFinder( this, 0.01, 50, 200, 10 );
616 Double_t scale = rootFinder.Root( (fNEventsMin + fNEventsMax)/2.0 );
617
618 volume->ScaleInterval( scale );
619
620 fBinaryTree->SearchVolume( volume, &events );
621
622 fHelpVolume = NULL;
623 }
624 // -----------------------------------------------------------------------
625 else {
626
627 // starting values
628 count = fBinaryTree->SearchVolume( volume );
629
630 Float_t nEventsO = count;
631 Int_t i_=0;
632
633 while (nEventsO < fNEventsMin) { // this isn't a sain start... try again
634 volume->ScaleInterval( 1.15 );
635 count = fBinaryTree->SearchVolume( volume );
636 nEventsO = count;
637 i_++;
638 }
639 if (i_ > 50) Log() << kWARNING << "warning in event: " << e
640 << ": adaptive volume pre-adjustment reached "
641 << ">50 iterations in while loop (" << i_ << ")" << Endl;
642
643 Float_t nEventsN = nEventsO;
644 Float_t nEventsE = 0.5*(fNEventsMin + fNEventsMax);
645 Float_t scaleO = 1.0;
646 Float_t scaleN = fInitialScale;
647 Float_t scale = scaleN;
648 Float_t scaleBest = scaleN;
649 Float_t nEventsBest = nEventsN;
650
651 for (Int_t ic=1; ic<fMaxVIterations; ic++) {
652 if (nEventsN < fNEventsMin || nEventsN > fNEventsMax) {
653
654 // search for events in rescaled volume
655 Volume* v = new Volume( *volume );
656 v->ScaleInterval( scale );
657 nEventsN = fBinaryTree->SearchVolume( v );
658
659 // determine next iteration (linear approximation)
660 if (nEventsN > 1 && nEventsN - nEventsO != 0)
661 if (scaleN - scaleO != 0)
662 scale += (scaleN - scaleO)/(nEventsN - nEventsO)*(nEventsE - nEventsN);
663 else
664 scale += (-0.01); // should not actually occur...
665 else
666 scale += 0.5; // use much larger volume
667
668 // save old scale
669 scaleN = scale;
670
671 // take if better (don't accept it if too small number of events)
672 if (TMath::Abs(nEventsN - nEventsE) < TMath::Abs(nEventsBest - nEventsE) &&
673 (nEventsN >= fNEventsMin || nEventsBest < nEventsN)) {
674 nEventsBest = nEventsN;
675 scaleBest = scale;
676 }
677
678 v->Delete();
679 delete v;
680 }
681 else break;
682 }
683
684 // last sanity check
685 nEventsN = nEventsBest;
686 // include "1" to cover float precision
687 if (nEventsN < fNEventsMin-1 || nEventsN > fNEventsMax+1)
688 Log() << kWARNING << "warning in event " << e
689 << ": adaptive volume adjustment reached "
690 << "max. #iterations (" << fMaxVIterations << ")"
691 << "[ nEvents: " << nEventsN << " " << fNEventsMin << " " << fNEventsMax << "]"
692 << Endl;
693
694 volume->ScaleInterval( scaleBest );
695 fBinaryTree->SearchVolume( volume, &events );
696 }
697
698 // end of adaptive method
699
700 } else if (fVRangeMode == kkNN) {
701 Volume v(*volume);
702
703 events.clear();
704 // check number of signals in beginning volume
705 Int_t kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 );
706 //if this number is too large return fkNNMax+1
707
708 Int_t t_times = 0; // number of iterations
709
710 while ( !(kNNcount >= fkNNMin && kNNcount <= fkNNMax) ) {
711 if (kNNcount < fkNNMin) { //if we have too less points
712 Float_t scale = 2; //better scale needed
713 volume->ScaleInterval( scale );
714 }
715 else if (kNNcount > fkNNMax) { //if we have too many points
716 Float_t scale = 0.1; //better scale needed
717 volume->ScaleInterval( scale );
718 }
719 events.clear();
720
721 v = *volume ;
722
723 kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 ); //new search
724
725 t_times++;
726
727 if (t_times == fMaxVIterations) {
728 Log() << kWARNING << "warning in event" << e
729 << ": kNN volume adjustment reached "
730 << "max. #iterations (" << fMaxVIterations << ")"
731 << "[ kNN: " << fkNNMin << " " << fkNNMax << Endl;
732 break;
733 }
734 }
735
736 //vector to normalize distance in each dimension
737 Double_t *dim_normalization = new Double_t[GetNvar()];
738 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
739 dim_normalization [ivar] = 1.0 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
740 }
741
742 std::vector<const BinarySearchTreeNode*> tempVector; // temporary vector for events
743
744 if (kNNcount >= fkNNMin) {
745 std::vector<Double_t> *distances = new std::vector<Double_t>( kNNcount );
746
747 //counting the distance for each event
748 for (Int_t j=0;j< Int_t(events.size()) ;j++)
749 (*distances)[j] = GetNormalizedDistance ( e, *events[j], dim_normalization );
750
751 //counting the fkNNMin-th element
752 std::vector<Double_t>::iterator wsk = distances->begin();
753 for (Int_t j=0;j<fkNNMin-1;++j) ++wsk;
754 std::nth_element( distances->begin(), wsk, distances->end() );
755
756 //getting all elements that are closer than fkNNMin-th element
757 //signals
758 for (Int_t j=0;j<Int_t(events.size());j++) {
759 Double_t dist = GetNormalizedDistance( e, *events[j], dim_normalization );
760
761 if (dist <= (*distances)[fkNNMin-1])
762 tempVector.push_back( events[j] );
763 }
764 fMax_distance = (*distances)[fkNNMin-1];
765 delete distances;
766 }
767 delete[] dim_normalization;
768 events = tempVector;
769
770 } else {
771
772 // troubles ahead...
773 Log() << kFATAL << "<GetSample> unknown RangeMode: " << fVRangeMode << Endl;
774 }
775 // -----------------------------------------------------------------------
776}
777
778////////////////////////////////////////////////////////////////////////////////
779
781{
782 std::vector<const BinarySearchTreeNode*> events;
783
784 // computes event weight by counting number of signal and background
785 // events (of reference sample) that are found within given volume
786 // defined by the event
787 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
788 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
789
790 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
791 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
792 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
793 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
794 }
795
796 Volume *volume = new Volume( lb, ub );
797
798 GetSample( e, events, volume );
799 Double_t count = CKernelEstimate( e, events, *volume );
800 delete volume;
801 delete lb;
802 delete ub;
803
804 return count;
805}
806
807////////////////////////////////////////////////////////////////////////////////
808
809void TMVA::MethodPDERS::RRScalc( const Event& e, std::vector<Float_t>* count )
810{
811 std::vector<const BinarySearchTreeNode*> events;
812
813 // computes event weight by counting number of signal and background
814 // events (of reference sample) that are found within given volume
815 // defined by the event
816 std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
817 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
818
819 std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
820 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
821 (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
822 (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
823 }
824 Volume *volume = new Volume( lb, ub );
825
826 GetSample( e, events, volume );
827 RKernelEstimate( e, events, *volume, count );
828
829 delete volume;
830
831 return;
832}
833////////////////////////////////////////////////////////////////////////////////
834/// normalization factors so we can work with radius 1 hyperspheres
835
837 std::vector<const BinarySearchTreeNode*>& events, Volume& v )
838{
839 Double_t *dim_normalization = new Double_t[GetNvar()];
840 for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
841 dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
842
843 Double_t pdfSumS = 0;
844 Double_t pdfSumB = 0;
845
846 // Iteration over sample points
847 for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); ++iev) {
848
849 // First switch to the one dimensional distance
850 Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
851
852 // always working within the hyperelipsoid, except for when we don't
853 // note that rejection ratio goes to 1 as nvar goes to infinity
854 if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
855
856 if ( (*iev)->GetClass()==fSignalClass )
857 pdfSumS += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
858 else
859 pdfSumB += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
860 }
861 pdfSumS = KernelNormalization( pdfSumS < 0. ? 0. : pdfSumS );
862 pdfSumB = KernelNormalization( pdfSumB < 0. ? 0. : pdfSumB );
863
864 delete[] dim_normalization;
865
866 if (pdfSumS < 1e-20 && pdfSumB < 1e-20) return 0.5;
867 if (pdfSumB < 1e-20) return 1.0;
868 if (pdfSumS < 1e-20) return 0.0;
869
870 Float_t r = pdfSumB*fScaleB/(pdfSumS*fScaleS);
871 return 1.0/(r + 1.0); // TODO: propagate errors from here
872}
873
874////////////////////////////////////////////////////////////////////////////////
875/// normalization factors so we can work with radius 1 hyperspheres
876
878 std::vector<const BinarySearchTreeNode*>& events, Volume& v,
879 std::vector<Float_t>* pdfSum )
880{
881 Double_t *dim_normalization = new Double_t[GetNvar()];
882 for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
883 dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
884
885 // std::vector<Float_t> pdfSum;
886 pdfSum->clear();
887 Float_t pdfDiv = 0;
888 fNRegOut = 1; // for now, regression is just for 1 dimension
889
890 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
891 pdfSum->push_back( 0 );
892
893 // Iteration over sample points
894 for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); ++iev) {
895
896 // First switch to the one dimensional distance
897 Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
898
899 // always working within the hyperelipsoid, except for when we don't
900 // note that rejection ratio goes to 1 as nvar goes to infinity
901 if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
902
903 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++) {
904 pdfSum->at(ivar) += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight() * (*iev)->GetTargets()[ivar];
905 pdfDiv += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
906 }
907 }
908
909 delete[] dim_normalization;
910
911 if (pdfDiv == 0)
912 return;
913
914 for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
915 pdfSum->at(ivar) /= pdfDiv;
916
917 return;
918}
919
920////////////////////////////////////////////////////////////////////////////////
921/// from the normalized euclidean distance calculate the distance
922/// for a certain kernel
923
925{
926 switch (fKernelEstimator) {
927 case kBox:
928 case kSphere:
929 return 1;
930 break;
931 case kTeepee:
932 return (1 - normalized_distance);
933 break;
934 case kGauss:
935 return TMath::Gaus( normalized_distance, 0, fGaussSigmaNorm, kFALSE);
936 break;
937 case kSinc3:
938 case kSinc5:
939 case kSinc7:
940 case kSinc9:
941 case kSinc11: {
942 Double_t side_crossings = 2 + ((int) fKernelEstimator) - ((int) kSinc3);
943 return NormSinc (side_crossings * normalized_distance);
944 }
945 break;
946 case kLanczos2:
947 return LanczosFilter (2, normalized_distance);
948 break;
949 case kLanczos3:
950 return LanczosFilter (3, normalized_distance);
951 break;
952 case kLanczos5:
953 return LanczosFilter (5, normalized_distance);
954 break;
955 case kLanczos8:
956 return LanczosFilter (8, normalized_distance);
957 break;
958 case kTrim: {
959 Double_t x = normalized_distance / fMax_distance;
960 x = 1 - x*x*x;
961 return x*x*x;
962 }
963 break;
964 default:
965 Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
966 break;
967 }
968
969 return 0;
970}
971
972////////////////////////////////////////////////////////////////////////////////
973/// Calculating the normalization factor only once (might need a reset at some point.
974/// Can the method be restarted with different params?)
975
977{
978 // Caching jammed to disable function.
979 // It's not really useful after all, badly implemented and untested :-)
980 TTHREAD_TLS(Double_t) ret = 1.0;
981
982 if (ret != 0.0) return ret*pdf;
983
984 // We first normalize by the volume of the hypersphere.
985 switch (fKernelEstimator) {
986 case kBox:
987 case kSphere:
988 ret = 1.;
989 break;
990 case kTeepee:
991 ret = (GetNvar() * (GetNvar() + 1) * TMath::Gamma (((Double_t) GetNvar()) / 2.)) /
992 ( TMath::Power (2., (Double_t) GetNvar() + 1) * TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.));
993 break;
994 case kGauss:
995 // We use full range integral here. Reasonable because of the fast function decay.
996 ret = 1. / TMath::Power ( 2 * TMath::Pi() * fGaussSigmaNorm * fGaussSigmaNorm, ((Double_t) GetNvar()) / 2.);
997 break;
998 case kSinc3:
999 case kSinc5:
1000 case kSinc7:
1001 case kSinc9:
1002 case kSinc11:
1003 case kLanczos2:
1004 case kLanczos3:
1005 case kLanczos5:
1006 case kLanczos8:
1007 // We use the full range integral here. Reasonable because the central lobe dominates it.
1008 ret = 1 / TMath::Power ( 2., (Double_t) GetNvar() );
1009 break;
1010 default:
1011 Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
1012 }
1013
1014 // Normalizing by the full volume
1015 ret *= ( TMath::Power (2., static_cast<Int_t>(GetNvar())) * TMath::Gamma (1 + (((Double_t) GetNvar()) / 2.)) ) /
1016 TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.);
1017
1018 return ret*pdf;
1019}
1020
1021////////////////////////////////////////////////////////////////////////////////
1022/// We use Euclidian metric here. Might not be best or most efficient.
1023
1025 const BinarySearchTreeNode &sample_event,
1026 Double_t *dim_normalization)
1027{
1028 Double_t ret=0;
1029 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1030 Double_t dist = dim_normalization[ivar] * (sample_event.GetEventV()[ivar] - base_event.GetValue(ivar));
1031 ret += dist*dist;
1032 }
1033 // DD 26.11.2008: bugfix: division by (GetNvar()) was missing for correct normalisation
1034 ret /= GetNvar();
1035 return TMath::Sqrt (ret);
1036}
1037
1038////////////////////////////////////////////////////////////////////////////////
1039/// NormSinc
1040
1042{
1043 if (x < 10e-10 && x > -10e-10) {
1044 return 1; // Poor man's l'Hopital
1045 }
1046
1047 Double_t pix = TMath::Pi() * x;
1048 Double_t sinc = TMath::Sin(pix) / pix;
1049 Double_t ret;
1050
1051 if (GetNvar() % 2)
1052 ret = TMath::Power (sinc, static_cast<Int_t>(GetNvar()));
1053 else
1054 ret = TMath::Abs (sinc) * TMath::Power (sinc, static_cast<Int_t>(GetNvar() - 1));
1055
1056 return ret;
1057}
1058
1059////////////////////////////////////////////////////////////////////////////////
1060/// Lanczos Filter
1061
1063{
1064 if (x < 10e-10 && x > -10e-10) {
1065 return 1; // Poor man's l'Hopital
1066 }
1067
1068 Double_t pix = TMath::Pi() * x;
1069 Double_t pixtimesn = pix * ((Double_t) level);
1070 Double_t lanczos = (TMath::Sin(pix) / pix) * (TMath::Sin(pixtimesn) / pixtimesn);
1071 Double_t ret;
1072
1073 if (GetNvar() % 2) ret = TMath::Power (lanczos, static_cast<Int_t>(GetNvar()));
1074 else ret = TMath::Abs (lanczos) * TMath::Power (lanczos, static_cast<Int_t>(GetNvar() - 1));
1075
1076 return ret;
1077}
1078
1079////////////////////////////////////////////////////////////////////////////////
1080/// statistical error estimate for RS estimator
1081
1083 Float_t sumW2S, Float_t sumW2B ) const
1084{
1085 Float_t c = fScaleB/fScaleS;
1086 Float_t d = countS + c*countB; d *= d;
1087
1088 if (d < 1e-10) return 1; // Error is zero because of B = S = 0
1089
1090 Float_t f = c*c/d/d;
1091 Float_t err = f*countB*countB*sumW2S + f*countS*countS*sumW2B;
1092
1093 if (err < 1e-10) return 1; // Error is zero because of B or S = 0
1094
1095 return sqrt(err);
1096}
1097
1098////////////////////////////////////////////////////////////////////////////////
1099/// write weights to xml file
1100
1101void TMVA::MethodPDERS::AddWeightsXMLTo( void* parent ) const
1102{
1103 void* wght = gTools().AddChild(parent, "Weights");
1104 if (fBinaryTree)
1105 fBinaryTree->AddXMLTo(wght);
1106 else
1107 Log() << kFATAL << "Signal and background binary search tree not available" << Endl;
1108 //Log() << kFATAL << "Please implement writing of weights as XML" << Endl;
1109}
1110
1111////////////////////////////////////////////////////////////////////////////////
1112
1114{
1115 if (NULL != fBinaryTree) delete fBinaryTree;
1116 void* treenode = gTools().GetChild(wghtnode);
1117 fBinaryTree = TMVA::BinarySearchTree::CreateFromXML(treenode);
1118 if(!fBinaryTree)
1119 Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1120 if(!fBinaryTree)
1121 Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1122 fBinaryTree->SetPeriode( GetNvar() );
1123 fBinaryTree->CalcStatistics();
1124 fBinaryTree->CountNodes();
1125 if (fBinaryTree->GetSumOfWeights( Types::kSignal ) > 0)
1126 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1127 else fScaleS = 1;
1128 if (fBinaryTree->GetSumOfWeights( Types::kBackground ) > 0)
1129 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1130 else fScaleB = 1;
1131 Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1132 CalcAverages();
1133 SetVolumeElement();
1134 fInitializedVolumeEle = kTRUE;
1135}
1136
1137////////////////////////////////////////////////////////////////////////////////
1138/// read weight info from file
1139
1141{
1142 if (NULL != fBinaryTree) delete fBinaryTree;
1143
1144 fBinaryTree = new BinarySearchTree();
1145
1146 istr >> *fBinaryTree;
1147
1148 fBinaryTree->SetPeriode( GetNvar() );
1149
1150 fBinaryTree->CalcStatistics();
1151
1152 fBinaryTree->CountNodes();
1153
1154 // these are the signal and background scales for the weights
1155 fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1156 fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1157
1158 Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1159
1160 CalcAverages();
1161
1162 SetVolumeElement();
1163
1164 fInitializedVolumeEle = kTRUE;
1165}
1166
1167////////////////////////////////////////////////////////////////////////////////
1168/// write training sample (TTree) to file
1169
1171{
1172}
1173
1174////////////////////////////////////////////////////////////////////////////////
1175/// read training sample from file
1176
1178{
1179}
1180
1181////////////////////////////////////////////////////////////////////////////////
1182/// static pointer to this object
1183
1185{
1186 return GetMethodPDERSThreadLocal();
1187}
1188////////////////////////////////////////////////////////////////////////////////
1189/// update static this pointer
1190
1192{
1193 GetMethodPDERSThreadLocal() = this;
1194}
1195
1196////////////////////////////////////////////////////////////////////////////////
1197/// write specific classifier response
1198
1199void TMVA::MethodPDERS::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1200{
1201 fout << " // not implemented for class: \"" << className << "\"" << std::endl;
1202 fout << "};" << std::endl;
1203}
1204
1205////////////////////////////////////////////////////////////////////////////////
1206/// get help message text
1207///
1208/// typical length of text line:
1209/// "|--------------------------------------------------------------|"
1210
1212{
1213 Log() << Endl;
1214 Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1215 Log() << Endl;
1216 Log() << "PDERS is a generalization of the projective likelihood classifier " << Endl;
1217 Log() << "to N dimensions, where N is the number of input variables used." << Endl;
1218 Log() << "In its adaptive form it is mostly equivalent to k-Nearest-Neighbor" << Endl;
1219 Log() << "(k-NN) methods. If the multidimensional PDF for signal and background" << Endl;
1220 Log() << "were known, this classifier would exploit the full information" << Endl;
1221 Log() << "contained in the input variables, and would hence be optimal. In " << Endl;
1222 Log() << "practice however, huge training samples are necessary to sufficiently " << Endl;
1223 Log() << "populate the multidimensional phase space. " << Endl;
1224 Log() << Endl;
1225 Log() << "The simplest implementation of PDERS counts the number of signal" << Endl;
1226 Log() << "and background events in the vicinity of a test event, and returns" << Endl;
1227 Log() << "a weight according to the majority species of the neighboring events." << Endl;
1228 Log() << "A more involved version of PDERS (selected by the option \"KernelEstimator\")" << Endl;
1229 Log() << "uses Kernel estimation methods to approximate the shape of the PDF." << Endl;
1230 Log() << Endl;
1231 Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
1232 Log() << Endl;
1233 Log() << "PDERS can be very powerful in case of strongly non-linear problems, " << Endl;
1234 Log() << "e.g., distinct islands of signal and background regions. Because of " << Endl;
1235 Log() << "the exponential growth of the phase space, it is important to restrict" << Endl;
1236 Log() << "the number of input variables (dimension) to the strictly necessary." << Endl;
1237 Log() << Endl;
1238 Log() << "Note that PDERS is a slowly responding classifier. Moreover, the necessity" << Endl;
1239 Log() << "to store the entire binary tree in memory, to avoid accessing virtual " << Endl;
1240 Log() << "memory, limits the number of training events that can effectively be " << Endl;
1241 Log() << "used to model the multidimensional PDF." << Endl;
1242 Log() << Endl;
1243 Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
1244 Log() << Endl;
1245 Log() << "If the PDERS response is found too slow when using the adaptive volume " << Endl;
1246 Log() << "size (option \"VolumeRangeMode=Adaptive\"), it might be found beneficial" << Endl;
1247 Log() << "to reduce the number of events required in the volume, and/or to enlarge" << Endl;
1248 Log() << "the allowed range (\"NeventsMin/Max\"). PDERS is relatively insensitive" << Endl;
1249 Log() << "to the width (\"GaussSigma\") of the Gaussian kernel (if used)." << Endl;
1250}
@ kBox
Definition: Buttons.h:29
#define REGISTER_METHOD(CLASS)
for example
ROOT::R::TRInterface & r
Definition: Object.C:4
#define d(i)
Definition: RSha256.hxx:102
#define f(i)
Definition: RSha256.hxx:104
#define c(i)
Definition: RSha256.hxx:101
#define e(i)
Definition: RSha256.hxx:103
int Int_t
Definition: RtypesCore.h:41
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
float Float_t
Definition: RtypesCore.h:53
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassImp(name)
Definition: Rtypes.h:365
int type
Definition: TGX11.cxx:120
double sqrt(double)
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:48
Node for the BinarySearch or Decision Trees.
const std::vector< Float_t > & GetEventV() const
A simple Binary search tree including a volume search method.
static BinarySearchTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
re-create a new tree (decision tree or search tree) from XML
Class that contains all the data information.
Definition: DataSetInfo.h:60
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:237
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:360
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:320
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
This is a generalization of the above Likelihood methods to dimensions, where is the number of inpu...
Definition: MethodPDERS.h:59
void WriteWeightsToStream(TFile &rf) const
write training sample (TTree) to file
void CreateBinarySearchTree(Types::ETreeType type)
create binary search trees for signal and background
BinarySearchTree * fBinaryTree
Definition: MethodPDERS.h:175
virtual ~MethodPDERS(void)
destructor
MethodPDERS(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor for the PDERS method
void GetSample(const Event &e, std::vector< const BinarySearchTreeNode * > &events, Volume *volume)
Float_t GetError(Float_t countS, Float_t countB, Float_t sumW2S, Float_t sumW2B) const
statistical error estimate for RS estimator
static MethodPDERS * ThisPDERS(void)
static pointer to this object
Double_t KernelNormalization(Double_t pdf)
Calculating the normalization factor only once (might need a reset at some point.
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
init the size of a volume element using a defined fraction of the volume containing the entire events
void ReadWeightsFromXML(void *wghtnode)
void ProcessOptions()
process the options specified by the user
void RRScalc(const Event &, std::vector< Float_t > *count)
void GetHelpMessage() const
get help message text
void UpdateThis()
update static this pointer
void Train(void)
this is a dummy training: the preparation work to do is the construction of the binary tree as a poin...
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
Double_t CRScalc(const Event &)
void DeclareOptions()
define the options (their key words) that can be set in the option string.
void CalcAverages()
compute also average RMS values required for adaptive Gaussian
void RKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &, std::vector< Float_t > *pdfSum)
normalization factors so we can work with radius 1 hyperspheres
void ReadWeightsFromStream(std::istream &istr)
read weight info from file
const std::vector< Float_t > & GetRegressionValues()
Double_t NormSinc(Double_t x)
NormSinc.
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
PDERS can handle classification with 2 classes and regression with one or more regression-targets.
void AddWeightsXMLTo(void *parent) const
write weights to xml file
void SetVolumeElement(void)
defines volume dimensions
void Init(void)
default initialisation routine called by all constructors
Double_t LanczosFilter(Int_t level, Double_t x)
Lanczos Filter.
Double_t CKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode * > &, Volume &)
normalization factors so we can work with radius 1 hyperspheres
Double_t ApplyKernelFunction(Double_t normalized_distance)
from the normalized euclidean distance calculate the distance for a certain kernel
Double_t GetNormalizedDistance(const TMVA::Event &base_event, const BinarySearchTreeNode &sample_event, Double_t *dim_normalization)
We use Euclidian metric here. Might not be best or most efficient.
static Double_t IGetVolumeContentForRoot(Double_t)
Interface to RootFinder.
Double_t GetVolumeContentForRoot(Double_t)
count number of events in rescaled volume
Volume * fHelpVolume
Definition: MethodPDERS.h:110
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Definition: RootFinder.h:48
Double_t Root(Double_t refValue)
Root finding using Brents algorithm; taken from CERNLIB function RZERO.
Definition: RootFinder.cxx:72
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1136
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:840
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
Singleton class for Global types used by TMVA.
Definition: Types.h:73
@ kSignal
Definition: Types.h:136
@ kBackground
Definition: Types.h:137
EAnalysisType
Definition: Types.h:127
@ kClassification
Definition: Types.h:128
@ kRegression
Definition: Types.h:129
@ kTraining
Definition: Types.h:144
Volume for BinarySearchTree.
Definition: Volume.h:48
void ScaleInterval(Double_t f)
"scale" the volume by symmetrically blowing up the interval in each dimension
Definition: Volume.cxx:180
Basic string class.
Definition: TString.h:131
Double_t x[n]
Definition: legend1.C:17
double dist(Rotation3D const &r1, Rotation3D const &r2)
Definition: 3DDistances.cxx:48
create variable transformations
Tools & gTools()
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Definition: TMath.cxx:448
Double_t Log(Double_t x)
Definition: TMath.h:750
Double_t Sqrt(Double_t x)
Definition: TMath.h:681
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:725
Double_t Gamma(Double_t z)
Computation of gamma(z) for all z.
Definition: TMath.cxx:348
constexpr Double_t Pi()
Definition: TMath.h:38
Double_t Sin(Double_t)
Definition: TMath.h:627
Short_t Abs(Short_t d)
Definition: TMathBase.h:120