Logo ROOT   6.08/07
Reference Guide
MethodPDERS.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Yair Mahalalel, Joerg Stelzer, Helge Voss, Kai Voss
3 
4 /***********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodPDERS *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation *
12  * *
13  * Authors (alphabetical): *
14  * Krzysztof Danielowski <danielow@cern.ch> - IFJ PAN & AGH, Poland *
15  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
16  * Kamil Kraszewski <kalq@cern.ch> - IFJ PAN & UJ, Poland *
17  * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Poland *
18  * Yair Mahalalel <Yair.Mahalalel@cern.ch> - CERN, Switzerland *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * *
22  * Copyright (c) 2005: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * *
27  * Redistribution and use in source and binary forms, with or without *
28  * modification, are permitted according to the terms listed in LICENSE *
29  * (http://tmva.sourceforge.net/LICENSE) *
30  ***********************************************************************************/
31 
32 //_______________________________________________________________________
33 // Begin_Html
34 /*
35  This is a generalization of the above Likelihood methods to <i>N</i><sub>var</sub>
36  dimensions, where <i>N</i><sub>var</sub> is the number of input variables
37  used in the MVA. If the multi-dimensional probability density functions
38  (PDFs) for signal and background were known, this method contains the entire
39  physical information, and is therefore optimal. Usually, kernel estimation
40  methods are used to approximate the PDFs using the events from the
41  training sample. <br><p></p>
42 
43  A very simple probability density estimator (PDE) has been suggested
44  in <a href="http://arxiv.org/abs/hep-ex/0211019">hep-ex/0211019</a>. The
45  PDE for a given test event is obtained from counting the (normalized)
46  number of signal and background (training) events that occur in the
47  "vicinity" of the test event. The volume that describes "vicinity" is
48  user-defined. A <a href="http://arxiv.org/abs/hep-ex/0211019">search
49  method based on binary-trees</a> is used to effectively reduce the
50  selection time for the range search. Three different volume definitions
51  are optional: <br><p></p>
52  <ul>
53  <li><u>MinMax:</u>
54  the volume is defined in each dimension with respect
55  to the full variable range found in the training sample. </li>
56  <li><u>RMS:</u>
57  the volume is defined in each dimensions with respect
58  to the RMS estimated from the training sample. </li>
59  <li><u>Adaptive:</u>
60  a volume element is defined in each dimensions with
61  respect to the RMS estimated from the training sample. The overall
62  scale of the volume element is then determined for each event so
63  that the total number of events confined in the volume be within
64  a user-defined range.</li>
65  </ul><p></p>
66  The adaptive range search is used by default.
67  // End_Html
68  */
69 //_______________________________________________________________________
70 
71 #include "TMVA/MethodPDERS.h"
72 
73 #include "TMVA/BinaryTree.h"
74 #include "TMVA/BinarySearchTree.h"
75 #include "TMVA/Configurable.h"
76 #include "TMVA/ClassifierFactory.h"
77 #include "TMVA/Event.h"
78 #include "TMVA/IMethod.h"
79 #include "TMVA/MethodBase.h"
80 #include "TMVA/MsgLogger.h"
81 #include "TMVA/RootFinder.h"
82 #include "TMVA/Tools.h"
84 #include "TMVA/Types.h"
85 
86 #include "ThreadLocalStorage.h"
87 #include "TBuffer.h"
88 #include "TFile.h"
89 #include "TObjString.h"
90 #include "TMath.h"
91 
92 #include <assert.h>
93 #include <algorithm>
94 
95 namespace TMVA {
97 };
98 
99 
100 REGISTER_METHOD(PDERS)
101 
103 
104 ////////////////////////////////////////////////////////////////////////////////
105 /// standard constructor for the PDERS method
106 
107  TMVA::MethodPDERS::MethodPDERS( const TString& jobName,
108  const TString& methodTitle,
109  DataSetInfo& theData,
110  const TString& theOption) :
111  MethodBase( jobName, Types::kPDERS, methodTitle, theData, theOption),
112  fFcnCall(0),
113  fVRangeMode(kAdaptive),
114  fKernelEstimator(kBox),
115  fDelta(0),
116  fShift(0),
117  fScaleS(0),
118  fScaleB(0),
119  fDeltaFrac(0),
120  fGaussSigma(0),
121  fGaussSigmaNorm(0),
122  fNRegOut(0),
123  fNEventsMin(0),
124  fNEventsMax(0),
125  fMaxVIterations(0),
126  fInitialScale(0),
127  fInitializedVolumeEle(0),
128  fkNNMin(0),
129  fkNNMax(0),
130  fMax_distance(0),
131  fPrinted(0),
132  fNormTree(0)
133 {
134 }
135 
136 ////////////////////////////////////////////////////////////////////////////////
137 /// construct MethodPDERS through from file
138 
140  const TString& theWeightFile) :
141  MethodBase( Types::kPDERS, theData, theWeightFile),
142  fFcnCall(0),
143  fVRangeMode(kAdaptive),
144  fKernelEstimator(kBox),
145  fDelta(0),
146  fShift(0),
147  fScaleS(0),
148  fScaleB(0),
149  fDeltaFrac(0),
150  fGaussSigma(0),
151  fGaussSigmaNorm(0),
152  fNRegOut(0),
153  fNEventsMin(0),
154  fNEventsMax(0),
155  fMaxVIterations(0),
156  fInitialScale(0),
157  fInitializedVolumeEle(0),
158  fkNNMin(0),
159  fkNNMax(0),
160  fMax_distance(0),
161  fPrinted(0),
162  fNormTree(0)
163 {
164 }
165 
166 ////////////////////////////////////////////////////////////////////////////////
167 /// PDERS can handle classification with 2 classes and regression with one or more regression-targets
168 
170 {
171  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
172  if (type == Types::kRegression) return kTRUE;
173  return kFALSE;
174 }
175 
176 ////////////////////////////////////////////////////////////////////////////////
177 /// default initialisation routine called by all constructors
178 
180 {
181  fBinaryTree = NULL;
182 
183  UpdateThis();
184 
185  // default options
186  fDeltaFrac = 3.0;
189 
190  // special options for Adaptive mode
191  fNEventsMin = 100;
192  fNEventsMax = 200;
193  fMaxVIterations = 150;
194  fInitialScale = 0.99;
195  fGaussSigma = 0.1;
196  fNormTree = kFALSE;
197 
200 
202  fAverageRMS.clear();
203 
204  // the minimum requirement to declare an event signal-like
205  SetSignalReferenceCut( 0.0 );
206 }
207 
208 ////////////////////////////////////////////////////////////////////////////////
209 /// destructor
210 
212 {
213  if (fDelta) delete fDelta;
214  if (fShift) delete fShift;
215 
216  if (NULL != fBinaryTree) delete fBinaryTree;
217 }
218 
219 ////////////////////////////////////////////////////////////////////////////////
220 /// define the options (their key words) that can be set in the option string
221 /// know options:
222 /// VolumeRangeMode <string> Method to determine volume range
223 /// available values are: MinMax
224 /// Unscaled
225 /// RMS
226 /// kNN
227 /// Adaptive <default>
228 ///
229 /// KernelEstimator <string> Kernel estimation function
230 /// available values are: Box <default>
231 /// Sphere
232 /// Teepee
233 /// Gauss
234 /// Sinc3
235 /// Sinc5
236 /// Sinc7
237 /// Sinc9
238 /// Sinc11
239 /// Lanczos2
240 /// Lanczos3
241 /// Lanczos5
242 /// Lanczos8
243 /// Trim
244 ///
245 /// DeltaFrac <float> Ratio of #EventsMin/#EventsMax for MinMax and RMS volume range
246 /// NEventsMin <int> Minimum number of events for adaptive volume range
247 /// NEventsMax <int> Maximum number of events for adaptive volume range
248 /// MaxVIterations <int> Maximum number of iterations for adaptive volume range
249 /// InitialScale <float> Initial scale for adaptive volume range
250 /// GaussSigma <float> Width with respect to the volume size of Gaussian kernel estimator
251 
253 {
254  DeclareOptionRef(fVolumeRange="Adaptive", "VolumeRangeMode", "Method to determine volume size");
255  AddPreDefVal(TString("Unscaled"));
256  AddPreDefVal(TString("MinMax"));
257  AddPreDefVal(TString("RMS"));
258  AddPreDefVal(TString("Adaptive"));
259  AddPreDefVal(TString("kNN"));
260 
261  DeclareOptionRef(fKernelString="Box", "KernelEstimator", "Kernel estimation function");
262  AddPreDefVal(TString("Box"));
263  AddPreDefVal(TString("Sphere"));
264  AddPreDefVal(TString("Teepee"));
265  AddPreDefVal(TString("Gauss"));
266  AddPreDefVal(TString("Sinc3"));
267  AddPreDefVal(TString("Sinc5"));
268  AddPreDefVal(TString("Sinc7"));
269  AddPreDefVal(TString("Sinc9"));
270  AddPreDefVal(TString("Sinc11"));
271  AddPreDefVal(TString("Lanczos2"));
272  AddPreDefVal(TString("Lanczos3"));
273  AddPreDefVal(TString("Lanczos5"));
274  AddPreDefVal(TString("Lanczos8"));
275  AddPreDefVal(TString("Trim"));
276 
277  DeclareOptionRef(fDeltaFrac , "DeltaFrac", "nEventsMin/Max for minmax and rms volume range");
278  DeclareOptionRef(fNEventsMin , "NEventsMin", "nEventsMin for adaptive volume range");
279  DeclareOptionRef(fNEventsMax , "NEventsMax", "nEventsMax for adaptive volume range");
280  DeclareOptionRef(fMaxVIterations, "MaxVIterations", "MaxVIterations for adaptive volume range");
281  DeclareOptionRef(fInitialScale , "InitialScale", "InitialScale for adaptive volume range");
282  DeclareOptionRef(fGaussSigma , "GaussSigma", "Width (wrt volume size) of Gaussian kernel estimator");
283  DeclareOptionRef(fNormTree , "NormTree", "Normalize binary search tree");
284 }
285 
286 ////////////////////////////////////////////////////////////////////////////////
287 /// process the options specified by the user
288 
290 {
292  Log() << kFATAL << "Mechanism to ignore events with negative weights in training not yet available for method: "
293  << GetMethodTypeName()
294  << " --> please remove \"IgnoreNegWeightsInTraining\" option from booking string."
295  << Endl;
296  }
297 
298  fGaussSigmaNorm = fGaussSigma; // * TMath::Sqrt( Double_t(GetNvar()) );
299 
301 
302  if (fVolumeRange == "MinMax" ) fVRangeMode = kMinMax;
303  else if (fVolumeRange == "RMS" ) fVRangeMode = kRMS;
304  else if (fVolumeRange == "Adaptive" ) fVRangeMode = kAdaptive;
305  else if (fVolumeRange == "Unscaled" ) fVRangeMode = kUnscaled;
306  else if (fVolumeRange == "kNN" ) fVRangeMode = kkNN;
307  else {
308  Log() << kFATAL << "VolumeRangeMode parameter '" << fVolumeRange << "' unknown" << Endl;
309  }
310 
311  if (fKernelString == "Box" ) fKernelEstimator = kBox;
312  else if (fKernelString == "Sphere" ) fKernelEstimator = kSphere;
313  else if (fKernelString == "Teepee" ) fKernelEstimator = kTeepee;
314  else if (fKernelString == "Gauss" ) fKernelEstimator = kGauss;
315  else if (fKernelString == "Sinc3" ) fKernelEstimator = kSinc3;
316  else if (fKernelString == "Sinc5" ) fKernelEstimator = kSinc5;
317  else if (fKernelString == "Sinc7" ) fKernelEstimator = kSinc7;
318  else if (fKernelString == "Sinc9" ) fKernelEstimator = kSinc9;
319  else if (fKernelString == "Sinc11" ) fKernelEstimator = kSinc11;
320  else if (fKernelString == "Lanczos2" ) fKernelEstimator = kLanczos2;
321  else if (fKernelString == "Lanczos3" ) fKernelEstimator = kLanczos3;
322  else if (fKernelString == "Lanczos5" ) fKernelEstimator = kLanczos5;
323  else if (fKernelString == "Lanczos8" ) fKernelEstimator = kLanczos8;
324  else if (fKernelString == "Trim" ) fKernelEstimator = kTrim;
325  else {
326  Log() << kFATAL << "KernelEstimator parameter '" << fKernelString << "' unknown" << Endl;
327  }
328 
329  // TODO: Add parameter validation
330 
331  Log() << kVERBOSE << "interpreted option string: vRangeMethod: '"
332  << (const char*)((fVRangeMode == kMinMax) ? "MinMax" :
333  (fVRangeMode == kUnscaled) ? "Unscaled" :
334  (fVRangeMode == kRMS ) ? "RMS" : "Adaptive") << "'" << Endl;
335  if (fVRangeMode == kMinMax || fVRangeMode == kRMS)
336  Log() << kVERBOSE << "deltaFrac: " << fDeltaFrac << Endl;
337  else
338  Log() << kVERBOSE << "nEventsMin/Max, maxVIterations, initialScale: "
339  << fNEventsMin << " " << fNEventsMax
340  << " " << fMaxVIterations << " " << fInitialScale << Endl;
341  Log() << kVERBOSE << "KernelEstimator = " << fKernelString << Endl;
342 }
343 
344 ////////////////////////////////////////////////////////////////////////////////
345 /// this is a dummy training: the preparation work to do is the construction
346 /// of the binary tree as a pointer chain. It is easier to directly save the
347 /// trainingTree in the weight file, and to rebuild the binary tree in the
348 /// test phase from scratch
349 
351 {
352  if (IsNormalised()) Log() << kFATAL << "\"Normalise\" option cannot be used with PDERS; "
353  << "please remove the option from the configuration string, or "
354  << "use \"!Normalise\""
355  << Endl;
356 
358 
359  CalcAverages();
361 
364 }
365 
366 ////////////////////////////////////////////////////////////////////////////////
367 /// init the size of a volume element using a defined fraction of the
368 /// volume containing the entire events
369 
371 {
372  if (fInitializedVolumeEle == kFALSE) {
374 
375  // binary trees must exist
376  assert( fBinaryTree );
377 
378  CalcAverages();
380  }
381 
382  // cannot determine error
383  NoErrorCalc(err, errUpper);
384 
385  return this->CRScalc( *GetEvent() );
386 }
387 
388 ////////////////////////////////////////////////////////////////////////////////
389 
390 const std::vector< Float_t >& TMVA::MethodPDERS::GetRegressionValues()
391 {
392  if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>;
393  fRegressionReturnVal->clear();
394  // init the size of a volume element using a defined fraction of the
395  // volume containing the entire events
396  if (fInitializedVolumeEle == kFALSE) {
398 
399  // binary trees must exist
400  assert( fBinaryTree );
401 
402  CalcAverages();
403 
405  }
406 
407  const Event* ev = GetEvent();
408  this->RRScalc( *ev, fRegressionReturnVal );
409 
410  Event * evT = new Event(*ev);
411  UInt_t ivar = 0;
412  for (std::vector<Float_t>::iterator it = fRegressionReturnVal->begin(); it != fRegressionReturnVal->end(); it++ ) {
413  evT->SetTarget(ivar,(*it));
414  ivar++;
415  }
416 
417  const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
418  fRegressionReturnVal->clear();
419 
420  for (ivar = 0; ivar<evT2->GetNTargets(); ivar++) {
421  fRegressionReturnVal->push_back(evT2->GetTarget(ivar));
422  }
423 
424  delete evT;
425 
426 
427  return (*fRegressionReturnVal);
428 }
429 
430 ////////////////////////////////////////////////////////////////////////////////
431 /// compute also average RMS values required for adaptive Gaussian
432 
434 {
435  if (fVRangeMode == kAdaptive || fVRangeMode == kRMS || fVRangeMode == kkNN ) {
436  fAverageRMS.clear();
438 
439  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
440  if (!DoRegression()){ //why there are separate rms for signal and background?
441  Float_t rmsS = fBinaryTree->RMS(Types::kSignal, ivar);
442  Float_t rmsB = fBinaryTree->RMS(Types::kBackground, ivar);
443  fAverageRMS.push_back( (rmsS + rmsB)*0.5 );
444  } else {
445  Float_t rms = fBinaryTree->RMS( ivar );
446  fAverageRMS.push_back( rms );
447  }
448  }
449  }
450 }
451 
452 ////////////////////////////////////////////////////////////////////////////////
453 /// create binary search trees for signal and background
454 
456 {
457  if (NULL != fBinaryTree) delete fBinaryTree;
459  if (fNormTree) {
461  }
462 
464 
465  if (fNormTree) {
467  }
468 
469  if (!DoRegression()) {
470  // these are the signal and background scales for the weights
473 
474  Log() << kVERBOSE << "Signal and background scales: " << fScaleS << " " << fScaleB << Endl;
475  }
476 }
477 
478 ////////////////////////////////////////////////////////////////////////////////
479 /// defines volume dimensions
480 
482  if (GetNvar()==0) {
483  Log() << kFATAL << "GetNvar() == 0" << Endl;
484  return;
485  }
486 
487  // init relative scales
490 
491  if (fDelta) delete fDelta;
492  if (fShift) delete fShift;
493  fDelta = new std::vector<Float_t>( GetNvar() );
494  fShift = new std::vector<Float_t>( GetNvar() );
495 
496  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
497  switch (fVRangeMode) {
498 
499  case kRMS:
500  case kkNN:
501  case kAdaptive:
502  // sanity check
503  if (fAverageRMS.size() != GetNvar())
504  Log() << kFATAL << "<SetVolumeElement> RMS not computed: " << fAverageRMS.size() << Endl;
505  (*fDelta)[ivar] = fAverageRMS[ivar]*fDeltaFrac;
506  Log() << kVERBOSE << "delta of var[" << (*fInputVars)[ivar]
507  << "\t]: " << fAverageRMS[ivar]
508  << "\t | comp with |max - min|: " << (GetXmax( ivar ) - GetXmin( ivar ))
509  << Endl;
510  break;
511  case kMinMax:
512  (*fDelta)[ivar] = (GetXmax( ivar ) - GetXmin( ivar ))*fDeltaFrac;
513  break;
514  case kUnscaled:
515  (*fDelta)[ivar] = fDeltaFrac;
516  break;
517  default:
518  Log() << kFATAL << "<SetVolumeElement> unknown range-set mode: "
519  << fVRangeMode << Endl;
520  }
521  (*fShift)[ivar] = 0.5; // volume is centered around test value
522  }
523 
524 }
525 
526 ////////////////////////////////////////////////////////////////////////////////
527 /// Interface to RootFinder
528 
530 {
531  return ThisPDERS()->GetVolumeContentForRoot( scale );
532 }
533 
534 ////////////////////////////////////////////////////////////////////////////////
535 /// count number of events in rescaled volume
536 
538 {
539  Volume v( *fHelpVolume );
540  v.ScaleInterval( scale );
541 
542  Double_t count = GetBinaryTree()->SearchVolume( &v );
543 
544  v.Delete();
545  return count;
546 }
547 
549  std::vector<const BinarySearchTreeNode*>& events,
550  Volume *volume )
551 {
552  Float_t count = 0;
553 
554  // -------------------------------------------------------------------------
555  //
556  // ==== test of volume search =====
557  //
558  // #define TMVA::MethodPDERS__countByHand__Debug__
559 
560 #ifdef TMVA_MethodPDERS__countByHand__Debug__
561 
562  // starting values
563  count = fBinaryTree->SearchVolume( volume );
564 
565  Int_t iS = 0, iB = 0;
566  UInt_t nvar = GetNvar();
567  for (UInt_t ievt_=0; ievt_<Data()->GetNTrainingEvents(); ievt_++) {
568  const Event * ev = GetTrainingEvent(ievt_);
569  Bool_t inV;
570  for (Int_t ivar=0; ivar<nvar; ivar++) {
571  Float_t x = ev->GetValue(ivar);
572  inV = (x > (*volume->Lower)[ivar] && x <= (*volume->Upper)[ivar]);
573  if (!inV) break;
574  }
575  if (inV) {
576  in++;
577  }
578  }
579  Log() << kVERBOSE << "debug: my test: " << in << Endl;// <- ***********tree
580  Log() << kVERBOSE << "debug: binTree: " << count << Endl << Endl;// <- ***********tree
581 
582 #endif
583 
584  // -------------------------------------------------------------------------
585 
586  if (fVRangeMode == kRMS || fVRangeMode == kMinMax || fVRangeMode == kUnscaled) { // Constant volume
587 
588  std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
589  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
590  std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
591  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
592  (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
593  (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
594  }
595  Volume* svolume = new Volume( lb, ub );
596  // starting values
597 
598  fBinaryTree->SearchVolume( svolume, &events );
599  }
600  else if (fVRangeMode == kAdaptive) { // adaptive volume
601 
602  // -----------------------------------------------------------------------
603 
604  // TODO: optimize, perhaps multi stage with broadening limits,
605  // or a different root finding method entirely,
607 
608  // that won't need to search through large volume, where the bottle neck probably is
609 
610  fHelpVolume = volume;
611 
612  UpdateThis(); // necessary update of static pointer
613  RootFinder rootFinder( this, 0.01, 50, 200, 10 );
614  Double_t scale = rootFinder.Root( (fNEventsMin + fNEventsMax)/2.0 );
615 
616  volume->ScaleInterval( scale );
617 
618  fBinaryTree->SearchVolume( volume, &events );
619 
620  fHelpVolume = NULL;
621  }
622  // -----------------------------------------------------------------------
623  else {
624 
625  // starting values
626  count = fBinaryTree->SearchVolume( volume );
627 
628  Float_t nEventsO = count;
629  Int_t i_=0;
630 
631  while (nEventsO < fNEventsMin) { // this isn't a sain start... try again
632  volume->ScaleInterval( 1.15 );
633  count = fBinaryTree->SearchVolume( volume );
634  nEventsO = count;
635  i_++;
636  }
637  if (i_ > 50) Log() << kWARNING << "warning in event: " << e
638  << ": adaptive volume pre-adjustment reached "
639  << ">50 iterations in while loop (" << i_ << ")" << Endl;
640 
641  Float_t nEventsN = nEventsO;
642  Float_t nEventsE = 0.5*(fNEventsMin + fNEventsMax);
643  Float_t scaleO = 1.0;
644  Float_t scaleN = fInitialScale;
645  Float_t scale = scaleN;
646  Float_t scaleBest = scaleN;
647  Float_t nEventsBest = nEventsN;
648 
649  for (Int_t ic=1; ic<fMaxVIterations; ic++) {
650  if (nEventsN < fNEventsMin || nEventsN > fNEventsMax) {
651 
652  // search for events in rescaled volume
653  Volume* v = new Volume( *volume );
654  v->ScaleInterval( scale );
655  nEventsN = fBinaryTree->SearchVolume( v );
656 
657  // determine next iteration (linear approximation)
658  if (nEventsN > 1 && nEventsN - nEventsO != 0)
659  if (scaleN - scaleO != 0)
660  scale += (scaleN - scaleO)/(nEventsN - nEventsO)*(nEventsE - nEventsN);
661  else
662  scale += (-0.01); // should not actually occur...
663  else
664  scale += 0.5; // use much larger volume
665 
666  // save old scale
667  scaleN = scale;
668 
669  // take if better (don't accept it if too small number of events)
670  if (TMath::Abs(nEventsN - nEventsE) < TMath::Abs(nEventsBest - nEventsE) &&
671  (nEventsN >= fNEventsMin || nEventsBest < nEventsN)) {
672  nEventsBest = nEventsN;
673  scaleBest = scale;
674  }
675 
676  v->Delete();
677  delete v;
678  }
679  else break;
680  }
681 
682  // last sanity check
683  nEventsN = nEventsBest;
684  // include "1" to cover float precision
685  if (nEventsN < fNEventsMin-1 || nEventsN > fNEventsMax+1)
686  Log() << kWARNING << "warning in event " << e
687  << ": adaptive volume adjustment reached "
688  << "max. #iterations (" << fMaxVIterations << ")"
689  << "[ nEvents: " << nEventsN << " " << fNEventsMin << " " << fNEventsMax << "]"
690  << Endl;
691 
692  volume->ScaleInterval( scaleBest );
693  fBinaryTree->SearchVolume( volume, &events );
694  }
695 
696  // end of adaptive method
697 
698  } else if (fVRangeMode == kkNN) {
699  Volume v(*volume);
700 
701  events.clear();
702  // check number of signals in begining volume
703  Int_t kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 );
704  //if this number is too large return fkNNMax+1
705 
706  Int_t t_times = 0; // number of iterations
707 
708  while ( !(kNNcount >= fkNNMin && kNNcount <= fkNNMax) ) {
709  if (kNNcount < fkNNMin) { //if we have too less points
710  Float_t scale = 2; //better scale needed
711  volume->ScaleInterval( scale );
712  }
713  else if (kNNcount > fkNNMax) { //if we have too many points
714  Float_t scale = 0.1; //better scale needed
715  volume->ScaleInterval( scale );
716  }
717  events.clear();
718 
719  v = *volume ;
720 
721  kNNcount = fBinaryTree->SearchVolumeWithMaxLimit( &v, &events, fkNNMax+1 ); //new search
722 
723  t_times++;
724 
725  if (t_times == fMaxVIterations) {
726  Log() << kWARNING << "warining in event" << e
727  << ": kNN volume adjustment reached "
728  << "max. #iterations (" << fMaxVIterations << ")"
729  << "[ kNN: " << fkNNMin << " " << fkNNMax << Endl;
730  break;
731  }
732  }
733 
734  //vector to normalize distance in each dimension
735  Double_t *dim_normalization = new Double_t[GetNvar()];
736  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
737  dim_normalization [ivar] = 1.0 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
738  }
739 
740  std::vector<const BinarySearchTreeNode*> tempVector; // temporary vector for events
741 
742  if (kNNcount >= fkNNMin) {
743  std::vector<Double_t> *distances = new std::vector<Double_t>( kNNcount );
744 
745  //counting the distance for each event
746  for (Int_t j=0;j< Int_t(events.size()) ;j++)
747  (*distances)[j] = GetNormalizedDistance ( e, *events[j], dim_normalization );
748 
749  //counting the fkNNMin-th element
750  std::vector<Double_t>::iterator wsk = distances->begin();
751  for (Int_t j=0;j<fkNNMin-1;j++) wsk++;
752  std::nth_element( distances->begin(), wsk, distances->end() );
753 
754  //getting all elements that are closer than fkNNMin-th element
755  //signals
756  for (Int_t j=0;j<Int_t(events.size());j++) {
757  Double_t dist = GetNormalizedDistance( e, *events[j], dim_normalization );
758 
759  if (dist <= (*distances)[fkNNMin-1])
760  tempVector.push_back( events[j] );
761  }
762  fMax_distance = (*distances)[fkNNMin-1];
763  delete distances;
764  }
765  delete[] dim_normalization;
766  events = tempVector;
767 
768  } else {
769 
770  // troubles ahead...
771  Log() << kFATAL << "<GetSample> unknown RangeMode: " << fVRangeMode << Endl;
772  }
773  // -----------------------------------------------------------------------
774 }
775 
776 ////////////////////////////////////////////////////////////////////////////////
777 
779 {
780  std::vector<const BinarySearchTreeNode*> events;
781 
782  // computes event weight by counting number of signal and background
783  // events (of reference sample) that are found within given volume
784  // defined by the event
785  std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
786  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
787 
788  std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
789  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
790  (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
791  (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
792  }
793 
794  Volume *volume = new Volume( lb, ub );
795 
796  GetSample( e, events, volume );
797  Double_t count = CKernelEstimate( e, events, *volume );
798  delete volume;
799  delete lb;
800  delete ub;
801 
802  return count;
803 }
804 
805 ////////////////////////////////////////////////////////////////////////////////
806 
807 void TMVA::MethodPDERS::RRScalc( const Event& e, std::vector<Float_t>* count )
808 {
809  std::vector<const BinarySearchTreeNode*> events;
810 
811  // computes event weight by counting number of signal and background
812  // events (of reference sample) that are found within given volume
813  // defined by the event
814  std::vector<Double_t> *lb = new std::vector<Double_t>( GetNvar() );
815  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) (*lb)[ivar] = e.GetValue(ivar);
816 
817  std::vector<Double_t> *ub = new std::vector<Double_t>( *lb );
818  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
819  (*lb)[ivar] -= (*fDelta)[ivar]*(1.0 - (*fShift)[ivar]);
820  (*ub)[ivar] += (*fDelta)[ivar]*(*fShift)[ivar];
821  }
822  Volume *volume = new Volume( lb, ub );
823 
824  GetSample( e, events, volume );
825  RKernelEstimate( e, events, *volume, count );
826 
827  delete volume;
828 
829  return;
830 }
831 ////////////////////////////////////////////////////////////////////////////////
832 /// normalization factors so we can work with radius 1 hyperspheres
833 
835  std::vector<const BinarySearchTreeNode*>& events, Volume& v )
836 {
837  Double_t *dim_normalization = new Double_t[GetNvar()];
838  for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
839  dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
840 
841  Double_t pdfSumS = 0;
842  Double_t pdfSumB = 0;
843 
844  // Iteration over sample points
845  for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); iev++) {
846 
847  // First switch to the one dimensional distance
848  Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
849 
850  // always working within the hyperelipsoid, except for when we don't
851  // note that rejection ratio goes to 1 as nvar goes to infinity
852  if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
853 
854  if ( (*iev)->GetClass()==fSignalClass )
855  pdfSumS += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
856  else
857  pdfSumB += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
858  }
859  pdfSumS = KernelNormalization( pdfSumS < 0. ? 0. : pdfSumS );
860  pdfSumB = KernelNormalization( pdfSumB < 0. ? 0. : pdfSumB );
861 
862  delete[] dim_normalization;
863 
864  if (pdfSumS < 1e-20 && pdfSumB < 1e-20) return 0.5;
865  if (pdfSumB < 1e-20) return 1.0;
866  if (pdfSumS < 1e-20) return 0.0;
867 
868  Float_t r = pdfSumB*fScaleB/(pdfSumS*fScaleS);
869  return 1.0/(r + 1.0); // TODO: propagate errors from here
870 }
871 
872 ////////////////////////////////////////////////////////////////////////////////
873 /// normalization factors so we can work with radius 1 hyperspheres
874 
876  std::vector<const BinarySearchTreeNode*>& events, Volume& v,
877  std::vector<Float_t>* pdfSum )
878 {
879  Double_t *dim_normalization = new Double_t[GetNvar()];
880  for (UInt_t ivar=0; ivar<GetNvar(); ivar++)
881  dim_normalization [ivar] = 2 / ((*v.fUpper)[ivar] - (*v.fLower)[ivar]);
882 
883  // std::vector<Float_t> pdfSum;
884  pdfSum->clear();
885  Float_t pdfDiv = 0;
886  fNRegOut = 1; // for now, regression is just for 1 dimension
887 
888  for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
889  pdfSum->push_back( 0 );
890 
891  // Iteration over sample points
892  for (std::vector<const BinarySearchTreeNode*>::iterator iev = events.begin(); iev != events.end(); iev++) {
893 
894  // First switch to the one dimensional distance
895  Double_t normalized_distance = GetNormalizedDistance (event, *(*iev), dim_normalization);
896 
897  // always working within the hyperelipsoid, except for when we don't
898  // note that rejection ratio goes to 1 as nvar goes to infinity
899  if (normalized_distance > 1 && fKernelEstimator != kBox) continue;
900 
901  for (Int_t ivar = 0; ivar < fNRegOut ; ivar++) {
902  pdfSum->at(ivar) += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight() * (*iev)->GetTargets()[ivar];
903  pdfDiv += ApplyKernelFunction (normalized_distance) * (*iev)->GetWeight();
904  }
905  }
906 
907  delete[] dim_normalization;
908 
909  if (pdfDiv == 0)
910  return;
911 
912  for (Int_t ivar = 0; ivar < fNRegOut ; ivar++)
913  pdfSum->at(ivar) /= pdfDiv;
914 
915  return;
916 }
917 
918 ////////////////////////////////////////////////////////////////////////////////
919 /// from the normalized euclidean distance calculate the distance
920 /// for a certain kernel
921 
923 {
924  switch (fKernelEstimator) {
925  case kBox:
926  case kSphere:
927  return 1;
928  break;
929  case kTeepee:
930  return (1 - normalized_distance);
931  break;
932  case kGauss:
933  return TMath::Gaus( normalized_distance, 0, fGaussSigmaNorm, kFALSE);
934  break;
935  case kSinc3:
936  case kSinc5:
937  case kSinc7:
938  case kSinc9:
939  case kSinc11: {
940  Double_t side_crossings = 2 + ((int) fKernelEstimator) - ((int) kSinc3);
941  return NormSinc (side_crossings * normalized_distance);
942  }
943  break;
944  case kLanczos2:
945  return LanczosFilter (2, normalized_distance);
946  break;
947  case kLanczos3:
948  return LanczosFilter (3, normalized_distance);
949  break;
950  case kLanczos5:
951  return LanczosFilter (5, normalized_distance);
952  break;
953  case kLanczos8:
954  return LanczosFilter (8, normalized_distance);
955  break;
956  case kTrim: {
957  Double_t x = normalized_distance / fMax_distance;
958  x = 1 - x*x*x;
959  return x*x*x;
960  }
961  break;
962  default:
963  Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
964  break;
965  }
966 
967  return 0;
968 }
969 
970 ////////////////////////////////////////////////////////////////////////////////
971 /// Calculating the normalization factor only once (might need a reset at some point.
972 /// Can the method be restarted with different params?)
973 
975 {
976  // Caching jammed to disable function.
977  // It's not really useful afterall, badly implemented and untested :-)
978  TTHREAD_TLS(Double_t) ret = 1.0;
979 
980  if (ret != 0.0) return ret*pdf;
981 
982  // We first normalize by the volume of the hypersphere.
983  switch (fKernelEstimator) {
984  case kBox:
985  case kSphere:
986  ret = 1.;
987  break;
988  case kTeepee:
989  ret = (GetNvar() * (GetNvar() + 1) * TMath::Gamma (((Double_t) GetNvar()) / 2.)) /
990  ( TMath::Power (2., (Double_t) GetNvar() + 1) * TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.));
991  break;
992  case kGauss:
993  // We use full range integral here. Reasonable because of the fast function decay.
994  ret = 1. / TMath::Power ( 2 * TMath::Pi() * fGaussSigmaNorm * fGaussSigmaNorm, ((Double_t) GetNvar()) / 2.);
995  break;
996  case kSinc3:
997  case kSinc5:
998  case kSinc7:
999  case kSinc9:
1000  case kSinc11:
1001  case kLanczos2:
1002  case kLanczos3:
1003  case kLanczos5:
1004  case kLanczos8:
1005  // We use the full range integral here. Reasonable because the central lobe domintes it.
1006  ret = 1 / TMath::Power ( 2., (Double_t) GetNvar() );
1007  break;
1008  default:
1009  Log() << kFATAL << "Kernel estimation function unsupported. Enumerator is " << fKernelEstimator << Endl;
1010  }
1011 
1012  // Normalizing by the full volume
1013  ret *= ( TMath::Power (2., static_cast<Int_t>(GetNvar())) * TMath::Gamma (1 + (((Double_t) GetNvar()) / 2.)) ) /
1014  TMath::Power (TMath::Pi(), ((Double_t) GetNvar()) / 2.);
1015 
1016  return ret*pdf;
1017 }
1018 
1019 ////////////////////////////////////////////////////////////////////////////////
1020 /// We use Euclidian metric here. Might not be best or most efficient.
1021 
1023  const BinarySearchTreeNode &sample_event,
1024  Double_t *dim_normalization)
1025 {
1026  Double_t ret=0;
1027  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
1028  Double_t dist = dim_normalization[ivar] * (sample_event.GetEventV()[ivar] - base_event.GetValue(ivar));
1029  ret += dist*dist;
1030  }
1031  // DD 26.11.2008: bugfix: division by (GetNvar()) was missing for correct normalisation
1032  ret /= GetNvar();
1033  return TMath::Sqrt (ret);
1034 }
1035 
1036 ////////////////////////////////////////////////////////////////////////////////
1037 /// NormSinc
1038 
1040 {
1041  if (x < 10e-10 && x > -10e-10) {
1042  return 1; // Poor man's l'Hopital
1043  }
1044 
1045  Double_t pix = TMath::Pi() * x;
1046  Double_t sinc = TMath::Sin(pix) / pix;
1047  Double_t ret;
1048 
1049  if (GetNvar() % 2)
1050  ret = TMath::Power (sinc, static_cast<Int_t>(GetNvar()));
1051  else
1052  ret = TMath::Abs (sinc) * TMath::Power (sinc, static_cast<Int_t>(GetNvar() - 1));
1053 
1054  return ret;
1055 }
1056 
1057 ////////////////////////////////////////////////////////////////////////////////
1058 /// Lanczos Filter
1059 
1061 {
1062  if (x < 10e-10 && x > -10e-10) {
1063  return 1; // Poor man's l'Hopital
1064  }
1065 
1066  Double_t pix = TMath::Pi() * x;
1067  Double_t pixtimesn = pix * ((Double_t) level);
1068  Double_t lanczos = (TMath::Sin(pix) / pix) * (TMath::Sin(pixtimesn) / pixtimesn);
1069  Double_t ret;
1070 
1071  if (GetNvar() % 2) ret = TMath::Power (lanczos, static_cast<Int_t>(GetNvar()));
1072  else ret = TMath::Abs (lanczos) * TMath::Power (lanczos, static_cast<Int_t>(GetNvar() - 1));
1073 
1074  return ret;
1075 }
1076 
1077 ////////////////////////////////////////////////////////////////////////////////
1078 /// statistical error estimate for RS estimator
1079 
1081  Float_t sumW2S, Float_t sumW2B ) const
1082 {
1084  Float_t d = countS + c*countB; d *= d;
1085 
1086  if (d < 1e-10) return 1; // Error is zero because of B = S = 0
1087 
1088  Float_t f = c*c/d/d;
1089  Float_t err = f*countB*countB*sumW2S + f*countS*countS*sumW2B;
1090 
1091  if (err < 1e-10) return 1; // Error is zero because of B or S = 0
1092 
1093  return sqrt(err);
1094 }
1095 
1096 ////////////////////////////////////////////////////////////////////////////////
1097 /// write weights to xml file
1098 
1099 void TMVA::MethodPDERS::AddWeightsXMLTo( void* parent ) const
1100 {
1101  void* wght = gTools().AddChild(parent, "Weights");
1102  if (fBinaryTree)
1103  fBinaryTree->AddXMLTo(wght);
1104  else
1105  Log() << kFATAL << "Signal and background binary search tree not available" << Endl;
1106  //Log() << kFATAL << "Please implement writing of weights as XML" << Endl;
1107 }
1108 
1109 ////////////////////////////////////////////////////////////////////////////////
1110 
1112 {
1113  if (NULL != fBinaryTree) delete fBinaryTree;
1114  void* treenode = gTools().GetChild(wghtnode);
1116  if(!fBinaryTree)
1117  Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1118  if(!fBinaryTree)
1119  Log() << kFATAL << "Could not create BinarySearchTree from XML" << Endl;
1125  else fScaleS = 1;
1128  else fScaleB = 1;
1129  Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1130  CalcAverages();
1131  SetVolumeElement();
1133 }
1134 
1135 ////////////////////////////////////////////////////////////////////////////////
1136 /// read weight info from file
1137 
1139 {
1140  if (NULL != fBinaryTree) delete fBinaryTree;
1141 
1142  fBinaryTree = new BinarySearchTree();
1143 
1144  istr >> *fBinaryTree;
1145 
1146  fBinaryTree->SetPeriode( GetNvar() );
1147 
1148  fBinaryTree->CalcStatistics();
1149 
1150  fBinaryTree->CountNodes();
1151 
1152  // these are the signal and background scales for the weights
1153  fScaleS = 1.0/fBinaryTree->GetSumOfWeights( Types::kSignal );
1154  fScaleB = 1.0/fBinaryTree->GetSumOfWeights( Types::kBackground );
1155 
1156  Log() << kINFO << "signal and background scales: " << fScaleS << " " << fScaleB << Endl;
1157 
1158  CalcAverages();
1159 
1160  SetVolumeElement();
1161 
1163 }
1164 
1165 ////////////////////////////////////////////////////////////////////////////////
1166 /// write training sample (TTree) to file
1167 
1169 {
1170 }
1171 
1172 ////////////////////////////////////////////////////////////////////////////////
1173 /// read training sample from file
1174 
1176 {
1177 }
1178 
1179 ////////////////////////////////////////////////////////////////////////////////
1180 /// static pointer to this object
1181 
1183 {
1184  return GetMethodPDERSThreadLocal();
1185 }
1186 ////////////////////////////////////////////////////////////////////////////////
1187 /// update static this pointer
1188 
1190 {
1191  GetMethodPDERSThreadLocal() = this;
1192 }
1193 
1194 ////////////////////////////////////////////////////////////////////////////////
1195 /// write specific classifier response
1196 
1197 void TMVA::MethodPDERS::MakeClassSpecific( std::ostream& fout, const TString& className ) const
1198 {
1199  fout << " // not implemented for class: \"" << className << "\"" << std::endl;
1200  fout << "};" << std::endl;
1201 }
1202 
1203 ////////////////////////////////////////////////////////////////////////////////
1204 /// get help message text
1205 ///
1206 /// typical length of text line:
1207 /// "|--------------------------------------------------------------|"
1208 
1210 {
1211  Log() << Endl;
1212  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1213  Log() << Endl;
1214  Log() << "PDERS is a generalization of the projective likelihood classifier " << Endl;
1215  Log() << "to N dimensions, where N is the number of input variables used." << Endl;
1216  Log() << "In its adaptive form it is mostly equivalent to k-Nearest-Neighbor" << Endl;
1217  Log() << "(k-NN) methods. If the multidimensional PDF for signal and background" << Endl;
1218  Log() << "were known, this classifier would exploit the full information" << Endl;
1219  Log() << "contained in the input variables, and would hence be optimal. In " << Endl;
1220  Log() << "practice however, huge training samples are necessary to sufficiently " << Endl;
1221  Log() << "populate the multidimensional phase space. " << Endl;
1222  Log() << Endl;
1223  Log() << "The simplest implementation of PDERS counts the number of signal" << Endl;
1224  Log() << "and background events in the vicinity of a test event, and returns" << Endl;
1225  Log() << "a weight according to the majority species of the neighboring events." << Endl;
1226  Log() << "A more involved version of PDERS (selected by the option \"KernelEstimator\")" << Endl;
1227  Log() << "uses Kernel estimation methods to approximate the shape of the PDF." << Endl;
1228  Log() << Endl;
1229  Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
1230  Log() << Endl;
1231  Log() << "PDERS can be very powerful in case of strongly non-linear problems, " << Endl;
1232  Log() << "e.g., distinct islands of signal and background regions. Because of " << Endl;
1233  Log() << "the exponential growth of the phase space, it is important to restrict" << Endl;
1234  Log() << "the number of input variables (dimension) to the strictly necessary." << Endl;
1235  Log() << Endl;
1236  Log() << "Note that PDERS is a slowly responding classifier. Moreover, the necessity" << Endl;
1237  Log() << "to store the entire binary tree in memory, to avoid accessing virtual " << Endl;
1238  Log() << "memory, limits the number of training events that can effectively be " << Endl;
1239  Log() << "used to model the multidimensional PDF." << Endl;
1240  Log() << Endl;
1241  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
1242  Log() << Endl;
1243  Log() << "If the PDERS response is found too slow when using the adaptive volume " << Endl;
1244  Log() << "size (option \"VolumeRangeMode=Adaptive\"), it might be found beneficial" << Endl;
1245  Log() << "to reduce the number of events required in the volume, and/or to enlarge" << Endl;
1246  Log() << "the allowed range (\"NeventsMin/Max\"). PDERS is relatively insensitive" << Endl;
1247  Log() << "to the width (\"GaussSigma\") of the Gaussian kernel (if used)." << Endl;
1248 }
Int_t SearchVolumeWithMaxLimit(TMVA::Volume *, std::vector< const TMVA::BinarySearchTreeNode *> *events=0, Int_t=-1)
std::vector< Double_t > * fLower
Definition: Volume.h:75
virtual void * AddXMLTo(void *parent) const
add attributes to XML
Definition: BinaryTree.cxx:135
void WriteWeightsToStream(TFile &rf) const
write training sample (TTree) to file
void Delete(void)
Definition: Volume.cxx:127
void UpdateThis()
update static this pointer
double dist(Rotation3D const &r1, Rotation3D const &r2)
Definition: 3DDistances.cxx:48
virtual ~MethodPDERS(void)
destructor
Definition: Buttons.h:29
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
Double_t GetNormalizedDistance(const TMVA::Event &base_event, const BinarySearchTreeNode &sample_event, Double_t *dim_normalization)
We use Euclidian metric here. Might not be best or most efficient.
const Bool_t MethodPDERS_UseFindRoot
Definition: MethodPDERS.cxx:96
std::vector< Float_t > * fShift
Definition: MethodPDERS.h:186
Float_t fMaxVIterations
Definition: MethodPDERS.h:200
float Float_t
Definition: RtypesCore.h:53
void GetHelpMessage() const
get help message text
return c
BinarySearchTree * fBinaryTree
Definition: MethodPDERS.h:183
UInt_t GetNvar() const
Definition: MethodBase.h:340
Double_t NormSinc(Double_t x)
NormSinc.
MsgLogger & Log() const
Definition: Configurable.h:128
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:50
EAnalysisType
Definition: Types.h:129
void CreateBinarySearchTree(Types::ETreeType type)
create binary search trees for signal and background
Double_t Fill(const std::vector< TMVA::Event *> &events, const std::vector< Int_t > &theVars, Int_t theType=-1)
create the search tree from the event collection using ONLY the variables specified in "theVars" ...
std::vector< Double_t > * fUpper
Definition: Volume.h:76
Basic string class.
Definition: TString.h:137
void AddWeightsXMLTo(void *parent) const
write weights to xml file
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:390
void ScaleInterval(Double_t f)
Definition: Volume.cxx:141
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
void RKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode *> &, Volume &, std::vector< Float_t > *pdfSum)
normalization factors so we can work with radius 1 hyperspheres
Float_t fInitialScale
Definition: MethodPDERS.h:201
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
init the size of a volume element using a defined fraction of the volume containing the entire events...
Double_t Gamma(Double_t z)
Computation of gamma(z) for all z.
Definition: TMath.cxx:352
Double_t GetSumOfWeights(void) const
return the sum of event (node) weights
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:501
Double_t fMax_distance
Definition: MethodPDERS.h:208
void GetSample(const Event &e, std::vector< const BinarySearchTreeNode *> &events, Volume *volume)
void NormalizeTree()
Normalisation of tree.
UInt_t fSignalClass
Definition: MethodBase.h:683
void Init(void)
default initialisation routine called by all constructors
static Double_t IGetVolumeContentForRoot(Double_t)
Interface to RootFinder.
double sqrt(double)
Tools & gTools()
Definition: Tools.cxx:79
Double_t x[n]
Definition: legend1.C:17
void ReadWeightsFromStream(std::istream &istr)
read weight info from file
const Event * GetEvent() const
Definition: MethodBase.h:745
DataSet * Data() const
Definition: MethodBase.h:405
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
TString fVolumeRange
Definition: MethodPDERS.h:154
Double_t GetXmin(Int_t ivar) const
Definition: MethodBase.h:352
Float_t GetError(Float_t countS, Float_t countB, Float_t sumW2S, Float_t sumW2B) const
statistical error estimate for RS estimator
Bool_t DoRegression() const
Definition: MethodBase.h:434
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:93
void CalcStatistics(TMVA::Node *n=0)
calculate basic statistics (mean, rms for each variable)
const Event * GetTrainingEvent(Long64_t ievt) const
Definition: MethodBase.h:765
UInt_t GetNTargets() const
accessor to the number of targets
Definition: Event.cxx:316
Double_t Root(Double_t refValue)
Root finding using Brents algorithm; taken from CERNLIB function RZERO.
Definition: RootFinder.cxx:68
void CalcAverages()
compute also average RMS values required for adaptive Gaussian
Double_t CRScalc(const Event &)
Double_t GetXmax(Int_t ivar) const
Definition: MethodBase.h:353
TString fKernelString
Definition: MethodPDERS.h:155
static BinarySearchTree * CreateFromXML(void *node, UInt_t tmva_Version_Code=TMVA_VERSION_CODE)
re-create a new tree (decision tree or search tree) from XML
void DeclareOptions()
define the options (their key words) that can be set in the option string know options: VolumeRangeMo...
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:104
Double_t fGaussSigma
Definition: MethodPDERS.h:192
TRandom2 r(17)
void SetVolumeElement(void)
defines volume dimensions
SVector< double, 2 > v
Definition: Dict.h:5
Bool_t fInitializedVolumeEle
Definition: MethodPDERS.h:203
BinarySearchTree * GetBinaryTree(void) const
Definition: MethodPDERS.h:122
unsigned int UInt_t
Definition: RtypesCore.h:42
const Event * InverseTransform(const Event *, Bool_t suppressIfNoTargets=true) const
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:356
void SetNormalize(Bool_t norm)
Double_t Gaus(Double_t x, Double_t mean=0, Double_t sigma=1, Bool_t norm=kFALSE)
Calculate a gaussian function with mean and sigma.
Definition: TMath.cxx:452
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:233
Double_t Pi()
Definition: TMath.h:44
void ReadWeightsFromXML(void *wghtnode)
static MethodPDERS * ThisPDERS(void)
static pointer to this object
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:680
void Train(void)
this is a dummy training: the preparation work to do is the construction of the binary tree as a poin...
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
Volume * fHelpVolume
Definition: MethodPDERS.h:118
#define ClassImp(name)
Definition: Rtypes.h:279
double f(double x)
std::vector< Float_t > * fDelta
Definition: MethodPDERS.h:185
double Double_t
Definition: RtypesCore.h:55
Float_t RMS(Types::ESBType sb, UInt_t var)
enum TMVA::MethodPDERS::EKernelEstimator fKernelEstimator
Bool_t IsNormalised() const
Definition: MethodBase.h:490
int type
Definition: TGX11.cxx:120
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
void AddPreDefVal(const T &)
Definition: Configurable.h:174
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
PDERS can handle classification with 2 classes and regression with one or more regression-targets.
void ExitFromTraining()
Definition: MethodBase.h:458
Double_t CKernelEstimate(const Event &, std::vector< const BinarySearchTreeNode *> &, Volume &)
normalization factors so we can work with radius 1 hyperspheres
std::vector< Float_t > fAverageRMS
Definition: MethodPDERS.h:187
UInt_t CountNodes(Node *n=NULL)
return the number of nodes in the tree. (make a new count –> takes time)
Definition: BinaryTree.cxx:104
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
const std::vector< Float_t > & GetEventV() const
const std::vector< Float_t > & GetRegressionValues()
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
TString GetMethodTypeName() const
Definition: MethodBase.h:328
enum TMVA::MethodPDERS::EVolumeRangeMode fVRangeMode
Double_t Sin(Double_t)
Definition: TMath.h:421
void ProcessOptions()
process the options specified by the user
#define NULL
Definition: Rtypes.h:82
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:591
Double_t GetVolumeContentForRoot(Double_t)
count number of events in rescaled volume
MethodPDERS(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption)
standard constructor for the PDERS method
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
Double_t SearchVolume(Volume *, std::vector< const TMVA::BinarySearchTreeNode *> *events=0)
search the whole tree and add up all weigths of events that lie within the given voluem ...
static MethodPDERS *& GetMethodPDERSThreadLocal()
Definition: MethodPDERS.h:222
Double_t ApplyKernelFunction(Double_t normalized_distance)
from the normalized euclidean distance calculate the distance for a certain kernel ...
const Bool_t kTRUE
Definition: Rtypes.h:91
Double_t fGaussSigmaNorm
Definition: MethodPDERS.h:193
Double_t LanczosFilter(Int_t level, Double_t x)
Lanczos Filter.
void RRScalc(const Event &, std::vector< Float_t > *count)
Double_t KernelNormalization(Double_t pdf)
Calculating the normalization factor only once (might need a reset at some point. ...
void MakeClassSpecific(std::ostream &, const TString &) const
write specific classifier response
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:819
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:360