Logo ROOT   6.14/05
Reference Guide
MethodPDEFoam.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Tancredi Carli, Dominik Dannheim, Alexander Voigt
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate Data analysis *
6  * Package: TMVA *
7  * Class : MethodPDEFoam *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Tancredi Carli - CERN, Switzerland *
15  * Dominik Dannheim - CERN, Switzerland *
16  * Alexander Voigt - TU Dresden, Germany *
17  * Peter Speckmayer - CERN, Switzerland *
18  * *
19  * Original author of the TFoam implementation: *
20  * S. Jadach - Institute of Nuclear Physics, Cracow, Poland *
21  * *
22  * Copyright (c) 2008, 2010: *
23  * CERN, Switzerland *
24  * MPI-K Heidelberg, Germany *
25  * *
26  * Redistribution and use in source and binary forms, with or without *
27  * modification, are permitted according to the terms listed in LICENSE *
28  * (http://tmva.sourceforge.net/LICENSE) *
29  **********************************************************************************/
30 
31 /*! \class TMVA::MethodPDEFoam
32 \ingroup TMVA
33 
34 The PDEFoam method is an extension of the PDERS method, which
35 divides the multi-dimensional phase space in a finite number of
36 hyper-rectangles (cells) of constant event density. This "foam" of
37 cells is filled with averaged probability-density information
38 sampled from a training event sample.
39 
40 For a given number of cells, the binning algorithm adjusts the size
41 and position of the cells inside the multidimensional phase space
42 based on a binary-split algorithm, minimizing the variance of the
43 event density in the cell.
44 The binned event density information of the final foam is stored in
45 binary trees, allowing for a fast and memory-efficient
46 classification of events.
47 
48 The implementation of PDEFoam is based on the Monte-Carlo
49 integration package TFoam included in the analysis package ROOT.
50 */
51 
52 #include "TMVA/MethodPDEFoam.h"
53 
54 #include "TMVA/ClassifierFactory.h"
55 #include "TMVA/Config.h"
56 #include "TMVA/Configurable.h"
57 #include "TMVA/CrossEntropy.h"
58 #include "TMVA/DataSet.h"
59 #include "TMVA/DataSetInfo.h"
60 #include "TMVA/Event.h"
61 #include "TMVA/GiniIndex.h"
63 #include "TMVA/IMethod.h"
65 #include "TMVA/MethodBase.h"
66 #include "TMVA/MsgLogger.h"
67 #include "TMVA/Ranking.h"
68 #include "TMVA/SdivSqrtSplusB.h"
69 #include "TMVA/SeparationBase.h"
70 #include "TMVA/Tools.h"
71 #include "TMVA/Types.h"
72 #include "TMVA/VariableInfo.h"
73 
74 #include "TMath.h"
75 #include "TH1F.h"
76 #include "TFile.h"
77 
78 REGISTER_METHOD(PDEFoam)
79 
81 
82 ////////////////////////////////////////////////////////////////////////////////
83 /// init PDEFoam objects
84 
86  const TString& methodTitle,
87  DataSetInfo& dsi,
88  const TString& theOption ) :
89  MethodBase( jobName, Types::kPDEFoam, methodTitle, dsi, theOption)
90  , fSigBgSeparated(kFALSE)
91  , fFrac(0.001)
92  , fDiscrErrCut(-1.0)
93  , fVolFrac(1.0/15.0)
94  , fnCells(999)
95  , fnActiveCells(500)
96  , fnSampl(2000)
97  , fnBin(5)
98  , fEvPerBin(10000)
99  , fCompress(kTRUE)
100  , fMultiTargetRegression(kFALSE)
101  , fNmin(100)
102  , fCutNmin(kTRUE)
103  , fMaxDepth(0)
104  , fKernelStr("None")
105  , fKernel(kNone)
106  , fKernelEstimator(NULL)
107  , fTargetSelectionStr("Mean")
108  , fTargetSelection(kMean)
109  , fFillFoamWithOrigWeights(kFALSE)
110  , fUseYesNoCell(kFALSE)
111  , fDTLogic("None")
112  , fDTSeparation(kFoam)
113  , fPeekMax(kTRUE)
114  , fXmin()
115  , fXmax()
116  , fFoam()
117 {
118 }
119 
120 ////////////////////////////////////////////////////////////////////////////////
121 /// constructor from weight file
122 
124  const TString& theWeightFile) :
125  MethodBase( Types::kPDEFoam, dsi, theWeightFile)
127  , fFrac(0.001)
128  , fDiscrErrCut(-1.0)
129  , fVolFrac(1.0/15.0)
130  , fnCells(999)
131  , fnActiveCells(500)
132  , fnSampl(2000)
133  , fnBin(5)
134  , fEvPerBin(10000)
135  , fCompress(kTRUE)
137  , fNmin(100)
138  , fCutNmin(kTRUE)
139  , fMaxDepth(0)
140  , fKernelStr("None")
141  , fKernel(kNone)
142  , fKernelEstimator(NULL)
143  , fTargetSelectionStr("Mean")
144  , fTargetSelection(kMean)
147  , fDTLogic("None")
148  , fDTSeparation(kFoam)
149  , fPeekMax(kTRUE)
150  , fXmin()
151  , fXmax()
152  , fFoam()
153 {
154 }
155 
156 ////////////////////////////////////////////////////////////////////////////////
157 /// PDEFoam can handle classification with multiple classes and regression
158 /// with one or more regression-targets
159 
161 {
162  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
163  if (type == Types::kMulticlass ) return kTRUE;
164  if (type == Types::kRegression) return kTRUE;
165  return kFALSE;
166 }
167 
168 ////////////////////////////////////////////////////////////////////////////////
169 /// default initialization called by all constructors
170 
172 {
173  // init PDEFoam options
174  fSigBgSeparated = kFALSE; // default: unified foam
175  fFrac = 0.001; // fraction of outlier events
176  fDiscrErrCut = -1.; // cut on discriminator error
177  fVolFrac = 1./15.; // range searching box size
178  fnActiveCells = 500; // number of active cells to create
179  fnCells = fnActiveCells*2-1; // total number of cells
180  fnSampl = 2000; // number of sampling points in cell
181  fnBin = 5; // number of bins in edge histogram
182  fEvPerBin = 10000; // number of events per bin
183  fNmin = 100; // minimum number of events in cell
184  fMaxDepth = 0; // cell tree depth (default: unlimited)
185  fFillFoamWithOrigWeights = kFALSE; // fill orig. weights into foam
186  fUseYesNoCell = kFALSE; // return -1 or 1 for bg or signal events
187  fDTLogic = "None"; // decision tree algorithmus
188  fDTSeparation = kFoam; // separation type
189 
190  fKernel = kNone; // default: use no kernel
191  fKernelEstimator= NULL; // kernel estimator used during evaluation
192  fTargetSelection= kMean; // default: use mean for target selection (only multi target regression!)
193 
194  fCompress = kTRUE; // compress ROOT output file
195  fMultiTargetRegression = kFALSE; // multi-target regression
196 
197  DeleteFoams();
198 
199  if (fUseYesNoCell)
200  SetSignalReferenceCut( 0.0 ); // MVA output in [-1, 1]
201  else
202  SetSignalReferenceCut( 0.5 ); // MVA output in [0, 1]
203 }
204 
205 ////////////////////////////////////////////////////////////////////////////////
206 /// Declare MethodPDEFoam options
207 
209 {
210  DeclareOptionRef( fSigBgSeparated = kFALSE, "SigBgSeparate", "Separate foams for signal and background" );
211  DeclareOptionRef( fFrac = 0.001, "TailCut", "Fraction of outlier events that are excluded from the foam in each dimension" );
212  DeclareOptionRef( fVolFrac = 1./15., "VolFrac", "Size of sampling box, used for density calculation during foam build-up (maximum value: 1.0 is equivalent to volume of entire foam)");
213  DeclareOptionRef( fnActiveCells = 500, "nActiveCells", "Maximum number of active cells to be created by the foam");
214  DeclareOptionRef( fnSampl = 2000, "nSampl", "Number of generated MC events per cell");
215  DeclareOptionRef( fnBin = 5, "nBin", "Number of bins in edge histograms");
216  DeclareOptionRef( fCompress = kTRUE, "Compress", "Compress foam output file");
217  DeclareOptionRef( fMultiTargetRegression = kFALSE, "MultiTargetRegression", "Do regression with multiple targets");
218  DeclareOptionRef( fNmin = 100, "Nmin", "Number of events in cell required to split cell");
219  DeclareOptionRef( fMaxDepth = 0, "MaxDepth", "Maximum depth of cell tree (0=unlimited)");
220  DeclareOptionRef( fFillFoamWithOrigWeights = kFALSE, "FillFoamWithOrigWeights", "Fill foam with original or boost weights");
221  DeclareOptionRef( fUseYesNoCell = kFALSE, "UseYesNoCell", "Return -1 or 1 for bkg or signal like events");
222  DeclareOptionRef( fDTLogic = "None", "DTLogic", "Use decision tree algorithm to split cells");
223  AddPreDefVal(TString("None"));
224  AddPreDefVal(TString("GiniIndex"));
225  AddPreDefVal(TString("MisClassificationError"));
226  AddPreDefVal(TString("CrossEntropy"));
227  AddPreDefVal(TString("GiniIndexWithLaplace"));
228  AddPreDefVal(TString("SdivSqrtSplusB"));
229 
230  DeclareOptionRef( fKernelStr = "None", "Kernel", "Kernel type used");
231  AddPreDefVal(TString("None"));
232  AddPreDefVal(TString("Gauss"));
233  AddPreDefVal(TString("LinNeighbors"));
234  DeclareOptionRef( fTargetSelectionStr = "Mean", "TargetSelection", "Target selection method");
235  AddPreDefVal(TString("Mean"));
236  AddPreDefVal(TString("Mpv"));
237 }
238 
239 
240 ////////////////////////////////////////////////////////////////////////////////
241 /// options that are used ONLY for the READER to ensure backward compatibility
242 
245  DeclareOptionRef(fCutNmin = kTRUE, "CutNmin", "Requirement for minimal number of events in cell");
246  DeclareOptionRef(fPeekMax = kTRUE, "PeekMax", "Peek cell with max. loss for the next split");
247 }
248 
249 ////////////////////////////////////////////////////////////////////////////////
250 /// process user options
251 
253 {
254  if (!(fFrac>=0. && fFrac<=1.)) {
255  Log() << kWARNING << "TailCut not in [0.,1] ==> using 0.001 instead" << Endl;
256  fFrac = 0.001;
257  }
258 
259  if (fnActiveCells < 1) {
260  Log() << kWARNING << "invalid number of active cells specified: "
261  << fnActiveCells << "; setting nActiveCells=2" << Endl;
262  fnActiveCells = 2;
263  }
264  fnCells = fnActiveCells*2-1;
265 
266  // DT logic is only applicable if a single foam is trained
267  if (fSigBgSeparated && fDTLogic != "None") {
268  Log() << kFATAL << "Decision tree logic works only for a single foam (SigBgSeparate=F)" << Endl;
269  }
270 
271  // set separation to use
272  if (fDTLogic == "None")
273  fDTSeparation = kFoam;
274  else if (fDTLogic == "GiniIndex")
275  fDTSeparation = kGiniIndex;
276  else if (fDTLogic == "MisClassificationError")
277  fDTSeparation = kMisClassificationError;
278  else if (fDTLogic == "CrossEntropy")
280  else if (fDTLogic == "GiniIndexWithLaplace")
281  fDTSeparation = kGiniIndexWithLaplace;
282  else if (fDTLogic == "SdivSqrtSplusB")
283  fDTSeparation = kSdivSqrtSplusB;
284  else {
285  Log() << kWARNING << "Unknown separation type: " << fDTLogic
286  << ", setting to None" << Endl;
287  fDTLogic = "None";
288  fDTSeparation = kFoam;
289  }
290 
291  if (fKernelStr == "None" ) fKernel = kNone;
292  else if (fKernelStr == "Gauss" ) fKernel = kGaus;
293  else if (fKernelStr == "LinNeighbors") fKernel = kLinN;
294 
295  if (fTargetSelectionStr == "Mean" ) fTargetSelection = kMean;
296  else fTargetSelection = kMpv;
297  // sanity check: number of targets > 1 and MultiTargetRegression=F
298  // makes no sense --> set MultiTargetRegression=T
299  if (DoRegression() && Data()->GetNTargets() > 1 && !fMultiTargetRegression) {
300  Log() << kWARNING << "Warning: number of targets > 1"
301  << " and MultiTargetRegression=F was set, this makes no sense!"
302  << " --> I'm setting MultiTargetRegression=T" << Endl;
304  }
305 }
306 
307 ////////////////////////////////////////////////////////////////////////////////
308 /// destructor
309 
311 {
312  DeleteFoams();
313 
314  if (fKernelEstimator != NULL)
315  delete fKernelEstimator;
316 }
317 
318 ////////////////////////////////////////////////////////////////////////////////
319 /// Determine foam range [fXmin, fXmax] for all dimensions, such
320 /// that a fraction of 'fFrac' events lie outside the foam.
321 
323 {
324  fXmin.clear();
325  fXmax.clear();
326  UInt_t kDim = GetNvar(); // == Data()->GetNVariables();
327  UInt_t tDim = Data()->GetNTargets();
328  UInt_t vDim = Data()->GetNVariables();
330  kDim += tDim;
331 
332  Float_t *xmin = new Float_t[kDim];
333  Float_t *xmax = new Float_t[kDim];
334 
335  // set default values
336  for (UInt_t dim=0; dim<kDim; dim++) {
337  xmin[dim] = FLT_MAX;
338  xmax[dim] = FLT_MIN;
339  }
340 
341  Log() << kDEBUG << "Number of training events: " << Data()->GetNTrainingEvents() << Endl;
342  Int_t nevoutside = (Int_t)((Data()->GetNTrainingEvents())*(fFrac)); // number of events that are outside the range
343  Int_t rangehistbins = 10000; // number of bins in histos
344 
345  // loop over all testing signal and BG events and clac minimal and
346  // maximal value of every variable
347  for (Long64_t i=0; i<(GetNEvents()); i++) { // events loop
348  const Event* ev = GetEvent(i);
349  for (UInt_t dim=0; dim<kDim; dim++) { // variables loop
350  Float_t val;
352  if (dim < vDim)
353  val = ev->GetValue(dim);
354  else
355  val = ev->GetTarget(dim-vDim);
356  }
357  else
358  val = ev->GetValue(dim);
359 
360  if (val<xmin[dim])
361  xmin[dim] = val;
362  if (val>xmax[dim])
363  xmax[dim] = val;
364  }
365  }
366 
367  // Create and fill histograms for each dimension (with same events
368  // as before), to determine range based on number of events outside
369  // the range
370  TH1F **range_h = new TH1F*[kDim];
371  for (UInt_t dim=0; dim<kDim; dim++) {
372  range_h[dim] = new TH1F(Form("range%i", dim), "range", rangehistbins, xmin[dim], xmax[dim]);
373  }
374 
375  // fill all testing events into histos
376  for (Long64_t i=0; i<GetNEvents(); i++) {
377  const Event* ev = GetEvent(i);
378  for (UInt_t dim=0; dim<kDim; dim++) {
380  if (dim < vDim)
381  range_h[dim]->Fill(ev->GetValue(dim));
382  else
383  range_h[dim]->Fill(ev->GetTarget(dim-vDim));
384  }
385  else
386  range_h[dim]->Fill(ev->GetValue(dim));
387  }
388  }
389 
390  // calc Xmin, Xmax from Histos
391  for (UInt_t dim=0; dim<kDim; dim++) {
392  for (Int_t i=1; i<(rangehistbins+1); i++) { // loop over bins
393  if (range_h[dim]->Integral(0, i) > nevoutside) { // calc left limit (integral over bins 0..i = nevoutside)
394  xmin[dim]=range_h[dim]->GetBinLowEdge(i);
395  break;
396  }
397  }
398  for (Int_t i=rangehistbins; i>0; i--) { // calc right limit (integral over bins i..max = nevoutside)
399  if (range_h[dim]->Integral(i, (rangehistbins+1)) > nevoutside) {
400  xmax[dim]=range_h[dim]->GetBinLowEdge(i+1);
401  break;
402  }
403  }
404  }
405  // now xmin[] and xmax[] contain upper/lower limits for every dimension
406 
407  // copy xmin[], xmax[] values to the class variable
408  fXmin.clear();
409  fXmax.clear();
410  for (UInt_t dim=0; dim<kDim; dim++) {
411  fXmin.push_back(xmin[dim]);
412  fXmax.push_back(xmax[dim]);
413  }
414 
415 
416  delete[] xmin;
417  delete[] xmax;
418 
419  // delete histos
420  for (UInt_t dim=0; dim<kDim; dim++)
421  delete range_h[dim];
422  delete[] range_h;
423 
424  return;
425 }
426 
427 ////////////////////////////////////////////////////////////////////////////////
428 /// Train PDE-Foam depending on the set options
429 
431 {
432  Log() << kVERBOSE << "Calculate Xmin and Xmax for every dimension" << Endl;
433  CalcXminXmax();
434 
435  // delete foams
436  DeleteFoams();
437 
438  // start training
439  if (DoRegression()) {
442  else
444  }
445  else {
446  if (DoMulticlass())
448  else {
449  if (DataInfo().GetNormalization() != "EQUALNUMEVENTS" ) {
450  Log() << kHEADER << "NormMode=" << DataInfo().GetNormalization()
451  << " chosen. Note that only NormMode=EqualNumEvents"
452  << " ensures that Discriminant values correspond to"
453  << " signal probabilities." << Endl;
454  }
455 
456  Log() << kDEBUG << "N_sig for training events: " << Data()->GetNEvtSigTrain() << Endl;
457  Log() << kDEBUG << "N_bg for training events: " << Data()->GetNEvtBkgdTrain() << Endl;
458  Log() << kDEBUG << "User normalization: " << DataInfo().GetNormalization().Data() << Endl;
459 
460  if (fSigBgSeparated)
462  else
464  }
465  }
466 
467  // delete the binary search tree in order to save memory
468  for(UInt_t i=0; i<fFoam.size(); i++) {
469  if(fFoam.at(i))
470  fFoam.at(i)->DeleteBinarySearchTree();
471  }
473 }
474 
475 ////////////////////////////////////////////////////////////////////////////////
476 /// Creation of 2 separated foams: one for signal events, one for
477 /// background events. At the end the foam cells of fFoam[0] will
478 /// contain the average number of signal events and fFoam[1] will
479 /// contain the average number of background events.
480 
482 {
483  TString foamcaption[2];
484  foamcaption[0] = "SignalFoam";
485  foamcaption[1] = "BgFoam";
486 
487  for(int i=0; i<2; i++) {
488  // create 2 PDEFoams
489  fFoam.push_back( InitFoam(foamcaption[i], kSeparate) );
490 
491  Log() << kVERBOSE << "Filling binary search tree of " << foamcaption[i]
492  << " with events" << Endl;
493  // insert event to BinarySearchTree
494  for (Long64_t k=0; k<GetNEvents(); ++k) {
495  const Event* ev = GetEvent(k);
496  if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
497  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
498  fFoam.back()->FillBinarySearchTree(ev);
499  }
500 
501  Log() << kINFO << "Build up " << foamcaption[i] << Endl;
502  fFoam.back()->Create(); // build foam
503 
504  Log() << kVERBOSE << "Filling foam cells with events" << Endl;
505  // loop over all events -> fill foam cells
506  for (Long64_t k=0; k<GetNEvents(); ++k) {
507  const Event* ev = GetEvent(k);
509  if ((i==0 && DataInfo().IsSignal(ev)) || (i==1 && !DataInfo().IsSignal(ev)))
510  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
511  fFoam.back()->FillFoamCells(ev, weight);
512  }
513  }
514 }
515 
516 ////////////////////////////////////////////////////////////////////////////////
517 /// Create only one unified foam (fFoam[0]) whose cells contain the
518 /// average discriminator (N_sig)/(N_sig + N_bg)
519 
521 {
522  fFoam.push_back( InitFoam("DiscrFoam", kDiscr, fSignalClass) );
523 
524  Log() << kVERBOSE << "Filling binary search tree of discriminator foam with events" << Endl;
525  // insert event to BinarySearchTree
526  for (Long64_t k=0; k<GetNEvents(); ++k) {
527  const Event* ev = GetEvent(k);
528  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
529  fFoam.back()->FillBinarySearchTree(ev);
530  }
531 
532  Log() << kINFO << "Build up discriminator foam" << Endl;
533  fFoam.back()->Create(); // build foam
534 
535  Log() << kVERBOSE << "Filling foam cells with events" << Endl;
536  // loop over all training events -> fill foam cells with N_sig and N_Bg
537  for (Long64_t k=0; k<GetNEvents(); ++k) {
538  const Event* ev = GetEvent(k);
540  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
541  fFoam.back()->FillFoamCells(ev, weight);
542  }
543 
544  Log() << kVERBOSE << "Calculate cell discriminator"<< Endl;
545  // calc discriminator (and it's error) for each cell
546  fFoam.back()->Finalize();
547 }
548 
549 ////////////////////////////////////////////////////////////////////////////////
550 /// Create one unified foam (see TrainUnifiedClassification()) for
551 /// each class, where the cells of foam i (fFoam[i]) contain the
552 /// average fraction of events of class i, i.e.
553 ///
554 /// D = number events of class i / total number of events
555 
557 {
558  for (UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) {
559 
560  fFoam.push_back( InitFoam(Form("MultiClassFoam%u",iClass), kMultiClass, iClass) );
561 
562  Log() << kVERBOSE << "Filling binary search tree of multiclass foam "
563  << iClass << " with events" << Endl;
564  // insert event to BinarySearchTree
565  for (Long64_t k=0; k<GetNEvents(); ++k) {
566  const Event* ev = GetEvent(k);
567  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
568  fFoam.back()->FillBinarySearchTree(ev);
569  }
570 
571  Log() << kINFO << "Build up multiclass foam " << iClass << Endl;
572  fFoam.back()->Create(); // build foam
573 
574  Log() << kVERBOSE << "Filling foam cells with events" << Endl;
575  // loop over all training events and fill foam cells with signal
576  // and background events
577  for (Long64_t k=0; k<GetNEvents(); ++k) {
578  const Event* ev = GetEvent(k);
580  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
581  fFoam.back()->FillFoamCells(ev, weight);
582  }
583 
584  Log() << kVERBOSE << "Calculate cell discriminator"<< Endl;
585  // calc discriminator (and it's error) for each cell
586  fFoam.back()->Finalize();
587  }
588 }
589 
590 ////////////////////////////////////////////////////////////////////////////////
591 /// Training one (mono target regression) foam, whose cells contain
592 /// the average 0th target. The dimension of the foam = number of
593 /// non-targets (= number of variables).
594 
596 {
597  if (Data()->GetNTargets() != 1) {
598  Log() << kFATAL << "Can't do mono-target regression with "
599  << Data()->GetNTargets() << " targets!" << Endl;
600  }
601 
602  Log() << kDEBUG << "MethodPDEFoam: number of Targets: " << Data()->GetNTargets() << Endl;
603 
604  fFoam.push_back( InitFoam("MonoTargetRegressionFoam", kMonoTarget) );
605 
606  Log() << kVERBOSE << "Filling binary search tree with events" << Endl;
607  // insert event to BinarySearchTree
608  for (Long64_t k=0; k<GetNEvents(); ++k) {
609  const Event* ev = GetEvent(k);
610  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
611  fFoam.back()->FillBinarySearchTree(ev);
612  }
613 
614  Log() << kINFO << "Build mono target regression foam" << Endl;
615  fFoam.back()->Create(); // build foam
616 
617  Log() << kVERBOSE << "Filling foam cells with events" << Endl;
618  // loop over all events -> fill foam cells with target
619  for (Long64_t k=0; k<GetNEvents(); ++k) {
620  const Event* ev = GetEvent(k);
622  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
623  fFoam.back()->FillFoamCells(ev, weight);
624  }
625 
626  Log() << kVERBOSE << "Calculate average cell targets"<< Endl;
627  // calc weight (and it's error) for each cell
628  fFoam.back()->Finalize();
629 }
630 
631 ////////////////////////////////////////////////////////////////////////////////
632 /// Training one (multi target regression) foam, whose cells contain
633 /// the average event density. The dimension of the foam = number
634 /// of non-targets + number of targets.
635 
637 {
638  Log() << kDEBUG << "Number of variables: " << Data()->GetNVariables() << Endl;
639  Log() << kDEBUG << "Number of Targets: " << Data()->GetNTargets() << Endl;
640  Log() << kDEBUG << "Dimension of foam: " << Data()->GetNVariables()+Data()->GetNTargets() << Endl;
641  if (fKernel==kLinN)
642  Log() << kFATAL << "LinNeighbors kernel currently not supported"
643  << " for multi target regression" << Endl;
644 
645  fFoam.push_back( InitFoam("MultiTargetRegressionFoam", kMultiTarget) );
646 
647  Log() << kVERBOSE << "Filling binary search tree of multi target regression foam with events"
648  << Endl;
649  // insert event to BinarySearchTree
650  for (Long64_t k=0; k<GetNEvents(); ++k) {
651  Event *ev = new Event(*GetEvent(k));
652  // since in multi-target regression targets are handled like
653  // variables --> remove targets and add them to the event variabels
654  std::vector<Float_t> targets(ev->GetTargets());
655  const UInt_t nVariables = ev->GetValues().size();
656  for (UInt_t i = 0; i < targets.size(); ++i)
657  ev->SetVal(i+nVariables, targets.at(i));
658  ev->GetTargets().clear();
659  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
660  fFoam.back()->FillBinarySearchTree(ev);
661  // since the binary search tree copies the event, one can delete
662  // it
663  delete ev;
664  }
665 
666  Log() << kINFO << "Build multi target regression foam" << Endl;
667  fFoam.back()->Create(); // build foam
668 
669  Log() << kVERBOSE << "Filling foam cells with events" << Endl;
670  // loop over all events -> fill foam cells with number of events
671  for (Long64_t k=0; k<GetNEvents(); ++k) {
672  Event *ev = new Event(*GetEvent(k));
673  // since in multi-target regression targets are handled like
674  // variables --> remove targets and add them to the event variabels
675  std::vector<Float_t> targets = ev->GetTargets();
676  const UInt_t nVariables = ev->GetValues().size();
678  for (UInt_t i = 0; i < targets.size(); ++i)
679  ev->SetVal(i+nVariables, targets.at(i));
680  ev->GetTargets().clear();
681  if (!(IgnoreEventsWithNegWeightsInTraining() && ev->GetWeight()<=0))
682  fFoam.back()->FillFoamCells(ev, weight);
683  // since the PDEFoam copies the event, one can delete it
684  delete ev;
685  }
686 }
687 
688 ////////////////////////////////////////////////////////////////////////////////
689 /// Return Mva-Value.
690 ///
691 /// In case of `fSigBgSeparated==false` (one unified PDEFoam was
692 /// trained) the function returns the content of the cell, which
693 /// corresponds to the current TMVA::Event, i.e. D =
694 /// N_sig/(N_bg+N_sig).
695 ///
696 /// In case of `fSigBgSeparated==true` (two separate PDEFoams were
697 /// trained) the function returns
698 ///
699 /// D = Density_sig/(Density_sig+Density_bg)
700 ///
701 /// where 'Density_sig' is the content of the cell in the signal
702 /// PDEFoam (fFoam[0]) and 'Density_bg' is the content of the cell
703 /// in the background PDEFoam (fFoam[1]).
704 ///
705 /// In both cases the error on the discriminant is stored in 'err'
706 /// and 'errUpper'. (Of course err and errUpper must be non-zero
707 /// and point to valid address to make this work.)
708 
710 {
711  const Event* ev = GetEvent();
712  Double_t discr = 0.;
713 
714  if (fSigBgSeparated) {
715  std::vector<Float_t> xvec = ev->GetValues();
716 
717  Double_t density_sig = 0.; // calc signal event density
718  Double_t density_bg = 0.; // calc background event density
719  density_sig = fFoam.at(0)->GetCellValue(xvec, kValueDensity, fKernelEstimator);
720  density_bg = fFoam.at(1)->GetCellValue(xvec, kValueDensity, fKernelEstimator);
721 
722  // calc discriminator (normed!)
723  if ( (density_sig+density_bg) > 0 )
724  discr = density_sig/(density_sig+density_bg);
725  else
726  discr = 0.5; // assume 50% signal probability, if no events found (bad assumption, but can be overruled by cut on error)
727  }
728  else { // Signal and Bg not separated
729  // get discriminator direct from the foam
730  discr = fFoam.at(0)->GetCellValue(ev->GetValues(), kValue, fKernelEstimator);
731  }
732 
733  // calculate the error
734  if (err || errUpper) {
735  const Double_t discr_error = CalculateMVAError();
736  if (err != 0) *err = discr_error;
737  if (errUpper != 0) *errUpper = discr_error;
738  }
739 
740  if (fUseYesNoCell)
741  return (discr < 0.5 ? -1 : 1);
742  else
743  return discr;
744 }
745 
746 ////////////////////////////////////////////////////////////////////////////////
747 /// Calculate the error on the Mva value
748 ///
749 /// If `fSigBgSeparated == true` the error is calculated from the
750 /// number of events in the signal and background PDEFoam cells.
751 ///
752 /// If `fSigBgSeparated == false`, the error is taken directly from
753 /// the PDEFoam cell.
754 
756 {
757  const Event* ev = GetEvent(); // current event
758  Double_t mvaError = 0.0; // the error on the Mva value
759 
760  if (fSigBgSeparated) {
761  const std::vector<Float_t>& xvec = ev->GetValues();
762 
763  const Double_t neventsB = fFoam.at(1)->GetCellValue(xvec, kValue, fKernelEstimator);
764  const Double_t neventsS = fFoam.at(0)->GetCellValue(xvec, kValue, fKernelEstimator);
765  const Double_t scaleB = 1.;
766  // estimation of statistical error on counted signal/background events
767  const Double_t errorS = neventsS == 0 ? 1.0 : TMath::Sqrt(neventsS);
768  const Double_t errorB = neventsB == 0 ? 1.0 : TMath::Sqrt(neventsB);
769 
770  if ((neventsS > 1e-10) || (neventsB > 1e-10)) {
771  // eq. (5) in paper T.Carli, B.Koblitz 2002
772  mvaError = TMath::Sqrt(Sqr(scaleB * neventsB / Sqr(neventsS + scaleB * neventsB) * errorS) +
773  Sqr(scaleB * neventsS / Sqr(neventsS + scaleB * neventsB) * errorB));
774  } else {
775  mvaError = 1.0;
776  }
777  } else { // Signal and Bg not separated
778  // get discriminator error direct from the foam
779  mvaError = fFoam.at(0)->GetCellValue(ev->GetValues(), kValueError, fKernelEstimator);
780  }
781 
782  return mvaError;
783 }
784 
785 ////////////////////////////////////////////////////////////////////////////////
786 /// Get the multiclass MVA response for the PDEFoam classifier. The
787 /// returned MVA values are normalized, i.e. their sum equals 1.
788 
789 const std::vector<Float_t>& TMVA::MethodPDEFoam::GetMulticlassValues()
790 {
791  const TMVA::Event *ev = GetEvent();
792  std::vector<Float_t> xvec = ev->GetValues();
793 
794  if (fMulticlassReturnVal == NULL)
795  fMulticlassReturnVal = new std::vector<Float_t>();
796  fMulticlassReturnVal->clear();
797  fMulticlassReturnVal->reserve(DataInfo().GetNClasses());
798 
799  std::vector<Float_t> temp; // temp class. values
800  UInt_t nClasses = DataInfo().GetNClasses();
801  temp.reserve(nClasses);
802  for (UInt_t iClass = 0; iClass < nClasses; ++iClass) {
803  temp.push_back(fFoam.at(iClass)->GetCellValue(xvec, kValue, fKernelEstimator));
804  }
805 
806  for (UInt_t iClass = 0; iClass < nClasses; ++iClass) {
807  Float_t norm = 0.0; // normalization
808  for (UInt_t j = 0; j < nClasses; ++j) {
809  if (iClass != j)
810  norm += exp(temp[j] - temp[iClass]);
811  }
812  fMulticlassReturnVal->push_back(1.0 / (1.0 + norm));
813  }
814 
815  return *fMulticlassReturnVal;
816 }
817 
818 ////////////////////////////////////////////////////////////////////////////////
819 /// Compute ranking of input variables from the number of cuts made
820 /// in each PDEFoam dimension. The PDEFoam dimension (the variable)
821 /// for which the most cuts were done is ranked highest.
822 
824 {
825  // create the ranking object
826  fRanking = new Ranking(GetName(), "Variable Importance");
827  std::vector<Float_t> importance(GetNvar(), 0);
828 
829  // determine variable importances
830  for (UInt_t ifoam = 0; ifoam < fFoam.size(); ++ifoam) {
831  // get the number of cuts made in every dimension of foam
832  PDEFoamCell *root_cell = fFoam.at(ifoam)->GetRootCell();
833  std::vector<UInt_t> nCuts(fFoam.at(ifoam)->GetTotDim(), 0);
834  GetNCuts(root_cell, nCuts);
835 
836  // fill the importance vector (ignoring the target dimensions in
837  // case of a multi-target regression foam)
838  UInt_t sumOfCuts = 0;
839  std::vector<Float_t> tmp_importance;
840  for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
841  sumOfCuts += nCuts.at(ivar);
842  tmp_importance.push_back( nCuts.at(ivar) );
843  }
844  // normalization of the variable importances of this foam: the
845  // sum of all variable importances equals 1 for this foam
846  for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
847  if (sumOfCuts > 0)
848  tmp_importance.at(ivar) /= sumOfCuts;
849  else
850  tmp_importance.at(ivar) = 0;
851  }
852  // the overall variable importance is the average over all foams
853  for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
854  importance.at(ivar) += tmp_importance.at(ivar) / fFoam.size();
855  }
856  }
857 
858  // fill ranking vector
859  for (UInt_t ivar = 0; ivar < GetNvar(); ++ivar) {
860  fRanking->AddRank(Rank(GetInputLabel(ivar), importance.at(ivar)));
861  }
862 
863  return fRanking;
864 }
865 
866 ////////////////////////////////////////////////////////////////////////////////
867 /// Fill in 'nCuts' the number of cuts made in every foam dimension,
868 /// starting at the root cell 'cell'.
869 ///
870 /// Parameters:
871 ///
872 /// - cell - root cell to start the counting from
873 ///
874 /// - nCuts - the number of cuts are saved in this vector
875 
876 void TMVA::MethodPDEFoam::GetNCuts(PDEFoamCell *cell, std::vector<UInt_t> &nCuts)
877 {
878  if (cell == NULL || cell->GetStat() == 1) // cell is active
879  return;
880 
881  nCuts.at(cell->GetBest())++;
882 
883  if (cell->GetDau0() != NULL)
884  GetNCuts(cell->GetDau0(), nCuts);
885  if (cell->GetDau1() != NULL)
886  GetNCuts(cell->GetDau1(), nCuts);
887 }
888 
889 ////////////////////////////////////////////////////////////////////////////////
890 /// Set Xmin, Xmax for every dimension in the given pdefoam object
891 
893 {
894  if (!pdefoam){
895  Log() << kFATAL << "Null pointer given!" << Endl;
896  return;
897  }
898 
899  UInt_t num_vars = GetNvar();
901  num_vars += Data()->GetNTargets();
902 
903  for (UInt_t idim=0; idim<num_vars; idim++) { // set upper/ lower limit in foam
904  Log()<< kDEBUG << "foam: SetXmin[dim="<<idim<<"]: " << fXmin.at(idim) << Endl;
905  Log()<< kDEBUG << "foam: SetXmax[dim="<<idim<<"]: " << fXmax.at(idim) << Endl;
906  pdefoam->SetXmin(idim, fXmin.at(idim));
907  pdefoam->SetXmax(idim, fXmax.at(idim));
908  }
909 }
910 
911 ////////////////////////////////////////////////////////////////////////////////
912 /// Create a new PDEFoam, set the PDEFoam options (nCells, nBin,
913 /// Xmin, Xmax, etc.) and initialize the PDEFoam by calling
914 /// pdefoam->Initialize().
915 ///
916 /// Parameters:
917 ///
918 /// - foamcaption - name of PDEFoam object
919 ///
920 /// - ft - type of PDEFoam
921 ///
922 /// Candidates are:
923 /// - kSeparate - creates TMVA::PDEFoamEvent
924 /// - kDiscr - creates TMVA::PDEFoamDiscriminant
925 /// - kMonoTarget - creates TMVA::PDEFoamTarget
926 /// - kMultiTarget - creates TMVA::MultiTarget
927 /// - kMultiClass - creates TMVA::PDEFoamDiscriminant
928 ///
929 /// If 'fDTSeparation != kFoam' then a TMVA::PDEFoamDecisionTree
930 /// is created (the separation type depends on fDTSeparation).
931 ///
932 /// - cls - marked event class (optional, default value = 0)
933 
935 {
936  // number of foam dimensions
937  Int_t dim = 1;
938  if (ft == kMultiTarget)
939  // dimension of foam = number of targets + non-targets
940  dim = Data()->GetNTargets() + Data()->GetNVariables();
941  else
942  dim = GetNvar();
943 
944  // calculate range-searching box
945  std::vector<Double_t> box;
946  for (Int_t idim = 0; idim < dim; ++idim) {
947  box.push_back((fXmax.at(idim) - fXmin.at(idim))* fVolFrac);
948  }
949 
950  // create PDEFoam and PDEFoamDensityBase
951  PDEFoam *pdefoam = NULL;
952  PDEFoamDensityBase *density = NULL;
953  if (fDTSeparation == kFoam) {
954  // use PDEFoam algorithm
955  switch (ft) {
956  case kSeparate:
957  pdefoam = new PDEFoamEvent(foamcaption);
958  density = new PDEFoamEventDensity(box);
959  break;
960  case kMultiTarget:
961  pdefoam = new PDEFoamMultiTarget(foamcaption, fTargetSelection);
962  density = new PDEFoamEventDensity(box);
963  break;
964  case kDiscr:
965  case kMultiClass:
966  pdefoam = new PDEFoamDiscriminant(foamcaption, cls);
967  density = new PDEFoamDiscriminantDensity(box, cls);
968  break;
969  case kMonoTarget:
970  pdefoam = new PDEFoamTarget(foamcaption, 0);
971  density = new PDEFoamTargetDensity(box, 0);
972  break;
973  default:
974  Log() << kFATAL << "Unknown PDEFoam type!" << Endl;
975  break;
976  }
977  } else {
978  // create a decision tree like PDEFoam
979 
980  // create separation type class, which is owned by
981  // PDEFoamDecisionTree (i.e. PDEFoamDecisionTree will delete it)
982  SeparationBase *sepType = NULL;
983  switch (fDTSeparation) {
984  case kGiniIndex:
985  sepType = new GiniIndex();
986  break;
987  case kMisClassificationError:
988  sepType = new MisClassificationError();
989  break;
990  case kCrossEntropy:
991  sepType = new CrossEntropy();
992  break;
993  case kGiniIndexWithLaplace:
994  sepType = new GiniIndexWithLaplace();
995  break;
996  case kSdivSqrtSplusB:
997  sepType = new SdivSqrtSplusB();
998  break;
999  default:
1000  Log() << kFATAL << "Separation type " << fDTSeparation
1001  << " currently not supported" << Endl;
1002  break;
1003  }
1004  switch (ft) {
1005  case kDiscr:
1006  case kMultiClass:
1007  pdefoam = new PDEFoamDecisionTree(foamcaption, sepType, cls);
1008  density = new PDEFoamDecisionTreeDensity(box, cls);
1009  break;
1010  default:
1011  Log() << kFATAL << "Decision tree cell split algorithm is only"
1012  << " available for (multi) classification with a single"
1013  << " PDE-Foam (SigBgSeparate=F)" << Endl;
1014  break;
1015  }
1016  }
1017 
1018  if (pdefoam) pdefoam->SetDensity(density);
1019  else Log() << kFATAL << "PDEFoam pointer not set, exiting.." << Endl;
1020 
1021  // create pdefoam kernel
1023 
1024  // set fLogger attributes
1025  pdefoam->Log().SetMinType(this->Log().GetMinType());
1026 
1027  // set PDEFoam parameters
1028  pdefoam->SetDim( dim);
1029  pdefoam->SetnCells( fnCells); // optional
1030  pdefoam->SetnSampl( fnSampl); // optional
1031  pdefoam->SetnBin( fnBin); // optional
1032  pdefoam->SetEvPerBin( fEvPerBin); // optional
1033 
1034  // cuts
1035  pdefoam->SetNmin(fNmin);
1036  pdefoam->SetMaxDepth(fMaxDepth); // maximum cell tree depth
1037 
1038  // Init PDEFoam
1039  pdefoam->Initialize();
1040 
1041  // Set Xmin, Xmax
1042  SetXminXmax(pdefoam);
1043 
1044  return pdefoam;
1045 }
1046 
1047 ////////////////////////////////////////////////////////////////////////////////
1048 /// Return regression values for both multi- and mono-target regression
1049 
1050 const std::vector<Float_t>& TMVA::MethodPDEFoam::GetRegressionValues()
1051 {
1052  if (fRegressionReturnVal == 0) fRegressionReturnVal = new std::vector<Float_t>();
1053  fRegressionReturnVal->clear();
1054  fRegressionReturnVal->reserve(Data()->GetNTargets());
1055 
1056  const Event* ev = GetEvent();
1057  std::vector<Float_t> vals = ev->GetValues(); // get array of event variables (non-targets)
1058 
1059  if (vals.empty()) {
1060  Log() << kWARNING << "<GetRegressionValues> value vector is empty. " << Endl;
1061  }
1062 
1063  if (fMultiTargetRegression) {
1064  // create std::map from event variables
1065  std::map<Int_t, Float_t> xvec;
1066  for (UInt_t i=0; i<vals.size(); ++i)
1067  xvec.insert(std::pair<Int_t, Float_t>(i, vals.at(i)));
1068  // get the targets
1069  std::vector<Float_t> targets = fFoam.at(0)->GetCellValue( xvec, kValue );
1070 
1071  // sanity check
1072  if (targets.size() != Data()->GetNTargets())
1073  Log() << kFATAL << "Something wrong with multi-target regression foam: "
1074  << "number of targets does not match the DataSet()" << Endl;
1075  for(UInt_t i=0; i<targets.size(); i++)
1076  fRegressionReturnVal->push_back(targets.at(i));
1077  }
1078  else {
1079  fRegressionReturnVal->push_back(fFoam.at(0)->GetCellValue(vals, kValue, fKernelEstimator));
1080  }
1081 
1082  // apply inverse transformation to regression values
1083  Event * evT = new Event(*ev);
1084  for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
1085  evT->SetTarget(itgt, fRegressionReturnVal->at(itgt) );
1086  }
1087  const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
1088  fRegressionReturnVal->clear();
1089  for (UInt_t itgt = 0; itgt < Data()->GetNTargets(); itgt++) {
1090  fRegressionReturnVal->push_back( evT2->GetTarget(itgt) );
1091  }
1092 
1093  delete evT;
1094 
1095  return (*fRegressionReturnVal);
1096 }
1097 
1098 ////////////////////////////////////////////////////////////////////////////////
1099 /// create a pdefoam kernel estimator, depending on the current
1100 /// value of fKernel
1101 
1103 {
1104  switch (fKernel) {
1105  case kNone:
1106  return new PDEFoamKernelTrivial();
1107  case kLinN:
1108  return new PDEFoamKernelLinN();
1109  case kGaus:
1110  return new PDEFoamKernelGauss(fVolFrac/2.0);
1111  default:
1112  Log() << kFATAL << "Kernel: " << fKernel << " not supported!" << Endl;
1113  return NULL;
1114  }
1115  return NULL;
1116 }
1117 
1118 ////////////////////////////////////////////////////////////////////////////////
1119 /// Deletes all trained foams
1120 
1122 {
1123  for (UInt_t i=0; i<fFoam.size(); i++)
1124  if (fFoam.at(i)) delete fFoam.at(i);
1125  fFoam.clear();
1126 }
1127 
1128 ////////////////////////////////////////////////////////////////////////////////
1129 /// reset MethodPDEFoam:
1130 ///
1131 /// - delete all PDEFoams
1132 /// - delete the kernel estimator
1133 
1135 {
1136  DeleteFoams();
1137 
1138  if (fKernelEstimator != NULL) {
1139  delete fKernelEstimator;
1140  fKernelEstimator = NULL;
1141  }
1142 }
1143 
1144 ////////////////////////////////////////////////////////////////////////////////
1145 
1147 {}
1148 
1149 ////////////////////////////////////////////////////////////////////////////////
1150 /// create XML output of PDEFoam method variables
1151 
1152 void TMVA::MethodPDEFoam::AddWeightsXMLTo( void* parent ) const
1153 {
1154  void* wght = gTools().AddChild(parent, "Weights");
1155  gTools().AddAttr( wght, "SigBgSeparated", fSigBgSeparated );
1156  gTools().AddAttr( wght, "Frac", fFrac );
1157  gTools().AddAttr( wght, "DiscrErrCut", fDiscrErrCut );
1158  gTools().AddAttr( wght, "VolFrac", fVolFrac );
1159  gTools().AddAttr( wght, "nCells", fnCells );
1160  gTools().AddAttr( wght, "nSampl", fnSampl );
1161  gTools().AddAttr( wght, "nBin", fnBin );
1162  gTools().AddAttr( wght, "EvPerBin", fEvPerBin );
1163  gTools().AddAttr( wght, "Compress", fCompress );
1164  gTools().AddAttr( wght, "DoRegression", DoRegression() );
1165  gTools().AddAttr( wght, "CutNmin", fNmin>0 );
1166  gTools().AddAttr( wght, "Nmin", fNmin );
1167  gTools().AddAttr( wght, "CutRMSmin", false );
1168  gTools().AddAttr( wght, "RMSmin", 0.0 );
1169  gTools().AddAttr( wght, "Kernel", KernelToUInt(fKernel) );
1170  gTools().AddAttr( wght, "TargetSelection", TargetSelectionToUInt(fTargetSelection) );
1171  gTools().AddAttr( wght, "FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
1172  gTools().AddAttr( wght, "UseYesNoCell", fUseYesNoCell );
1173 
1174  // save foam borders Xmin[i], Xmax[i]
1175  void *xmin_wrap;
1176  for (UInt_t i=0; i<fXmin.size(); i++){
1177  xmin_wrap = gTools().AddChild( wght, "Xmin" );
1178  gTools().AddAttr( xmin_wrap, "Index", i );
1179  gTools().AddAttr( xmin_wrap, "Value", fXmin.at(i) );
1180  }
1181  void *xmax_wrap;
1182  for (UInt_t i=0; i<fXmax.size(); i++){
1183  xmax_wrap = gTools().AddChild( wght, "Xmax" );
1184  gTools().AddAttr( xmax_wrap, "Index", i );
1185  gTools().AddAttr( xmax_wrap, "Value", fXmax.at(i) );
1186  }
1187 
1188  // write foams to xml file
1189  WriteFoamsToFile();
1190 }
1191 
1192 ////////////////////////////////////////////////////////////////////////////////
1193 /// Write PDEFoams to file
1194 
1196 {
1197  // fill variable names into foam
1199 
1200  TString rfname( GetWeightFileName() );
1201 
1202  // replace in case of txt weight file
1203  rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );
1204 
1205  // add foam indicator to distinguish from main weight file
1206  rfname.ReplaceAll( ".xml", "_foams.root" );
1207 
1208  TFile *rootFile = 0;
1209  if (fCompress) rootFile = new TFile(rfname, "RECREATE", "foamfile", 9);
1210  else rootFile = new TFile(rfname, "RECREATE");
1211 
1212  // write the foams
1213  for (UInt_t i=0; i<fFoam.size(); ++i) {
1214  Log() << "writing foam " << fFoam.at(i)->GetFoamName().Data()
1215  << " to file" << Endl;
1216  fFoam.at(i)->Write(fFoam.at(i)->GetFoamName().Data());
1217  }
1218 
1219  rootFile->Close();
1220  Log() << kINFO << "Foams written to file: "
1221  << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
1222 }
1223 
1224 ////////////////////////////////////////////////////////////////////////////////
1225 /// read options and internal parameters
1226 
1228 {
1229  istr >> fSigBgSeparated; // Separate Sig and Bg, or not
1230  istr >> fFrac; // Fraction used for calc of Xmin, Xmax
1231  istr >> fDiscrErrCut; // cut on discriminant error
1232  istr >> fVolFrac; // volume fraction (used for density calculation during buildup)
1233  istr >> fnCells; // Number of Cells (500)
1234  istr >> fnSampl; // Number of MC events per cell in build-up (1000)
1235  istr >> fnBin; // Number of bins in build-up (100)
1236  istr >> fEvPerBin; // Maximum events (equiv.) per bin in build-up (1000)
1237  istr >> fCompress; // compress output file
1238 
1239  Bool_t regr;
1240  istr >> regr; // regression foam
1242 
1243  Bool_t CutNmin, CutRMSmin; // dummy for backwards compatible.
1244  Float_t RMSmin; // dummy for backwards compatible.
1245  istr >> CutNmin; // cut on minimal number of events in cell
1246  istr >> fNmin;
1247  istr >> CutRMSmin; // cut on minimal RMS in cell
1248  istr >> RMSmin;
1249 
1250  UInt_t ker = 0;
1251  istr >> ker; // used kernel for GetMvaValue()
1252  fKernel = UIntToKernel(ker);
1253 
1254  UInt_t ts = 0;
1255  istr >> ts; // used method for target selection
1257 
1258  istr >> fFillFoamWithOrigWeights; // fill foam with original event weights
1259  istr >> fUseYesNoCell; // return -1 or 1 for bg or signal event
1260 
1261  // clear old range and prepare new range
1262  fXmin.clear();
1263  fXmax.clear();
1264  UInt_t kDim = GetNvar();
1266  kDim += Data()->GetNTargets();
1267  fXmin.assign(kDim, 0);
1268  fXmax.assign(kDim, 0);
1269 
1270  // read range
1271  for (UInt_t i=0; i<kDim; i++)
1272  istr >> fXmin.at(i);
1273  for (UInt_t i=0; i<kDim; i++)
1274  istr >> fXmax.at(i);
1275 
1276  // read pure foams from file
1278 }
1279 
1280 ////////////////////////////////////////////////////////////////////////////////
1281 /// read PDEFoam variables from xml weight file
1282 
1284 {
1285  gTools().ReadAttr( wghtnode, "SigBgSeparated", fSigBgSeparated );
1286  gTools().ReadAttr( wghtnode, "Frac", fFrac );
1287  gTools().ReadAttr( wghtnode, "DiscrErrCut", fDiscrErrCut );
1288  gTools().ReadAttr( wghtnode, "VolFrac", fVolFrac );
1289  gTools().ReadAttr( wghtnode, "nCells", fnCells );
1290  gTools().ReadAttr( wghtnode, "nSampl", fnSampl );
1291  gTools().ReadAttr( wghtnode, "nBin", fnBin );
1292  gTools().ReadAttr( wghtnode, "EvPerBin", fEvPerBin );
1293  gTools().ReadAttr( wghtnode, "Compress", fCompress );
1294  Bool_t regr; // dummy for backwards compatible.
1295  gTools().ReadAttr( wghtnode, "DoRegression", regr );
1296  Bool_t CutNmin; // dummy for backwards compatible.
1297  gTools().ReadAttr( wghtnode, "CutNmin", CutNmin );
1298  gTools().ReadAttr( wghtnode, "Nmin", fNmin );
1299  Bool_t CutRMSmin; // dummy for backwards compatible.
1300  Float_t RMSmin; // dummy for backwards compatible.
1301  gTools().ReadAttr( wghtnode, "CutRMSmin", CutRMSmin );
1302  gTools().ReadAttr( wghtnode, "RMSmin", RMSmin );
1303  UInt_t ker = 0;
1304  gTools().ReadAttr( wghtnode, "Kernel", ker );
1305  fKernel = UIntToKernel(ker);
1306  UInt_t ts = 0;
1307  gTools().ReadAttr( wghtnode, "TargetSelection", ts );
1309  if (gTools().HasAttr(wghtnode, "FillFoamWithOrigWeights"))
1310  gTools().ReadAttr( wghtnode, "FillFoamWithOrigWeights", fFillFoamWithOrigWeights );
1311  if (gTools().HasAttr(wghtnode, "UseYesNoCell"))
1312  gTools().ReadAttr( wghtnode, "UseYesNoCell", fUseYesNoCell );
1313 
1314  // clear old range [Xmin, Xmax] and prepare new range for reading
1315  fXmin.clear();
1316  fXmax.clear();
1317  UInt_t kDim = GetNvar();
1319  kDim += Data()->GetNTargets();
1320  fXmin.assign(kDim, 0);
1321  fXmax.assign(kDim, 0);
1322 
1323  // read foam range
1324  void *xmin_wrap = gTools().GetChild( wghtnode );
1325  for (UInt_t counter=0; counter<kDim; counter++) {
1326  UInt_t i=0;
1327  gTools().ReadAttr( xmin_wrap , "Index", i );
1328  if (i>=kDim)
1329  Log() << kFATAL << "dimension index out of range:" << i << Endl;
1330  gTools().ReadAttr( xmin_wrap , "Value", fXmin.at(i) );
1331  xmin_wrap = gTools().GetNextChild( xmin_wrap );
1332  }
1333 
1334  void *xmax_wrap = xmin_wrap;
1335  for (UInt_t counter=0; counter<kDim; counter++) {
1336  UInt_t i=0;
1337  gTools().ReadAttr( xmax_wrap , "Index", i );
1338  if (i>=kDim)
1339  Log() << kFATAL << "dimension index out of range:" << i << Endl;
1340  gTools().ReadAttr( xmax_wrap , "Value", fXmax.at(i) );
1341  xmax_wrap = gTools().GetNextChild( xmax_wrap );
1342  }
1343 
1344  // if foams exist, delete them
1345  DeleteFoams();
1346 
1347  // read pure foams from file
1349 
1350  // recreate the pdefoam kernel estimator
1351  if (fKernelEstimator != NULL)
1352  delete fKernelEstimator;
1354 }
1355 
1356 ////////////////////////////////////////////////////////////////////////////////
1357 /// Reads a foam with name 'foamname' from file, and returns a clone
1358 /// of the foam. The given ROOT file must be open. (The ROOT file
1359 /// will not be closed in this function.)
1360 ///
1361 /// Parameters:
1362 ///
1363 /// - file - an open ROOT file
1364 ///
1365 /// - foamname - name of foam to load from the file
1366 ///
1367 /// Returns:
1368 ///
1369 /// If a foam with name 'foamname' exists in the file, then it is
1370 /// read from the file, cloned and returned. If a foam with name
1371 /// 'foamname' does not exist in the file or the clone operation
1372 /// does not succeed, then NULL is returned.
1373 
1375 {
1376  if (file == NULL) {
1377  Log() << kWARNING << "<ReadClonedFoamFromFile>: NULL pointer given" << Endl;
1378  return NULL;
1379  }
1380 
1381  // try to load the foam from the file
1382  PDEFoam *foam = (PDEFoam*) file->Get(foamname);
1383  if (foam == NULL) {
1384  return NULL;
1385  }
1386  // try to clone the foam
1387  foam = (PDEFoam*) foam->Clone();
1388  if (foam == NULL) {
1389  Log() << kWARNING << "<ReadClonedFoamFromFile>: " << foamname
1390  << " could not be cloned!" << Endl;
1391  return NULL;
1392  }
1393 
1394  return foam;
1395 }
1396 
1397 ////////////////////////////////////////////////////////////////////////////////
1398 /// read foams from file
1399 
1401 {
1402  TString rfname( GetWeightFileName() );
1403 
1404  // replace in case of txt weight file
1405  rfname.ReplaceAll( TString(".") + gConfig().GetIONames().fWeightFileExtension + ".txt", ".xml" );
1406 
1407  // add foam indicator to distinguish from main weight file
1408  rfname.ReplaceAll( ".xml", "_foams.root" );
1409 
1410  Log() << kINFO << "Read foams from file: " << gTools().Color("lightblue")
1411  << rfname << gTools().Color("reset") << Endl;
1412  TFile *rootFile = new TFile( rfname, "READ" );
1413  if (rootFile->IsZombie()) Log() << kFATAL << "Cannot open file \"" << rfname << "\"" << Endl;
1414 
1415  // read foams from file
1416  if (DoRegression()) {
1418  fFoam.push_back(ReadClonedFoamFromFile(rootFile, "MultiTargetRegressionFoam"));
1419  else
1420  fFoam.push_back(ReadClonedFoamFromFile(rootFile, "MonoTargetRegressionFoam"));
1421  } else {
1422  if (fSigBgSeparated) {
1423  fFoam.push_back(ReadClonedFoamFromFile(rootFile, "SignalFoam"));
1424  fFoam.push_back(ReadClonedFoamFromFile(rootFile, "BgFoam"));
1425  } else {
1426  // try to load discriminator foam
1427  PDEFoam *foam = ReadClonedFoamFromFile(rootFile, "DiscrFoam");
1428  if (foam != NULL)
1429  fFoam.push_back(foam);
1430  else {
1431  // load multiclass foams
1432  for (UInt_t iClass=0; iClass<DataInfo().GetNClasses(); ++iClass) {
1433  fFoam.push_back(ReadClonedFoamFromFile(rootFile, Form("MultiClassFoam%u",iClass)));
1434  }
1435  }
1436  }
1437  }
1438 
1439  // Close the root file. Note, that the foams are still present in
1440  // memory!
1441  rootFile->Close();
1442  delete rootFile;
1443 
1444  for (UInt_t i=0; i<fFoam.size(); ++i) {
1445  if (!fFoam.at(0))
1446  Log() << kFATAL << "Could not load foam!" << Endl;
1447  }
1448 }
1449 
1450 ////////////////////////////////////////////////////////////////////////////////
1451 /// convert UInt_t to EKernel (used for reading weight files)
1452 
1454 {
1455  switch(iker) {
1456  case 0: return kNone;
1457  case 1: return kGaus;
1458  case 2: return kLinN;
1459  default:
1460  Log() << kWARNING << "<UIntToKernel>: unknown kernel number: " << iker << Endl;
1461  return kNone;
1462  }
1463  return kNone;
1464 }
1465 
1466 ////////////////////////////////////////////////////////////////////////////////
1467 /// convert UInt_t to ETargetSelection (used for reading weight files)
1468 
1470 {
1471  switch(its) {
1472  case 0: return kMean;
1473  case 1: return kMpv;
1474  default:
1475  Log() << kWARNING << "<UIntToTargetSelection>: unknown method TargetSelection: " << its << Endl;
1476  return kMean;
1477  }
1478  return kMean;
1479 }
1480 
1481 ////////////////////////////////////////////////////////////////////////////////
1482 /// store the variable names in all foams
1483 
1485 {
1486  for (UInt_t ifoam=0; ifoam<fFoam.size(); ifoam++) {
1487  for (Int_t idim=0; idim<fFoam.at(ifoam)->GetTotDim(); idim++) {
1489  fFoam.at(ifoam)->AddVariableName(DataInfo().GetTargetInfo(idim-DataInfo().GetNVariables()).GetExpression().Data());
1490  else
1491  fFoam.at(ifoam)->AddVariableName(DataInfo().GetVariableInfo(idim).GetExpression().Data());
1492  }
1493  }
1494 }
1495 
1496 ////////////////////////////////////////////////////////////////////////////////
1497 /// write PDEFoam-specific classifier response
1498 /// NOT IMPLEMENTED YET!
1499 
1500 void TMVA::MethodPDEFoam::MakeClassSpecific( std::ostream& /*fout*/, const TString& /*className*/ ) const
1501 {
1502 }
1503 
1504 ////////////////////////////////////////////////////////////////////////////////
1505 /// provide help message
1506 
1508 {
1509  Log() << Endl;
1510  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
1511  Log() << Endl;
1512  Log() << "PDE-Foam is a variation of the PDE-RS method using a self-adapting" << Endl;
1513  Log() << "binning method to divide the multi-dimensional variable space into a" << Endl;
1514  Log() << "finite number of hyper-rectangles (cells). The binning algorithm " << Endl;
1515  Log() << "adjusts the size and position of a predefined number of cells such" << Endl;
1516  Log() << "that the variance of the signal and background densities inside the " << Endl;
1517  Log() << "cells reaches a minimum" << Endl;
1518  Log() << Endl;
1519  Log() << gTools().Color("bold") << "--- Use of booking options:" << gTools().Color("reset") << Endl;
1520  Log() << Endl;
1521  Log() << "The PDEFoam classifier supports two different algorithms: " << Endl;
1522  Log() << Endl;
1523  Log() << " (1) Create one foam, which stores the signal over background" << Endl;
1524  Log() << " probability density. During foam buildup the variance of the" << Endl;
1525  Log() << " discriminant inside the cells is minimised." << Endl;
1526  Log() << Endl;
1527  Log() << " Booking option: SigBgSeparated=F" << Endl;
1528  Log() << Endl;
1529  Log() << " (2) Create two separate foams, one for the signal events and one for" << Endl;
1530  Log() << " background events. During foam buildup the variance of the" << Endl;
1531  Log() << " event density inside the cells is minimised separately for" << Endl;
1532  Log() << " signal and background." << Endl;
1533  Log() << Endl;
1534  Log() << " Booking option: SigBgSeparated=T" << Endl;
1535  Log() << Endl;
1536  Log() << "The following options can be set (the listed values are found to be a" << Endl;
1537  Log() << "good starting point for most applications):" << Endl;
1538  Log() << Endl;
1539  Log() << " SigBgSeparate False Separate Signal and Background" << Endl;
1540  Log() << " TailCut 0.001 Fraction of outlier events that excluded" << Endl;
1541  Log() << " from the foam in each dimension " << Endl;
1542  Log() << " VolFrac 0.0666 Volume fraction (used for density calculation" << Endl;
1543  Log() << " during foam build-up) " << Endl;
1544  Log() << " nActiveCells 500 Maximal number of active cells in final foam " << Endl;
1545  Log() << " nSampl 2000 Number of MC events per cell in foam build-up " << Endl;
1546  Log() << " nBin 5 Number of bins used in foam build-up " << Endl;
1547  Log() << " Nmin 100 Number of events in cell required to split cell" << Endl;
1548  Log() << " Kernel None Kernel type used (possible values are: None," << Endl;
1549  Log() << " Gauss)" << Endl;
1550  Log() << " Compress True Compress foam output file " << Endl;
1551  Log() << Endl;
1552  Log() << " Additional regression options:" << Endl;
1553  Log() << Endl;
1554  Log() << "MultiTargetRegression False Do regression with multiple targets " << Endl;
1555  Log() << " TargetSelection Mean Target selection method (possible values are: " << Endl;
1556  Log() << " Mean, Mpv)" << Endl;
1557  Log() << Endl;
1558  Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
1559  Log() << Endl;
1560  Log() << "The performance of the two implementations was found to be similar for" << Endl;
1561  Log() << "most examples studied. For the same number of cells per foam, the two-" << Endl;
1562  Log() << "foam option approximately doubles the amount of computer memory needed" << Endl;
1563  Log() << "during classification. For special cases where the event-density" << Endl;
1564  Log() << "distribution of signal and background events is very different, the" << Endl;
1565  Log() << "two-foam option was found to perform significantly better than the" << Endl;
1566  Log() << "option with only one foam." << Endl;
1567  Log() << Endl;
1568  Log() << "In order to gain better classification performance we recommend to set" << Endl;
1569  Log() << "the parameter \"nActiveCells\" to a high value." << Endl;
1570  Log() << Endl;
1571  Log() << "The parameter \"VolFrac\" specifies the size of the sampling volume" << Endl;
1572  Log() << "during foam buildup and should be tuned in order to achieve optimal" << Endl;
1573  Log() << "performance. A larger box leads to a reduced statistical uncertainty" << Endl;
1574  Log() << "for small training samples and to smoother sampling. A smaller box on" << Endl;
1575  Log() << "the other hand increases the sensitivity to statistical fluctuations" << Endl;
1576  Log() << "in the training samples, but for sufficiently large training samples" << Endl;
1577  Log() << "it will result in a more precise local estimate of the sampled" << Endl;
1578  Log() << "density. In general, higher dimensional problems require larger box" << Endl;
1579  Log() << "sizes, due to the reduced average number of events per box volume. The" << Endl;
1580  Log() << "default value of 0.0666 was optimised for an example with 5" << Endl;
1581  Log() << "observables and training samples of the order of 50000 signal and" << Endl;
1582  Log() << "background events each." << Endl;
1583  Log() << Endl;
1584  Log() << "Furthermore kernel weighting can be activated, which will lead to an" << Endl;
1585  Log() << "additional performance improvement. Note that Gauss weighting will" << Endl;
1586  Log() << "significantly increase the response time of the method. LinNeighbors" << Endl;
1587  Log() << "weighting performs a linear interpolation with direct neighbor cells" << Endl;
1588  Log() << "for each dimension and is much faster than Gauss weighting." << Endl;
1589  Log() << Endl;
1590  Log() << "The classification results were found to be rather insensitive to the" << Endl;
1591  Log() << "values of the parameters \"nSamples\" and \"nBin\"." << Endl;
1592 }
This PDEFoam kernel estimates a cell value for a given event by weighting all cell values with a gaus...
This is a concrete implementation of PDEFoam.
void Train(void)
Train PDE-Foam depending on the set options.
PDEFoamCell * GetDau1() const
Definition: PDEFoamCell.h:95
std::vector< Float_t > fXmax
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3251
The PDEFoam method is an extension of the PDERS method, which divides the multi-dimensional phase spa...
Definition: MethodPDEFoam.h:67
UInt_t GetNVariables() const
Definition: DataSetInfo.h:110
float xmin
Definition: THbookFile.cxx:93
virtual void Reset()
reset MethodPDEFoam:
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Singleton class for Global types used by TMVA.
Definition: Types.h:73
long long Long64_t
Definition: RtypesCore.h:69
Bool_t fFillFoamWithOrigWeights
This class is the abstract kernel interface for PDEFoam.
UInt_t KernelToUInt(EKernel ker) const
void GetNCuts(PDEFoamCell *cell, std::vector< UInt_t > &nCuts)
Fill in &#39;nCuts&#39; the number of cuts made in every foam dimension, starting at the root cell &#39;cell&#39;...
float Float_t
Definition: RtypesCore.h:53
This PDEFoam variant is used to estimate multiple targets by creating an event density foam (PDEFoamE...
PDEFoam * InitFoam(TString, EFoamType, UInt_t cls=0)
Create a new PDEFoam, set the PDEFoam options (nCells, nBin, Xmin, Xmax, etc.) and initialize the PDE...
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:687
UInt_t GetNvar() const
Definition: MethodBase.h:335
void PrintCoefficients(void)
THist< 1, float, THistStatContent, THistStatUncertainty > TH1F
Definition: THist.hxx:285
Config & gConfig()
MsgLogger & Log() const
Definition: Configurable.h:122
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:47
EAnalysisType
Definition: Types.h:127
Virtual base Class for all MVA method.
Definition: MethodBase.h:109
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Definition: DataSet.cxx:216
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
Basic string class.
Definition: TString.h:131
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:567
void SetXmin(Int_t idim, Double_t wmin)
set lower foam bound in dimension idim
Definition: PDEFoam.cxx:266
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:385
Ranking for variables in method (implementation)
Definition: Ranking.h:48
int Int_t
Definition: RtypesCore.h:41
void TrainUnifiedClassification(void)
Create only one unified foam (fFoam[0]) whose cells contain the average discriminator (N_sig)/(N_sig ...
bool Bool_t
Definition: RtypesCore.h:59
void FillVariableNamesToFoam() const
store the variable names in all foams
UInt_t GetNClasses() const
Definition: DataSetInfo.h:136
UInt_t GetNTargets() const
Definition: MethodBase.h:337
virtual Double_t GetBinLowEdge(Int_t bin) const
Return bin lower edge for 1D histogram.
Definition: TH1.cxx:8445
void box(Int_t pat, Double_t x1, Double_t y1, Double_t x2, Double_t y2)
Definition: fillpatterns.C:1
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:353
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1136
void GetHelpMessage() const
provide help message
const TString & GetInputLabel(Int_t i) const
Definition: MethodBase.h:341
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition: DataSet.cxx:451
UInt_t fSignalClass
Definition: MethodBase.h:678
const TString & GetNormalization() const
Definition: DataSetInfo.h:114
Implementation of the CrossEntropy as separation criterion.
Definition: CrossEntropy.h:43
void ReadWeightsFromStream(std::istream &i)
read options and internal parameters
virtual ~MethodPDEFoam(void)
destructor
const Event * GetEvent() const
Definition: MethodBase.h:740
DataSet * Data() const
Definition: MethodBase.h:400
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
DataSetInfo & DataInfo() const
Definition: MethodBase.h:401
Bool_t DoRegression() const
Definition: MethodBase.h:429
void SetMinType(EMsgType minType)
Definition: MsgLogger.h:72
void SetVal(UInt_t ivar, Float_t val)
set variable ivar to val
Definition: Event.cxx:341
This PDEFoam variant stores in every cell the sum of event weights and the sum of the squared event w...
Definition: PDEFoamEvent.h:38
void DeclareOptions()
Declare MethodPDEFoam options.
Class that contains all the data information.
Definition: DataSetInfo.h:60
Implementation of the SdivSqrtSplusB as separation criterion.
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:382
std::vector< Float_t > fXmin
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:79
R__ALWAYS_INLINE Bool_t IsZombie() const
Definition: TObject.h:134
void SetXminXmax(TMVA::PDEFoam *)
Set Xmin, Xmax for every dimension in the given pdefoam object.
PDEFoam * ReadClonedFoamFromFile(TFile *, const TString &)
Reads a foam with name &#39;foamname&#39; from file, and returns a clone of the foam.
void SetMaxDepth(UInt_t maxdepth)
Definition: PDEFoam.h:205
Implementation of the MisClassificationError as separation criterion.
std::vector< Float_t > & GetTargets()
Definition: Event.h:98
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:408
Implementation of PDEFoam.
Definition: PDEFoam.h:77
void SetnSampl(Long_t nSampl)
Definition: PDEFoam.h:188
void SetNmin(UInt_t val)
Definition: PDEFoam.h:203
Bool_t DoMulticlass() const
Definition: MethodBase.h:430
void AddWeightsXMLTo(void *parent) const
create XML output of PDEFoam method variables
Int_t GetBest() const
Definition: PDEFoamCell.h:78
void SetXmax(Int_t idim, Double_t wmax)
set upper foam bound in dimension idim
Definition: PDEFoam.cxx:277
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:97
Double_t CalculateMVAError()
Calculate the error on the Mva value.
void Init(void)
default initialization called by all constructors
const char * GetName() const
Definition: MethodBase.h:325
void CalcXminXmax()
Determine foam range [fXmin, fXmax] for all dimensions, such that a fraction of &#39;fFrac&#39; events lie ou...
virtual Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
PDEFoam can handle classification with multiple classes and regression with one or more regression-ta...
MethodPDEFoam(const TString &jobName, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="PDEFoam")
init PDEFoam objects
Implementation of the GiniIndex as separation criterion.
Definition: GiniIndex.h:63
PDEFoamKernelBase * fKernelEstimator
EKernel UIntToKernel(UInt_t iker)
convert UInt_t to EKernel (used for reading weight files)
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
const Handle_t kNone
Definition: GuiTypes.h:87
ETargetSelection UIntToTargetSelection(UInt_t its)
convert UInt_t to ETargetSelection (used for reading weight files)
const Event * InverseTransform(const Event *, Bool_t suppressIfNoTargets=true) const
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
Return Mva-Value.
This PDEFoam variant stores in every cell the average target fTarget (see the Constructor) as well as...
Definition: PDEFoamTarget.h:38
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:360
void TrainMultiTargetRegression(void)
Training one (multi target regression) foam, whose cells contain the average event density...
UInt_t TargetSelectionToUInt(ETargetSelection ts) const
void SetDensity(PDEFoamDensityBase *dens)
Definition: PDEFoam.h:192
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:335
float xmax
Definition: THbookFile.cxx:93
void SetDim(Int_t kDim)
Sets dimension of cubical space.
Definition: PDEFoam.cxx:251
Tools & gTools()
void MakeClassSpecific(std::ostream &, const TString &) const
write PDEFoam-specific classifier response NOT IMPLEMENTED YET!
An interface to calculate the "SeparationGain" for different separation criteria used in various trai...
MsgLogger & Log() const
Definition: PDEFoam.h:238
TString GetWeightFileName() const
retrieve weight file name
void TrainSeparatedClassification(void)
Creation of 2 separated foams: one for signal events, one for background events.
void WriteFoamsToFile() const
Write PDEFoams to file.
This is a concrete implementation of PDEFoam.
Implementation of the GiniIndex With Laplace correction as separation criterion.
UInt_t GetNVariables() const
Definition: MethodBase.h:336
const Bool_t kFALSE
Definition: RtypesCore.h:88
Float_t GetValue(UInt_t ivar) const
return value of i&#39;th variable
Definition: Event.cxx:237
This PDEFoam variant acts like a decision tree and stores in every cell the discriminant.
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:675
#define ClassImp(name)
Definition: Rtypes.h:359
void ReadWeightsFromXML(void *wghtnode)
read PDEFoam variables from xml weight file
double Double_t
Definition: RtypesCore.h:55
void TrainMultiClassification()
Create one unified foam (see TrainUnifiedClassification()) for each class, where the cells of foam i ...
void SetnCells(Long_t nCells)
Definition: PDEFoam.h:187
std::vector< Float_t > * fMulticlassReturnVal
Definition: MethodBase.h:587
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition: DataSet.cxx:443
int type
Definition: TGX11.cxx:120
std::vector< PDEFoam * > fFoam
PDEFoamCell * GetDau0() const
Definition: PDEFoamCell.h:94
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1174
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
void AddPreDefVal(const T &)
Definition: Configurable.h:168
void ExitFromTraining()
Definition: MethodBase.h:453
PDEFoamKernelBase * CreatePDEFoamKernel()
create a pdefoam kernel estimator, depending on the current value of fKernel
This PDEFoam kernel estimates a cell value for a given event by weighting with cell values of the nea...
void TrainMonoTargetRegression(void)
Training one (mono target regression) foam, whose cells contain the average 0th target.
void Initialize()
Definition: PDEFoam.h:171
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:840
virtual TObject * Clone(const char *newname="") const
Make a clone of an object using the Streamer facility.
Definition: TObject.cxx:144
const std::vector< Float_t > & GetMulticlassValues()
Get the multiclass MVA response for the PDEFoam classifier.
#define REGISTER_METHOD(CLASS)
for example
const Ranking * CreateRanking()
Compute ranking of input variables from the number of cuts made in each PDEFoam dimension.
Ranking * fRanking
Definition: MethodBase.h:576
void DeleteFoams()
Deletes all trained foams.
std::vector< Float_t > & GetValues()
Definition: Event.h:89
Int_t GetStat() const
Definition: PDEFoamCell.h:91
Definition: file.py:1
virtual void AddRank(const Rank &rank)
Add a new rank take ownership of it.
Definition: Ranking.cxx:86
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:601
ETargetSelection fTargetSelection
Double_t GetOriginalWeight() const
Definition: Event.h:79
void SetnBin(Int_t nBin)
Definition: PDEFoam.h:189
void SetEvPerBin(Int_t EvPerBin)
Definition: PDEFoam.h:190
This is an abstract class, which provides an interface for a PDEFoam density estimator.
EDTSeparation fDTSeparation
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:586
virtual const std::vector< Float_t > & GetRegressionValues()
Return regression values for both multi- and mono-target regression.
This class is a trivial PDEFoam kernel estimator.
Double_t Sqrt(Double_t x)
Definition: TMath.h:690
void ProcessOptions()
process user options
double exp(double)
This is a concrete implementation of PDEFoam.
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:224
const Bool_t kTRUE
Definition: RtypesCore.h:87
This is a concrete implementation of PDEFoam.
void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:427
This PDEFoam variant stores in every cell the discriminant.
void ReadFoamsFromFile()
read foams from file
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:355
virtual void Close(Option_t *option="")
Close a file.
Definition: TFile.cxx:917
const char * Data() const
Definition: TString.h:364