Logo ROOT   6.08/07
Reference Guide
MethodBase.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation (see header for description) *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
19  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
20  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
21  * *
22  * Copyright (c) 2005-2011: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * U. of Bonn, Germany *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  * *
32  **********************************************************************************/
33 
34 ////////////////////////////////////////////////////////////////////////////////
35 
36 /* Begin_Html
37  Virtual base class for all MVA methods
38 
39  MethodBase hosts several specific evaluation methods.
40 
41  The kind of MVA that provides optimal performance in an analysis strongly
42  depends on the particular application. The evaluation factory provides a
43  number of numerical benchmark results to directly assess the performance
44  of the MVA training on the independent test sample. These are:
45  <ul>
46  <li> The <i>signal efficiency</i> at three representative background efficiencies
47  (which is 1 &minus; rejection).</li>
48  <li> The <i>significance</I> of an MVA estimator, defined by the difference
49  between the MVA mean values for signal and background, divided by the
50  quadratic sum of their root mean squares.</li>
51  <li> The <i>separation</i> of an MVA <i>x</i>, defined by the integral
52  &frac12;&int;(S(x) &minus; B(x))<sup>2</sup>/(S(x) + B(x))dx, where
53  S(x) and B(x) are the signal and background distributions, respectively.
54  The separation is zero for identical signal and background MVA shapes,
55  and it is one for disjoint shapes.</li>
56  <li> <a name="mu_transform">
57  The average, &int;x &mu;(S(x))dx, of the signal &mu;-transform.
58  The &mu;-transform of an MVA denotes the transformation that yields
59  a uniform background distribution. In this way, the signal distributions
60  S(x) can be directly compared among the various MVAs. The stronger S(x)
61  peaks towards one, the better is the discrimination of the MVA. The
62  &mu;-transform is
63  <a href=http://tel.ccsd.cnrs.fr/documents/archives0/00/00/29/91/index_fr.html>documented here</a>.
64  </ul>
65  The MVA standard output also prints the linear correlation coefficients between
66  signal and background, which can be useful to eliminate variables that exhibit too
67  strong correlations.
68 
69  End_Html */
70 //_______________________________________________________________________
71 
72 #include "TMVA/MethodBase.h"
73 
74 #include "TMVA/Config.h"
75 #include "TMVA/Configurable.h"
76 #include "TMVA/DataSetInfo.h"
77 #include "TMVA/DataSet.h"
78 #include "TMVA/Factory.h"
79 #include "TMVA/IMethod.h"
80 #include "TMVA/MsgLogger.h"
81 #include "TMVA/PDF.h"
82 #include "TMVA/Ranking.h"
83 #include "TMVA/Factory.h"
84 #include "TMVA/DataLoader.h"
85 #include "TMVA/Tools.h"
86 #include "TMVA/Results.h"
88 #include "TMVA/ResultsRegression.h"
89 #include "TMVA/ResultsMulticlass.h"
90 #include "TMVA/RootFinder.h"
91 #include "TMVA/Timer.h"
92 #include "TMVA/Tools.h"
93 #include "TMVA/TSpline1.h"
94 #include "TMVA/Types.h"
98 #include "TMVA/VariableInfo.h"
101 #include "TMVA/VariableTransform.h"
102 #include "TMVA/Version.h"
103 
104 #include "TROOT.h"
105 #include "TSystem.h"
106 #include "TObjString.h"
107 #include "TQObject.h"
108 #include "TSpline.h"
109 #include "TMatrix.h"
110 #include "TMath.h"
111 #include "TH1F.h"
112 #include "TH2F.h"
113 #include "TFile.h"
114 #include "TKey.h"
115 #include "TGraph.h"
116 #include "Riostream.h"
117 #include "TXMLEngine.h"
118 
119 #include <iomanip>
120 #include <iostream>
121 #include <fstream>
122 #include <sstream>
123 #include <cstdlib>
124 #include <algorithm>
125 #include <limits>
126 
127 
129 
130 using std::endl;
131 using std::atof;
132 
133 //const Int_t MethodBase_MaxIterations_ = 200;
135 
136 //const Int_t NBIN_HIST_PLOT = 100;
137 const Int_t NBIN_HIST_HIGH = 10000;
138 
139 #ifdef _WIN32
140 /* Disable warning C4355: 'this' : used in base member initializer list */
141 #pragma warning ( disable : 4355 )
142 #endif
143 
144 
145 #include "TGraph.h"
146 #include "TMultiGraph.h"
147 
148 ////////////////////////////////////////////////////////////////////////////////
149 /// standard constructor
151 {
152  fNumGraphs = 0;
153  fIndex = 0;
154 }
155 
156 ////////////////////////////////////////////////////////////////////////////////
157 /// standard destructor
159 {
160  if (fMultiGraph){
161  delete fMultiGraph;
162  fMultiGraph = nullptr;
163  }
164  return;
165 }
166 
167 ////////////////////////////////////////////////////////////////////////////////
168 /// This function gets some title and it creates a TGraph for every title.
169 /// It also sets up the style for every TGraph. All graphs are added to a single TMultiGrah.
170 /// \param[in] graphTtitles vector of titles
171 void TMVA::IPythonInteractive::Init(std::vector<TString>& graphTitles)
172 {
173  if (fNumGraphs!=0){
174  std::cerr << kERROR << "IPythonInteractive::Init: already initialized..." << std::endl;
175  return;
176  }
177  Int_t color = 2;
178  for(auto& title : graphTitles){
179  fGraphs.push_back( new TGraph() );
180  fGraphs.back()->SetTitle(title);
181  fGraphs.back()->SetName(title);
182  fGraphs.back()->SetFillColor(color);
183  fGraphs.back()->SetLineColor(color);
184  fGraphs.back()->SetMarkerColor(color);
185  fMultiGraph->Add(fGraphs.back());
186  color += 2;
187  fNumGraphs += 1;
188  }
189  return;
190 }
191 
192 ////////////////////////////////////////////////////////////////////////////////
193 /// This function sets the point number to 0 for all graphs.
195 {
196  for(Int_t i=0; i<fNumGraphs; i++){
197  fGraphs[i]->Set(0);
198  }
199 }
200 
201 ////////////////////////////////////////////////////////////////////////////////
202 /// This function is used only in the two-TGraph case; it adds one new data point to each of the two graphs.
203 /// \param[in] x the x coordinate
204 /// \param[in] y1 the y coordinate for the first TGraph
205 /// \param[in] y2 the y coordinate for the second TGraph
207 {
208  fGraphs[0]->Set(fIndex+1);
209  fGraphs[1]->Set(fIndex+1);
210  fGraphs[0]->SetPoint(fIndex, x, y1);
211  fGraphs[1]->SetPoint(fIndex, x, y2);
212  fIndex++;
213  return;
214 }
215 
216 ////////////////////////////////////////////////////////////////////////////////
217 /// This function can add data points to as many TGraps as we have.
218 /// \param[in] dat vector of data points. The dat[0] contains the x coordinate,
219 /// dat[1] contains the y coordinate for first TGraph, dat[2] for second, ...
220 void TMVA::IPythonInteractive::AddPoint(std::vector<Double_t>& dat)
221 {
222  for(Int_t i=0; i<fNumGraphs;i++){
223  fGraphs[i]->Set(fIndex+1);
224  fGraphs[i]->SetPoint(fIndex, dat[0], dat[i+1]);
225  }
226  fIndex++;
227  return;
228 }
229 
230 
231 ////////////////////////////////////////////////////////////////////////////////
232 /// standard constructor
233 
235  Types::EMVA methodType,
236  const TString& methodTitle,
237  DataSetInfo& dsi,
238  const TString& theOption) :
239  IMethod(),
240  Configurable ( theOption ),
241  fTmpEvent ( 0 ),
242  fRanking ( 0 ),
243  fInputVars ( 0 ),
244  fAnalysisType ( Types::kNoAnalysisType ),
245  fRegressionReturnVal ( 0 ),
246  fMulticlassReturnVal ( 0 ),
247  fDataSetInfo ( dsi ),
248  fSignalReferenceCut ( 0.5 ),
249  fSignalReferenceCutOrientation( 1. ),
250  fVariableTransformType ( Types::kSignal ),
251  fJobName ( jobName ),
252  fMethodName ( methodTitle ),
253  fMethodType ( methodType ),
254  fTestvar ( "" ),
255  fTMVATrainingVersion ( TMVA_VERSION_CODE ),
256  fROOTTrainingVersion ( ROOT_VERSION_CODE ),
257  fConstructedFromWeightFile ( kFALSE ),
258  fBaseDir ( 0 ),
259  fMethodBaseDir ( 0 ),
260  fFile ( 0 ),
261  fSilentFile (kFALSE),
262  fModelPersistence (kTRUE),
263  fWeightFile ( "" ),
264  fEffS ( 0 ),
265  fDefaultPDF ( 0 ),
266  fMVAPdfS ( 0 ),
267  fMVAPdfB ( 0 ),
268  fSplS ( 0 ),
269  fSplB ( 0 ),
270  fSpleffBvsS ( 0 ),
271  fSplTrainS ( 0 ),
272  fSplTrainB ( 0 ),
273  fSplTrainEffBvsS ( 0 ),
274  fVarTransformString ( "None" ),
275  fTransformationPointer ( 0 ),
276  fTransformation ( dsi, methodTitle ),
277  fVerbose ( kFALSE ),
278  fVerbosityLevelString ( "Default" ),
279  fHelp ( kFALSE ),
280  fHasMVAPdfs ( kFALSE ),
281  fIgnoreNegWeightsInTraining( kFALSE ),
282  fSignalClass ( 0 ),
283  fBackgroundClass ( 0 ),
284  fSplRefS ( 0 ),
285  fSplRefB ( 0 ),
286  fSplTrainRefS ( 0 ),
287  fSplTrainRefB ( 0 ),
288  fSetupCompleted (kFALSE)
289 {
290  SetTestvarName();
292 
293 // // default extension for weight files
294 }
295 
296 ////////////////////////////////////////////////////////////////////////////////
297 /// constructor used for Testing + Application of the MVA,
298 /// only (no training), using given WeightFiles
299 
301  DataSetInfo& dsi,
302  const TString& weightFile ) :
303  IMethod(),
304  Configurable(""),
305  fTmpEvent ( 0 ),
306  fRanking ( 0 ),
307  fInputVars ( 0 ),
308  fAnalysisType ( Types::kNoAnalysisType ),
309  fRegressionReturnVal ( 0 ),
310  fMulticlassReturnVal ( 0 ),
311  fDataSetInfo ( dsi ),
312  fSignalReferenceCut ( 0.5 ),
313  fVariableTransformType ( Types::kSignal ),
314  fJobName ( "" ),
315  fMethodName ( "MethodBase" ),
316  fMethodType ( methodType ),
317  fTestvar ( "" ),
318  fTMVATrainingVersion ( 0 ),
319  fROOTTrainingVersion ( 0 ),
321  fBaseDir ( 0 ),
322  fMethodBaseDir ( 0 ),
323  fFile ( 0 ),
326  fWeightFile ( weightFile ),
327  fEffS ( 0 ),
328  fDefaultPDF ( 0 ),
329  fMVAPdfS ( 0 ),
330  fMVAPdfB ( 0 ),
331  fSplS ( 0 ),
332  fSplB ( 0 ),
333  fSpleffBvsS ( 0 ),
334  fSplTrainS ( 0 ),
335  fSplTrainB ( 0 ),
336  fSplTrainEffBvsS ( 0 ),
337  fVarTransformString ( "None" ),
339  fTransformation ( dsi, "" ),
340  fVerbose ( kFALSE ),
341  fVerbosityLevelString ( "Default" ),
342  fHelp ( kFALSE ),
343  fHasMVAPdfs ( kFALSE ),
345  fSignalClass ( 0 ),
346  fBackgroundClass ( 0 ),
347  fSplRefS ( 0 ),
348  fSplRefB ( 0 ),
349  fSplTrainRefS ( 0 ),
350  fSplTrainRefB ( 0 ),
352 {
354 // // constructor used for Testing + Application of the MVA,
355 // // only (no training), using given WeightFiles
356 }
357 
358 ////////////////////////////////////////////////////////////////////////////////
359 /// destructor
360 
362 {
363  // destructor
364  if (!fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling destructor of method which got never setup" << Endl;
365 
366  // destructor
367  if (fInputVars != 0) { fInputVars->clear(); delete fInputVars; }
368  if (fRanking != 0) delete fRanking;
369 
370  // PDFs
371  if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
372  if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
373  if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
374 
375  // Splines
376  if (fSplS) { delete fSplS; fSplS = 0; }
377  if (fSplB) { delete fSplB; fSplB = 0; }
378  if (fSpleffBvsS) { delete fSpleffBvsS; fSpleffBvsS = 0; }
379  if (fSplRefS) { delete fSplRefS; fSplRefS = 0; }
380  if (fSplRefB) { delete fSplRefB; fSplRefB = 0; }
381  if (fSplTrainRefS) { delete fSplTrainRefS; fSplTrainRefS = 0; }
382  if (fSplTrainRefB) { delete fSplTrainRefB; fSplTrainRefB = 0; }
384 
385  for (Int_t i = 0; i < 2; i++ ) {
386  if (fEventCollections.at(i)) {
387  for (std::vector<Event*>::const_iterator it = fEventCollections.at(i)->begin();
388  it != fEventCollections.at(i)->end(); it++) {
389  delete (*it);
390  }
391  delete fEventCollections.at(i);
392  fEventCollections.at(i) = 0;
393  }
394  }
395 
398 }
399 
400 ////////////////////////////////////////////////////////////////////////////////
401 /// setup of methods
402 
404 {
405  // setup of methods
406 
407  if (fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling SetupMethod for the second time" << Endl;
408  InitBase();
410  Init();
411  DeclareOptions();
413 }
414 
415 ////////////////////////////////////////////////////////////////////////////////
416 /// process all options
417 /// the "CheckForUnusedOptions" is done in an independent call, since it may be overridden by derived class
418 /// (sometimes, eg, fitters are used which can only be implemented during training phase)
419 
421 {
423  ProcessOptions();
424 }
425 
426 ////////////////////////////////////////////////////////////////////////////////
427 /// check may be overridden by derived class
428 /// (sometimes, eg, fitters are used which can only be implemented during training phase)
429 
431 {
433 }
434 
435 ////////////////////////////////////////////////////////////////////////////////
436 /// default initialization called by all constructors
437 
439 {
440  SetConfigDescription( "Configuration options for classifier architecture and tuning" );
441 
445 
446  fSplTrainS = 0;
447  fSplTrainB = 0;
448  fSplTrainEffBvsS = 0;
449  fMeanS = -1;
450  fMeanB = -1;
451  fRmsS = -1;
452  fRmsB = -1;
453  fXmin = DBL_MAX;
454  fXmax = -DBL_MAX;
456  fSplRefS = 0;
457  fSplRefB = 0;
458 
459  fTrainTime = -1.;
460  fTestTime = -1.;
461 
462  fRanking = 0;
463 
464  // temporary until the move to DataSet is complete
465  fInputVars = new std::vector<TString>;
466  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
467  fInputVars->push_back(DataInfo().GetVariableInfo(ivar).GetLabel());
468  }
471 
472  fEventCollections.resize( 2 );
473  fEventCollections.at(0) = 0;
474  fEventCollections.at(1) = 0;
475 
476  // retrieve signal and background class index
477  if (DataInfo().GetClassInfo("Signal") != 0) {
478  fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
479  }
480  if (DataInfo().GetClassInfo("Background") != 0) {
481  fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
482  }
483 
484  SetConfigDescription( "Configuration options for MVA method" );
485  SetConfigName( TString("Method") + GetMethodTypeName() );
486 }
487 
488 ////////////////////////////////////////////////////////////////////////////////
489 /// define the options (their key words) that can be set in the option string
490 /// here the options valid for ALL MVA methods are declared.
491 /// known options: VariableTransform=None,Decorrelated,PCA to use transformed variables
492 /// instead of the original ones
493 /// VariableTransformType=Signal,Background which decorrelation matrix to use
494 /// in the method. Only the Likelihood
495 /// Method can make proper use of independent
496 /// transformations of signal and background
497 /// fNbinsMVAPdf = 50 Number of bins used to create a PDF of MVA
498 /// fNsmoothMVAPdf = 2 Number of times a histogram is smoothed before creating the PDF
499 /// fHasMVAPdfs create PDFs for the MVA outputs
500 /// V for Verbose output (!V) for non-verbose
501 /// H for Help message
502 
504 {
505  DeclareOptionRef( fVerbose, "V", "Verbose output (short form of \"VerbosityLevel\" below - overrides the latter one)" );
506 
507  DeclareOptionRef( fVerbosityLevelString="Default", "VerbosityLevel", "Verbosity level" );
508  AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
509  AddPreDefVal( TString("Debug") );
510  AddPreDefVal( TString("Verbose") );
511  AddPreDefVal( TString("Info") );
512  AddPreDefVal( TString("Warning") );
513  AddPreDefVal( TString("Error") );
514  AddPreDefVal( TString("Fatal") );
515 
516  // If True (default): write all training results (weights) as text files only;
517  // if False: write also in ROOT format (not available for all methods - will abort if not
518  fTxtWeightsOnly = kTRUE; // OBSOLETE !!!
519  fNormalise = kFALSE; // OBSOLETE !!!
520 
521  DeclareOptionRef( fVarTransformString, "VarTransform", "List of variable transformations performed before training, e.g., \"D_Background,P_Signal,G,N_AllClasses\" for: \"Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)\"" );
522 
523  DeclareOptionRef( fHelp, "H", "Print method-specific help message" );
524 
525  DeclareOptionRef( fHasMVAPdfs, "CreateMVAPdfs", "Create PDFs for classifier outputs (signal and background)" );
526 
527  DeclareOptionRef( fIgnoreNegWeightsInTraining, "IgnoreNegWeightsInTraining",
528  "Events with negative weights are ignored in the training (but are included for testing and performance evaluation)" );
529 }
530 
531 ////////////////////////////////////////////////////////////////////////////////
532 /// the option string is decoded, for available options see "DeclareOptions"
533 
535 {
536  if (HasMVAPdfs()) {
537  // setting the default bin num... maybe should be static ? ==> Please no static (JS)
538  // You can't use the logger in the constructor!!! Log() << kINFO << "Create PDFs" << Endl;
539  // reading every PDF's definition and passing the option string to the next one to be read and marked
540  fDefaultPDF = new PDF( TString(GetName())+"_PDF", GetOptions(), "MVAPdf" );
544  fMVAPdfB = new PDF( TString(GetName())+"_PDFBkg", fDefaultPDF->GetOptions(), "MVAPdfBkg", fDefaultPDF );
548  fMVAPdfS = new PDF( TString(GetName())+"_PDFSig", fMVAPdfB->GetOptions(), "MVAPdfSig", fDefaultPDF );
552 
553  // the final marked option string is written back to the original methodbase
555  }
556 
558  DataInfo(),
560  Log() );
561 
562  if (!HasMVAPdfs()) {
563  if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
564  if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
565  if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
566  }
567 
568  if (fVerbose) { // overwrites other settings
569  fVerbosityLevelString = TString("Verbose");
570  Log().SetMinType( kVERBOSE );
571  }
572  else if (fVerbosityLevelString == "Debug" ) Log().SetMinType( kDEBUG );
573  else if (fVerbosityLevelString == "Verbose" ) Log().SetMinType( kVERBOSE );
574  else if (fVerbosityLevelString == "Info" ) Log().SetMinType( kINFO );
575  else if (fVerbosityLevelString == "Warning" ) Log().SetMinType( kWARNING );
576  else if (fVerbosityLevelString == "Error" ) Log().SetMinType( kERROR );
577  else if (fVerbosityLevelString == "Fatal" ) Log().SetMinType( kFATAL );
578  else if (fVerbosityLevelString != "Default" ) {
579  Log() << kFATAL << "<ProcessOptions> Verbosity level type '"
580  << fVerbosityLevelString << "' unknown." << Endl;
581  }
583 }
584 
585 ////////////////////////////////////////////////////////////////////////////////
586 /// options that are used ONLY for the READER to ensure backward compatibility
587 /// they are hence without any effect (the reader is only reading the training
588 /// options that HAD been used at the training of the .xml weightfile at hand
589 
591 {
592  DeclareOptionRef( fNormalise=kFALSE, "Normalise", "Normalise input variables" ); // don't change the default !!!
593  DeclareOptionRef( fUseDecorr=kFALSE, "D", "Use-decorrelated-variables flag" );
594  DeclareOptionRef( fVariableTransformTypeString="Signal", "VarTransformType",
595  "Use signal or background events to derive for variable transformation (the transformation is applied on both types of, course)" );
596  AddPreDefVal( TString("Signal") );
597  AddPreDefVal( TString("Background") );
598  DeclareOptionRef( fTxtWeightsOnly=kTRUE, "TxtWeightFilesOnly", "If True: write all training results (weights) as text files (False: some are written in ROOT format)" );
599  // Why on earth ?? was this here? Was the verbosity level option meant to 'disapear? Not a good idea i think..
600  // DeclareOptionRef( fVerbosityLevelString="Default", "VerboseLevel", "Verbosity level" );
601  // AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
602  // AddPreDefVal( TString("Debug") );
603  // AddPreDefVal( TString("Verbose") );
604  // AddPreDefVal( TString("Info") );
605  // AddPreDefVal( TString("Warning") );
606  // AddPreDefVal( TString("Error") );
607  // AddPreDefVal( TString("Fatal") );
608  DeclareOptionRef( fNbinsMVAPdf = 60, "NbinsMVAPdf", "Number of bins used for the PDFs of classifier outputs" );
609  DeclareOptionRef( fNsmoothMVAPdf = 2, "NsmoothMVAPdf", "Number of smoothing iterations for classifier PDFs" );
610 }
611 
612 
613 ////////////////////////////////////////////////////////////////////////////////
614 /// call the Optimzier with the set of paremeters and ranges that
615 /// are meant to be tuned.
616 
617 std::map<TString,Double_t> TMVA::MethodBase::OptimizeTuningParameters(TString /* fomType */ , TString /* fitType */)
618 {
619  // this is just a dummy... needs to be implemented for each method
620  // individually (as long as we don't have it automatized via the
621  // configuraion string
622 
623  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Parameter optimization is not yet implemented for method "
624  << GetName() << Endl;
625  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Currently we need to set hardcoded which parameter is tuned in which ranges"<<Endl;
626 
627  std::map<TString,Double_t> tunedParameters;
628  tunedParameters.size(); // just to get rid of "unused" warning
629  return tunedParameters;
630 
631 }
632 
633 ////////////////////////////////////////////////////////////////////////////////
634 /// set the tuning parameters accoding to the argument
635 /// This is just a dummy .. have a look at the MethodBDT how you could
636 /// perhaps implment the same thing for the other Classifiers..
637 
638 void TMVA::MethodBase::SetTuneParameters(std::map<TString,Double_t> /* tuneParameters */)
639 {
640 }
641 
642 ////////////////////////////////////////////////////////////////////////////////
643 
645 {
647  Event::SetIsTraining(kTRUE); // used to set negative event weights to zero if chosen to do so
648 
649  // train the MVA method
650  if (Help()) PrintHelpMessage();
651 
652  // all histograms should be created in the method's subdirectory
653  if(!IsSilentFile()) BaseDir()->cd();
654 
655  // once calculate all the transformation (e.g. the sequence of Decorr:Gauss:Decorr)
656  // needed for this classifier
658 
659  // call training of derived MVA
660  Log() << kDEBUG //<<Form("\tDataset[%s] : ",DataInfo().GetName())
661  << "Begin training" << Endl;
663  Timer traintimer( nEvents, GetName(), kTRUE );
664  Train();
665  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName()
666  << "\tEnd of training " << Endl;
667  SetTrainTime(traintimer.ElapsedSeconds());
668  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
669  << "Elapsed time for training with " << nEvents << " events: "
670  << traintimer.GetElapsedTime() << " " << Endl;
671 
672  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
673  << "\tCreate MVA output for ";
674 
675  // create PDFs for the signal and background MVA distributions (if required)
676  if (DoMulticlass()) {
677  Log() <<Form("[%s] : ",DataInfo().GetName())<< "Multiclass classification on training sample" << Endl;
679  }
680  else if (!DoRegression()) {
681 
682  Log() <<Form("[%s] : ",DataInfo().GetName())<< "classification on training sample" << Endl;
684  if (HasMVAPdfs()) {
685  CreateMVAPdfs();
687  }
688 
689  } else {
690 
691  Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "regression on training sample" << Endl;
693 
694  if (HasMVAPdfs() ) {
695  Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create PDFs" << Endl;
696  CreateMVAPdfs();
697  }
698  }
699 
700  // write the current MVA state into stream
701  // produced are one text file and one ROOT file
703 
704  // produce standalone make class (presently only supported for classification)
705  if ((!DoRegression()) && (fModelPersistence)) MakeClass();
706 
707  // write additional monitoring histograms to main target file (not the weight file)
708  // again, make sure the histograms go into the method's subdirectory
709  if(!IsSilentFile())
710  {
711  BaseDir()->cd();
713  }
714 }
715 
716 ////////////////////////////////////////////////////////////////////////////////
717 
719 {
720  if (!DoRegression()) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Trying to use GetRegressionDeviation() with a classification job" << Endl;
721  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
723  bool truncate = false;
724  TH1F* h1 = regRes->QuadraticDeviation( tgtNum , truncate, 1.);
725  stddev = sqrt(h1->GetMean());
726  truncate = true;
727  Double_t yq[1], xq[]={0.9};
728  h1->GetQuantiles(1,yq,xq);
729  TH1F* h2 = regRes->QuadraticDeviation( tgtNum , truncate, yq[0]);
730  stddev90Percent = sqrt(h2->GetMean());
731  delete h1;
732  delete h2;
733 }
734 
735 ////////////////////////////////////////////////////////////////////////////////
736 /// prepare tree branch with the method's discriminating variable
737 
739 {
740  Data()->SetCurrentType(type);
741 
742  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
743 
745 
747 
748  // use timer
749  Timer timer( nEvents, GetName(), kTRUE );
750  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
751  << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
752 
753  regRes->Resize( nEvents );
754  for (Int_t ievt=0; ievt<nEvents; ievt++) {
755  Data()->SetCurrentEvent(ievt);
756  std::vector< Float_t > vals = GetRegressionValues();
757  regRes->SetValue( vals, ievt );
758  timer.DrawProgressBar( ievt );
759  }
760 
761  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
762  << "Elapsed time for evaluation of " << nEvents << " events: "
763  << timer.GetElapsedTime() << " " << Endl;
764 
765  // store time used for testing
766  if (type==Types::kTesting)
767  SetTestTime(timer.ElapsedSeconds());
768 
769  TString histNamePrefix(GetTestvarName());
770  histNamePrefix += (type==Types::kTraining?"train":"test");
771  regRes->CreateDeviationHistograms( histNamePrefix );
772 }
773 
774 ////////////////////////////////////////////////////////////////////////////////
775 /// prepare tree branch with the method's discriminating variable
776 
778 {
779  Data()->SetCurrentType(type);
780 
781  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
782 
783  ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
784  if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in AddMulticlassOutput, exiting."<<Endl;
785 
787 
788  // use timer
789  Timer timer( nEvents, GetName(), kTRUE );
790 
791  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Multiclass evaluation of " << GetMethodName() << " on "
792  << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
793 
794  resMulticlass->Resize( nEvents );
795  for (Int_t ievt=0; ievt<nEvents; ievt++) {
796  Data()->SetCurrentEvent(ievt);
797  std::vector< Float_t > vals = GetMulticlassValues();
798  resMulticlass->SetValue( vals, ievt );
799  timer.DrawProgressBar( ievt );
800  }
801 
802  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
803  << "Elapsed time for evaluation of " << nEvents << " events: "
804  << timer.GetElapsedTime() << " " << Endl;
805 
806  // store time used for testing
807  if (type==Types::kTesting)
808  SetTestTime(timer.ElapsedSeconds());
809 
810  TString histNamePrefix(GetTestvarName());
811  histNamePrefix += (type==Types::kTraining?"_Train":"_Test");
812  resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH );
813 }
814 
815 
816 
817 ////////////////////////////////////////////////////////////////////////////////
818 
819 void TMVA::MethodBase::NoErrorCalc(Double_t* const err, Double_t* const errUpper) {
820  if (err) *err=-1;
821  if (errUpper) *errUpper=-1;
822 }
823 
824 ////////////////////////////////////////////////////////////////////////////////
825 
826 Double_t TMVA::MethodBase::GetMvaValue( const Event* const ev, Double_t* err, Double_t* errUpper ) {
827  fTmpEvent = ev;
828  Double_t val = GetMvaValue(err, errUpper);
829  fTmpEvent = 0;
830  return val;
831 }
832 
833 ////////////////////////////////////////////////////////////////////////////////
834 /// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
835 /// for a quick determination if an event would be selected as signal or background
836 
839 }
840 ////////////////////////////////////////////////////////////////////////////////
841 /// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
842 /// for a quick determination if an event with this mva output value would be selected as signal or background
843 
846 }
847 
848 ////////////////////////////////////////////////////////////////////////////////
849 /// prepare tree branch with the method's discriminating variable
850 
852 {
853  Data()->SetCurrentType(type);
854 
855  ResultsClassification* clRes =
857 
859  clRes->Resize( nEvents );
860 
861  // use timer
862  Timer timer( nEvents, GetName(), kTRUE );
863  std::vector<Double_t> mvaValues = GetMvaValues(0, nEvents, true);
864 
865  // store time used for testing
866  if (type==Types::kTesting)
867  SetTestTime(timer.ElapsedSeconds());
868 
869  // load mva values to results object
870  for (Int_t ievt=0; ievt<nEvents; ievt++) {
871  clRes->SetValue( mvaValues[ievt], ievt );
872  }
873 }
874 
875 ////////////////////////////////////////////////////////////////////////////////
876 /// get all the MVA values for the events of the current Data type
877 std::vector<Double_t> TMVA::MethodBase::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
878 {
879 
881  if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
882  if (firstEvt < 0) firstEvt = 0;
883  std::vector<Double_t> values(lastEvt-firstEvt);
884  // log in case of looping on all the events
885  nEvents = values.size();
886 
887  // use timer
888  Timer timer( nEvents, GetName(), kTRUE );
889 
890  if (logProgress)
891  Log() << kHEADER<<Form("[%s] : ",DataInfo().GetName())<< "Evaluation of " << GetMethodName() << " on "
892  << (Data()->GetCurrentType()==Types::kTraining?"training":"testing") << " sample (" << nEvents << " events)" << Endl;
893 
894 
895  for (Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
896  Data()->SetCurrentEvent(ievt);
897  values[ievt] = GetMvaValue();
898 
899  // print progress
900  if (logProgress) {
901  Int_t modulo = Int_t(nEvents/100);
902  if (modulo <= 0 ) modulo = 1;
903  if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
904  }
905  }
906  if (logProgress) {
907  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
908  << "Elapsed time for evaluation of " << nEvents << " events: "
909  << timer.GetElapsedTime() << " " << Endl;
910  }
911 
912  return values;
913 }
914 
915 ////////////////////////////////////////////////////////////////////////////////
916 /// prepare tree branch with the method's discriminating variable
917 
919 {
920  Data()->SetCurrentType(type);
921 
922  ResultsClassification* mvaProb =
924 
926 
927  // use timer
928  Timer timer( nEvents, GetName(), kTRUE );
929 
930  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
931  << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
932 
933  mvaProb->Resize( nEvents );
934  for (Int_t ievt=0; ievt<nEvents; ievt++) {
935 
936  Data()->SetCurrentEvent(ievt);
937  Float_t proba = ((Float_t)GetProba( GetMvaValue(), 0.5 ));
938  if (proba < 0) break;
939  mvaProb->SetValue( proba, ievt );
940 
941  // print progress
942  Int_t modulo = Int_t(nEvents/100);
943  if (modulo <= 0 ) modulo = 1;
944  if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
945  }
946 
947  Log() << kDEBUG <<Form("Dataset[%s] : ",DataInfo().GetName())
948  << "Elapsed time for evaluation of " << nEvents << " events: "
949  << timer.GetElapsedTime() << " " << Endl;
950 }
951 
952 ////////////////////////////////////////////////////////////////////////////////
953 /// calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
954 ///
955 /// bias = average deviation
956 /// dev = average absolute deviation
957 /// rms = rms of deviation
958 ///
959 
961  Double_t& dev, Double_t& devT,
962  Double_t& rms, Double_t& rmsT,
963  Double_t& mInf, Double_t& mInfT,
964  Double_t& corr,
966 {
967  Types::ETreeType savedType = Data()->GetCurrentType();
968  Data()->SetCurrentType(type);
969 
970  bias = 0; biasT = 0; dev = 0; devT = 0; rms = 0; rmsT = 0;
971  Double_t sumw = 0;
972  Double_t m1 = 0, m2 = 0, s1 = 0, s2 = 0, s12 = 0; // for correlation
973  const Int_t nevt = GetNEvents();
974  Float_t* rV = new Float_t[nevt];
975  Float_t* tV = new Float_t[nevt];
976  Float_t* wV = new Float_t[nevt];
977  Float_t xmin = 1e30, xmax = -1e30;
978  for (Long64_t ievt=0; ievt<nevt; ievt++) {
979 
980  const Event* ev = Data()->GetEvent(ievt); // NOTE: need untransformed event here !
981  Float_t t = ev->GetTarget(0);
982  Float_t w = ev->GetWeight();
984  Float_t d = (r-t);
985 
986  // find min/max
987  xmin = TMath::Min(xmin, TMath::Min(t, r));
988  xmax = TMath::Max(xmax, TMath::Max(t, r));
989 
990  // store for truncated RMS computation
991  rV[ievt] = r;
992  tV[ievt] = t;
993  wV[ievt] = w;
994 
995  // compute deviation-squared
996  sumw += w;
997  bias += w * d;
998  dev += w * TMath::Abs(d);
999  rms += w * d * d;
1000 
1001  // compute correlation between target and regression estimate
1002  m1 += t*w; s1 += t*t*w;
1003  m2 += r*w; s2 += r*r*w;
1004  s12 += t*r;
1005  }
1006 
1007  // standard quantities
1008  bias /= sumw;
1009  dev /= sumw;
1010  rms /= sumw;
1011  rms = TMath::Sqrt(rms - bias*bias);
1012 
1013  // correlation
1014  m1 /= sumw;
1015  m2 /= sumw;
1016  corr = s12/sumw - m1*m2;
1017  corr /= TMath::Sqrt( (s1/sumw - m1*m1) * (s2/sumw - m2*m2) );
1018 
1019  // create histogram required for computation of mutual information
1020  TH2F* hist = new TH2F( "hist", "hist", 150, xmin, xmax, 100, xmin, xmax );
1021  TH2F* histT = new TH2F( "histT", "histT", 150, xmin, xmax, 100, xmin, xmax );
1022 
1023  // compute truncated RMS and fill histogram
1024  Double_t devMax = bias + 2*rms;
1025  Double_t devMin = bias - 2*rms;
1026  sumw = 0;
1027  int ic=0;
1028  for (Long64_t ievt=0; ievt<nevt; ievt++) {
1029  Float_t d = (rV[ievt] - tV[ievt]);
1030  hist->Fill( rV[ievt], tV[ievt], wV[ievt] );
1031  if (d >= devMin && d <= devMax) {
1032  sumw += wV[ievt];
1033  biasT += wV[ievt] * d;
1034  devT += wV[ievt] * TMath::Abs(d);
1035  rmsT += wV[ievt] * d * d;
1036  histT->Fill( rV[ievt], tV[ievt], wV[ievt] );
1037  ic++;
1038  }
1039  }
1040  biasT /= sumw;
1041  devT /= sumw;
1042  rmsT /= sumw;
1043  rmsT = TMath::Sqrt(rmsT - biasT*biasT);
1044  mInf = gTools().GetMutualInformation( *hist );
1045  mInfT = gTools().GetMutualInformation( *histT );
1046 
1047  delete hist;
1048  delete histT;
1049 
1050  delete [] rV;
1051  delete [] tV;
1052  delete [] wV;
1053 
1054  Data()->SetCurrentType(savedType);
1055 }
1056 
1057 
1058 ////////////////////////////////////////////////////////////////////////////////
1059 /// test multiclass classification
1060 
1062 {
1064  if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in TestMulticlass, exiting."<<Endl;
1065  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for test data..." << Endl;
1066  for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
1067  resMulticlass->GetBestMultiClassCuts(icls);
1068  }
1069 }
1070 
1071 
1072 ////////////////////////////////////////////////////////////////////////////////
1073 /// initialization
1074 
1076 {
1078 
1079  ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
1081 
1082  // sanity checks: tree must exist, and theVar must be in tree
1083  if (0==mvaRes && !(GetMethodTypeName().Contains("Cuts"))) {
1084  Log()<<Form("Dataset[%s] : ",DataInfo().GetName()) << "mvaRes " << mvaRes << " GetMethodTypeName " << GetMethodTypeName()
1085  << " contains " << !(GetMethodTypeName().Contains("Cuts")) << Endl;
1086  Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<TestInit> Test variable " << GetTestvarName()
1087  << " not found in tree" << Endl;
1088  }
1089 
1090  // basic statistics operations are made in base class
1093 
1094  // choose reasonable histogram ranges, by removing outliers
1095  Double_t nrms = 10;
1096  fXmin = TMath::Max( TMath::Min( fMeanS - nrms*fRmsS, fMeanB - nrms*fRmsB ), fXmin );
1097  fXmax = TMath::Min( TMath::Max( fMeanS + nrms*fRmsS, fMeanB + nrms*fRmsB ), fXmax );
1098 
1099  // determine cut orientation
1101 
1102  // fill 2 types of histograms for the various analyses
1103  // this one is for actual plotting
1104 
1105  Double_t sxmax = fXmax+0.00001;
1106 
1107  // classifier response distributions for training sample
1108  // MVA plots used for graphics representation (signal)
1109  TString TestvarName;
1110  if(IsSilentFile())
1111  {
1112  TestvarName=Form("[%s]%s",DataInfo().GetName(),GetTestvarName().Data());
1113  }else
1114  {
1115  TestvarName=GetTestvarName();
1116  }
1117  TH1* mva_s = new TH1D( TestvarName + "_S",TestvarName + "_S", fNbinsMVAoutput, fXmin, sxmax );
1118  TH1* mva_b = new TH1D( TestvarName + "_B",TestvarName + "_B", fNbinsMVAoutput, fXmin, sxmax );
1119  mvaRes->Store(mva_s, "MVA_S");
1120  mvaRes->Store(mva_b, "MVA_B");
1121  mva_s->Sumw2();
1122  mva_b->Sumw2();
1123 
1124  TH1* proba_s = 0;
1125  TH1* proba_b = 0;
1126  TH1* rarity_s = 0;
1127  TH1* rarity_b = 0;
1128  if (HasMVAPdfs()) {
1129  // P(MVA) plots used for graphics representation
1130  proba_s = new TH1D( TestvarName + "_Proba_S", TestvarName + "_Proba_S", fNbinsMVAoutput, 0.0, 1.0 );
1131  proba_b = new TH1D( TestvarName + "_Proba_B", TestvarName + "_Proba_B", fNbinsMVAoutput, 0.0, 1.0 );
1132  mvaRes->Store(proba_s, "Prob_S");
1133  mvaRes->Store(proba_b, "Prob_B");
1134  proba_s->Sumw2();
1135  proba_b->Sumw2();
1136 
1137  // R(MVA) plots used for graphics representation
1138  rarity_s = new TH1D( TestvarName + "_Rarity_S", TestvarName + "_Rarity_S", fNbinsMVAoutput, 0.0, 1.0 );
1139  rarity_b = new TH1D( TestvarName + "_Rarity_B", TestvarName + "_Rarity_B", fNbinsMVAoutput, 0.0, 1.0 );
1140  mvaRes->Store(rarity_s, "Rar_S");
1141  mvaRes->Store(rarity_b, "Rar_B");
1142  rarity_s->Sumw2();
1143  rarity_b->Sumw2();
1144  }
1145 
1146  // MVA plots used for efficiency calculations (large number of bins)
1147  TH1* mva_eff_s = new TH1D( TestvarName + "_S_high", TestvarName + "_S_high", fNbinsH, fXmin, sxmax );
1148  TH1* mva_eff_b = new TH1D( TestvarName + "_B_high", TestvarName + "_B_high", fNbinsH, fXmin, sxmax );
1149  mvaRes->Store(mva_eff_s, "MVA_HIGHBIN_S");
1150  mvaRes->Store(mva_eff_b, "MVA_HIGHBIN_B");
1151  mva_eff_s->Sumw2();
1152  mva_eff_b->Sumw2();
1153 
1154  // fill the histograms
1155 
1156  ResultsClassification* mvaProb = dynamic_cast<ResultsClassification*>
1158 
1159  Log() << kHEADER <<Form("[%s] : ",DataInfo().GetName())<< "Loop over test events and fill histograms with classifier response..." << Endl << Endl;
1160  if (mvaProb) Log() << kINFO << "Also filling probability and rarity histograms (on request)..." << Endl;
1161  std::vector<Bool_t>* mvaResTypes = mvaRes->GetValueVectorTypes();
1162 
1163  //LM: this is needed to avoid crashes in ROCCurve
1164  if ( mvaRes->GetSize() != GetNEvents() ) {
1165  Log() << kFATAL << TString::Format("Inconsistent result size %lld with number of events %u ", mvaRes->GetSize() , GetNEvents() ) << Endl;
1166  assert(mvaRes->GetSize() == GetNEvents());
1167  }
1168 
1169  for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1170 
1171  const Event* ev = GetEvent(ievt);
1172  Float_t v = (*mvaRes)[ievt][0];
1173  Float_t w = ev->GetWeight();
1174 
1175  if (DataInfo().IsSignal(ev)) {
1176  mvaResTypes->push_back(kTRUE);
1177  mva_s ->Fill( v, w );
1178  if (mvaProb) {
1179  proba_s->Fill( (*mvaProb)[ievt][0], w );
1180  rarity_s->Fill( GetRarity( v ), w );
1181  }
1182 
1183  mva_eff_s ->Fill( v, w );
1184  }
1185  else {
1186  mvaResTypes->push_back(kFALSE);
1187  mva_b ->Fill( v, w );
1188  if (mvaProb) {
1189  proba_b->Fill( (*mvaProb)[ievt][0], w );
1190  rarity_b->Fill( GetRarity( v ), w );
1191  }
1192  mva_eff_b ->Fill( v, w );
1193  }
1194  }
1195 
1196  // uncomment those (and several others if you want unnormalized output
1197  gTools().NormHist( mva_s );
1198  gTools().NormHist( mva_b );
1199  gTools().NormHist( proba_s );
1200  gTools().NormHist( proba_b );
1201  gTools().NormHist( rarity_s );
1202  gTools().NormHist( rarity_b );
1203  gTools().NormHist( mva_eff_s );
1204  gTools().NormHist( mva_eff_b );
1205 
1206  // create PDFs from histograms, using default splines, and no additional smoothing
1207  if (fSplS) { delete fSplS; fSplS = 0; }
1208  if (fSplB) { delete fSplB; fSplB = 0; }
1209  fSplS = new PDF( TString(GetName()) + " PDF Sig", mva_s, PDF::kSpline2 );
1210  fSplB = new PDF( TString(GetName()) + " PDF Bkg", mva_b, PDF::kSpline2 );
1211 }
1212 
1213 ////////////////////////////////////////////////////////////////////////////////
1214 /// general method used in writing the header of the weight files where
1215 /// the used variables, variable transformation type etc. is specified
1216 
1217 void TMVA::MethodBase::WriteStateToStream( std::ostream& tf ) const
1218 {
1219  TString prefix = "";
1220  UserGroup_t * userInfo = gSystem->GetUserInfo();
1221 
1222  tf << prefix << "#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1223  tf << prefix << "Method : " << GetMethodTypeName() << "::" << GetMethodName() << std::endl;
1224  tf.setf(std::ios::left);
1225  tf << prefix << "TMVA Release : " << std::setw(10) << GetTrainingTMVAVersionString() << " ["
1226  << GetTrainingTMVAVersionCode() << "]" << std::endl;
1227  tf << prefix << "ROOT Release : " << std::setw(10) << GetTrainingROOTVersionString() << " ["
1228  << GetTrainingROOTVersionCode() << "]" << std::endl;
1229  tf << prefix << "Creator : " << userInfo->fUser << std::endl;
1230  tf << prefix << "Date : "; TDatime *d = new TDatime; tf << d->AsString() << std::endl; delete d;
1231  tf << prefix << "Host : " << gSystem->GetBuildNode() << std::endl;
1232  tf << prefix << "Dir : " << gSystem->WorkingDirectory() << std::endl;
1233  tf << prefix << "Training events: " << Data()->GetNTrainingEvents() << std::endl;
1234 
1235  TString analysisType(((const_cast<TMVA::MethodBase*>(this)->GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification"));
1236 
1237  tf << prefix << "Analysis type : " << "[" << ((GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification") << "]" << std::endl;
1238  tf << prefix << std::endl;
1239 
1240  delete userInfo;
1241 
1242  // First write all options
1243  tf << prefix << std::endl << prefix << "#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1244  WriteOptionsToStream( tf, prefix );
1245  tf << prefix << std::endl;
1246 
1247  // Second write variable info
1248  tf << prefix << std::endl << prefix << "#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1249  WriteVarsToStream( tf, prefix );
1250  tf << prefix << std::endl;
1251 }
1252 
1253 ////////////////////////////////////////////////////////////////////////////////
1254 /// xml writing
1255 
1256 void TMVA::MethodBase::AddInfoItem( void* gi, const TString& name, const TString& value) const
1257 {
1258  void* it = gTools().AddChild(gi,"Info");
1259  gTools().AddAttr(it,"name", name);
1260  gTools().AddAttr(it,"value", value);
1261 }
1262 
1263 ////////////////////////////////////////////////////////////////////////////////
1264 
1266  if (analysisType == Types::kRegression) {
1267  AddRegressionOutput( type );
1268  } else if (analysisType == Types::kMulticlass) {
1269  AddMulticlassOutput( type );
1270  } else {
1271  AddClassifierOutput( type );
1272  if (HasMVAPdfs())
1273  AddClassifierOutputProb( type );
1274  }
1275 }
1276 
1277 ////////////////////////////////////////////////////////////////////////////////
1278 /// general method used in writing the header of the weight files where
1279 /// the used variables, variable transformation type etc. is specified
1280 
1281 void TMVA::MethodBase::WriteStateToXML( void* parent ) const
1282 {
1283  if (!parent) return;
1284 
1285  UserGroup_t* userInfo = gSystem->GetUserInfo();
1286 
1287  void* gi = gTools().AddChild(parent, "GeneralInfo");
1288  AddInfoItem( gi, "TMVA Release", GetTrainingTMVAVersionString() + " [" + gTools().StringFromInt(GetTrainingTMVAVersionCode()) + "]" );
1289  AddInfoItem( gi, "ROOT Release", GetTrainingROOTVersionString() + " [" + gTools().StringFromInt(GetTrainingROOTVersionCode()) + "]");
1290  AddInfoItem( gi, "Creator", userInfo->fUser);
1291  TDatime dt; AddInfoItem( gi, "Date", dt.AsString());
1292  AddInfoItem( gi, "Host", gSystem->GetBuildNode() );
1293  AddInfoItem( gi, "Dir", gSystem->WorkingDirectory());
1294  AddInfoItem( gi, "Training events", gTools().StringFromInt(Data()->GetNTrainingEvents()));
1295  AddInfoItem( gi, "TrainingTime", gTools().StringFromDouble(const_cast<TMVA::MethodBase*>(this)->GetTrainTime()));
1296 
1297  Types::EAnalysisType aType = const_cast<TMVA::MethodBase*>(this)->GetAnalysisType();
1298  TString analysisType((aType==Types::kRegression) ? "Regression" :
1299  (aType==Types::kMulticlass ? "Multiclass" : "Classification"));
1300  AddInfoItem( gi, "AnalysisType", analysisType );
1301  delete userInfo;
1302 
1303  // write options
1304  AddOptionsXMLTo( parent );
1305 
1306  // write variable info
1307  AddVarsXMLTo( parent );
1308 
1309  // write spectator info
1310  if (fModelPersistence)
1311  AddSpectatorsXMLTo( parent );
1312 
1313  // write class info if in multiclass mode
1314  AddClassesXMLTo(parent);
1315 
1316  // write target info if in regression mode
1317  if (DoRegression()) AddTargetsXMLTo(parent);
1318 
1319  // write transformations
1320  GetTransformationHandler(false).AddXMLTo( parent );
1321 
1322  // write MVA variable distributions
1323  void* pdfs = gTools().AddChild(parent, "MVAPdfs");
1324  if (fMVAPdfS) fMVAPdfS->AddXMLTo(pdfs);
1325  if (fMVAPdfB) fMVAPdfB->AddXMLTo(pdfs);
1326 
1327  // write weights
1328  AddWeightsXMLTo( parent );
1329 }
1330 
1331 ////////////////////////////////////////////////////////////////////////////////
1332 /// write reference MVA distributions (and other information)
1333 /// to a ROOT type weight file
1334 
1336 {
1337  Bool_t addDirStatus = TH1::AddDirectoryStatus();
1338  TH1::AddDirectory( 0 ); // this avoids the binding of the hists in PDF to the current ROOT file
1339  fMVAPdfS = (TMVA::PDF*)rf.Get( "MVA_PDF_Signal" );
1340  fMVAPdfB = (TMVA::PDF*)rf.Get( "MVA_PDF_Background" );
1341 
1342  TH1::AddDirectory( addDirStatus );
1343 
1344  ReadWeightsFromStream( rf );
1345 
1346  SetTestvarName();
1347 }
1348 
1349 ////////////////////////////////////////////////////////////////////////////////
1350 /// write options and weights to file
1351 /// note that each one text file for the main configuration information
1352 /// and one ROOT file for ROOT objects are created
1353 
1355 {
1356  // ---- create the text file
1357  TString tfname( GetWeightFileName() );
1358 
1359  // writing xml file
1360  TString xmlfname( tfname ); xmlfname.ReplaceAll( ".txt", ".xml" );
1361  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1362  << "Creating xml weight file: "
1363  << gTools().Color("lightblue") << xmlfname << gTools().Color("reset") << Endl;
1364  void* doc = gTools().xmlengine().NewDoc();
1365  void* rootnode = gTools().AddChild(0,"MethodSetup", "", true);
1366  gTools().xmlengine().DocSetRootElement(doc,rootnode);
1367  gTools().AddAttr(rootnode,"Method", GetMethodTypeName() + "::" + GetMethodName());
1368  WriteStateToXML(rootnode);
1369  gTools().xmlengine().SaveDoc(doc,xmlfname);
1370  gTools().xmlengine().FreeDoc(doc);
1371 }
1372 
1373 ////////////////////////////////////////////////////////////////////////////////
1374 /// Function to read options and weights from file
1375 
1377 {
1378  // get the filename
1379 
1380  TString tfname(GetWeightFileName());
1381 
1382  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1383  << "Reading weight file: "
1384  << gTools().Color("lightblue") << tfname << gTools().Color("reset") << Endl;
1385 
1386  if (tfname.EndsWith(".xml") ) {
1387 #if ROOT_VERSION_CODE >= ROOT_VERSION(5,29,0)
1388  void* doc = gTools().xmlengine().ParseFile(tfname,gTools().xmlenginebuffersize()); // the default buffer size in TXMLEngine::ParseFile is 100k. Starting with ROOT 5.29 one can set the buffer size, see: http://savannah.cern.ch/bugs/?78864. This might be necessary for large XML files
1389 #else
1390  void* doc = gTools().xmlengine().ParseFile(tfname);
1391 #endif
1392  void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1393  ReadStateFromXML(rootnode);
1394  gTools().xmlengine().FreeDoc(doc);
1395  }
1396  else {
1397  std::filebuf fb;
1398  fb.open(tfname.Data(),std::ios::in);
1399  if (!fb.is_open()) { // file not found --> Error
1400  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ReadStateFromFile> "
1401  << "Unable to open input weight file: " << tfname << Endl;
1402  }
1403  std::istream fin(&fb);
1404  ReadStateFromStream(fin);
1405  fb.close();
1406  }
1407  if (!fTxtWeightsOnly) {
1408  // ---- read the ROOT file
1409  TString rfname( tfname ); rfname.ReplaceAll( ".txt", ".root" );
1410  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Reading root weight file: "
1411  << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
1412  TFile* rfile = TFile::Open( rfname, "READ" );
1413  ReadStateFromStream( *rfile );
1414  rfile->Close();
1415  }
1416 }
1417 ////////////////////////////////////////////////////////////////////////////////
1418 /// for reading from memory
1419 
1420 void TMVA::MethodBase::ReadStateFromXMLString( const char* xmlstr ) {
1421 #if ROOT_VERSION_CODE >= ROOT_VERSION(5,26,00)
1422  void* doc = gTools().xmlengine().ParseString(xmlstr);
1423  void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1424  ReadStateFromXML(rootnode);
1425  gTools().xmlengine().FreeDoc(doc);
1426 #else
1427  Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Method MethodBase::ReadStateFromXMLString( const char* xmlstr = "
1428  << xmlstr << " ) is not available for ROOT versions prior to 5.26/00." << Endl;
1429 #endif
1430 
1431  return;
1432 }
1433 
1434 ////////////////////////////////////////////////////////////////////////////////
1435 
1436 void TMVA::MethodBase::ReadStateFromXML( void* methodNode )
1437 {
1438  TString fullMethodName;
1439  gTools().ReadAttr( methodNode, "Method", fullMethodName );
1440  fMethodName = fullMethodName(fullMethodName.Index("::")+2,fullMethodName.Length());
1441 
1442  // update logger
1443  Log().SetSource( GetName() );
1444  Log() << kDEBUG//<<Form("Dataset[%s] : ",DataInfo().GetName())
1445  << "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1446 
1447  // after the method name is read, the testvar can be set
1448  SetTestvarName();
1449 
1450  TString nodeName("");
1451  void* ch = gTools().GetChild(methodNode);
1452  while (ch!=0) {
1453  nodeName = TString( gTools().GetName(ch) );
1454 
1455  if (nodeName=="GeneralInfo") {
1456  // read analysis type
1457 
1458  TString name(""),val("");
1459  void* antypeNode = gTools().GetChild(ch);
1460  while (antypeNode) {
1461  gTools().ReadAttr( antypeNode, "name", name );
1462 
1463  if (name == "TrainingTime")
1464  gTools().ReadAttr( antypeNode, "value", fTrainTime );
1465 
1466  if (name == "AnalysisType") {
1467  gTools().ReadAttr( antypeNode, "value", val );
1468  val.ToLower();
1469  if (val == "regression" ) SetAnalysisType( Types::kRegression );
1470  else if (val == "classification" ) SetAnalysisType( Types::kClassification );
1471  else if (val == "multiclass" ) SetAnalysisType( Types::kMulticlass );
1472  else Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Analysis type " << val << " is not known." << Endl;
1473  }
1474 
1475  if (name == "TMVA Release" || name == "TMVA") {
1476  TString s;
1477  gTools().ReadAttr( antypeNode, "value", s);
1478  fTMVATrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1479  Log() << kDEBUG <<Form("[%s] : ",DataInfo().GetName()) << "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
1480  }
1481 
1482  if (name == "ROOT Release" || name == "ROOT") {
1483  TString s;
1484  gTools().ReadAttr( antypeNode, "value", s);
1485  fROOTTrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1486  Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1487  << "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
1488  }
1489  antypeNode = gTools().GetNextChild(antypeNode);
1490  }
1491  }
1492  else if (nodeName=="Options") {
1493  ReadOptionsFromXML(ch);
1494  ParseOptions();
1495 
1496  }
1497  else if (nodeName=="Variables") {
1499  }
1500  else if (nodeName=="Spectators") {
1502  }
1503  else if (nodeName=="Classes") {
1504  if (DataInfo().GetNClasses()==0) ReadClassesFromXML(ch);
1505  }
1506  else if (nodeName=="Targets") {
1507  if (DataInfo().GetNTargets()==0 && DoRegression()) ReadTargetsFromXML(ch);
1508  }
1509  else if (nodeName=="Transformations") {
1511  }
1512  else if (nodeName=="MVAPdfs") {
1513  TString pdfname;
1514  if (fMVAPdfS) { delete fMVAPdfS; fMVAPdfS=0; }
1515  if (fMVAPdfB) { delete fMVAPdfB; fMVAPdfB=0; }
1516  void* pdfnode = gTools().GetChild(ch);
1517  if (pdfnode) {
1518  gTools().ReadAttr(pdfnode, "Name", pdfname);
1519  fMVAPdfS = new PDF(pdfname);
1520  fMVAPdfS->ReadXML(pdfnode);
1521  pdfnode = gTools().GetNextChild(pdfnode);
1522  gTools().ReadAttr(pdfnode, "Name", pdfname);
1523  fMVAPdfB = new PDF(pdfname);
1524  fMVAPdfB->ReadXML(pdfnode);
1525  }
1526  }
1527  else if (nodeName=="Weights") {
1528  ReadWeightsFromXML(ch);
1529  }
1530  else {
1531  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Unparsed XML node: '" << nodeName << "'" << Endl;
1532  }
1533  ch = gTools().GetNextChild(ch);
1534 
1535  }
1536 
1537  // update transformation handler
1538  if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1539 }
1540 
1541 ////////////////////////////////////////////////////////////////////////////////
1542 /// read the header from the weight files of the different MVA methods
1543 
1544 void TMVA::MethodBase::ReadStateFromStream( std::istream& fin )
1545 {
1546  char buf[512];
1547 
1548  // when reading from stream, we assume the files are produced with TMVA<=397
1550 
1551 
1552  // first read the method name
1553  GetLine(fin,buf);
1554  while (!TString(buf).BeginsWith("Method")) GetLine(fin,buf);
1555  TString namestr(buf);
1556 
1557  TString methodType = namestr(0,namestr.Index("::"));
1558  methodType = methodType(methodType.Last(' '),methodType.Length());
1559  methodType = methodType.Strip(TString::kLeading);
1560 
1561  TString methodName = namestr(namestr.Index("::")+2,namestr.Length());
1562  methodName = methodName.Strip(TString::kLeading);
1563  if (methodName == "") methodName = methodType;
1564  fMethodName = methodName;
1565 
1566  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1567 
1568  // update logger
1569  Log().SetSource( GetName() );
1570 
1571  // now the question is whether to read the variables first or the options (well, of course the order
1572  // of writing them needs to agree)
1573  //
1574  // the option "Decorrelation" is needed to decide if the variables we
1575  // read are decorrelated or not
1576  //
1577  // the variables are needed by some methods (TMLP) to build the NN
1578  // which is done in ProcessOptions so for the time being we first Read and Parse the options then
1579  // we read the variables, and then we process the options
1580 
1581  // now read all options
1582  GetLine(fin,buf);
1583  while (!TString(buf).BeginsWith("#OPT")) GetLine(fin,buf);
1584  ReadOptionsFromStream(fin);
1585  ParseOptions();
1586 
1587  // Now read variable info
1588  fin.getline(buf,512);
1589  while (!TString(buf).BeginsWith("#VAR")) fin.getline(buf,512);
1590  ReadVarsFromStream(fin);
1591 
1592  // now we process the options (of the derived class)
1593  ProcessOptions();
1594 
1595  if (IsNormalised()) {
1598  norm->BuildTransformationFromVarInfo( DataInfo().GetVariableInfos() );
1599  }
1600  VariableTransformBase *varTrafo(0), *varTrafo2(0);
1601  if ( fVarTransformString == "None") {
1602  if (fUseDecorr)
1604  } else if ( fVarTransformString == "Decorrelate" ) {
1606  } else if ( fVarTransformString == "PCA" ) {
1608  } else if ( fVarTransformString == "Uniform" ) {
1609  varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo(),"Uniform"), -1 );
1610  } else if ( fVarTransformString == "Gauss" ) {
1612  } else if ( fVarTransformString == "GaussDecorr" ) {
1615  } else {
1616  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ProcessOptions> Variable transform '"
1617  << fVarTransformString << "' unknown." << Endl;
1618  }
1619  // Now read decorrelation matrix if available
1620  if (GetTransformationHandler().GetTransformationList().GetSize() > 0) {
1621  fin.getline(buf,512);
1622  while (!TString(buf).BeginsWith("#MAT")) fin.getline(buf,512);
1623  if (varTrafo) {
1625  varTrafo->ReadTransformationFromStream(fin, trafo );
1626  }
1627  if (varTrafo2) {
1629  varTrafo2->ReadTransformationFromStream(fin, trafo );
1630  }
1631  }
1632 
1633 
1634  if (HasMVAPdfs()) {
1635  // Now read the MVA PDFs
1636  fin.getline(buf,512);
1637  while (!TString(buf).BeginsWith("#MVAPDFS")) fin.getline(buf,512);
1638  if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
1639  if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
1640  fMVAPdfS = new PDF(TString(GetName()) + " MVA PDF Sig");
1641  fMVAPdfB = new PDF(TString(GetName()) + " MVA PDF Bkg");
1644 
1645  fin >> *fMVAPdfS;
1646  fin >> *fMVAPdfB;
1647  }
1648 
1649  // Now read weights
1650  fin.getline(buf,512);
1651  while (!TString(buf).BeginsWith("#WGT")) fin.getline(buf,512);
1652  fin.getline(buf,512);
1653  ReadWeightsFromStream( fin );;
1654 
1655  // update transformation handler
1656  if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1657 
1658 }
1659 
1660 ////////////////////////////////////////////////////////////////////////////////
1661 /// write the list of variables (name, min, max) for a given data
1662 /// transformation method to the stream
1663 
1664 void TMVA::MethodBase::WriteVarsToStream( std::ostream& o, const TString& prefix ) const
1665 {
1666  o << prefix << "NVar " << DataInfo().GetNVariables() << std::endl;
1667  std::vector<VariableInfo>::const_iterator varIt = DataInfo().GetVariableInfos().begin();
1668  for (; varIt!=DataInfo().GetVariableInfos().end(); varIt++) { o << prefix; varIt->WriteToStream(o); }
1669  o << prefix << "NSpec " << DataInfo().GetNSpectators() << std::endl;
1670  varIt = DataInfo().GetSpectatorInfos().begin();
1671  for (; varIt!=DataInfo().GetSpectatorInfos().end(); varIt++) { o << prefix; varIt->WriteToStream(o); }
1672 }
1673 
1674 ////////////////////////////////////////////////////////////////////////////////
1675 /// Read the variables (name, min, max) for a given data
1676 /// transformation method from the stream. In the stream we only
1677 /// expect the limits which will be set
1678 
1679 void TMVA::MethodBase::ReadVarsFromStream( std::istream& istr )
1680 {
1681  TString dummy;
1682  UInt_t readNVar;
1683  istr >> dummy >> readNVar;
1684 
1685  if (readNVar!=DataInfo().GetNVariables()) {
1686  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1687  << " while there are " << readNVar << " variables declared in the file"
1688  << Endl;
1689  }
1690 
1691  // we want to make sure all variables are read in the order they are defined
1692  VariableInfo varInfo;
1693  std::vector<VariableInfo>::iterator varIt = DataInfo().GetVariableInfos().begin();
1694  int varIdx = 0;
1695  for (; varIt!=DataInfo().GetVariableInfos().end(); varIt++, varIdx++) {
1696  varInfo.ReadFromStream(istr);
1697  if (varIt->GetExpression() == varInfo.GetExpression()) {
1698  varInfo.SetExternalLink((*varIt).GetExternalLink());
1699  (*varIt) = varInfo;
1700  }
1701  else {
1702  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVarsFromStream>" << Endl;
1703  Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1704  Log() << kINFO << "is not the same as the one declared in the Reader (which is necessary for" << Endl;
1705  Log() << kINFO << "the correct working of the method):" << Endl;
1706  Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << varIt->GetExpression() << Endl;
1707  Log() << kINFO << " var #" << varIdx <<" declared in file : " << varInfo.GetExpression() << Endl;
1708  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1709  }
1710  }
1711 }
1712 
1713 ////////////////////////////////////////////////////////////////////////////////
1714 /// write variable info to XML
1715 
1716 void TMVA::MethodBase::AddVarsXMLTo( void* parent ) const
1717 {
1718  void* vars = gTools().AddChild(parent, "Variables");
1719  gTools().AddAttr( vars, "NVar", gTools().StringFromInt(DataInfo().GetNVariables()) );
1720 
1721  for (UInt_t idx=0; idx<DataInfo().GetVariableInfos().size(); idx++) {
1722  VariableInfo& vi = DataInfo().GetVariableInfos()[idx];
1723  void* var = gTools().AddChild( vars, "Variable" );
1724  gTools().AddAttr( var, "VarIndex", idx );
1725  vi.AddToXML( var );
1726  }
1727 }
1728 
1729 ////////////////////////////////////////////////////////////////////////////////
1730 /// write spectator info to XML
1731 
1732 void TMVA::MethodBase::AddSpectatorsXMLTo( void* parent ) const
1733 {
1734  void* specs = gTools().AddChild(parent, "Spectators");
1735 
1736  UInt_t writeIdx=0;
1737  for (UInt_t idx=0; idx<DataInfo().GetSpectatorInfos().size(); idx++) {
1738 
1739  VariableInfo& vi = DataInfo().GetSpectatorInfos()[idx];
1740 
1741  // we do not want to write spectators that are category-cuts,
1742  // except if the method is the category method and the spectators belong to it
1743  if (vi.GetVarType()=='C') continue;
1744 
1745  void* spec = gTools().AddChild( specs, "Spectator" );
1746  gTools().AddAttr( spec, "SpecIndex", writeIdx++ );
1747  vi.AddToXML( spec );
1748  }
1749  gTools().AddAttr( specs, "NSpec", gTools().StringFromInt(writeIdx) );
1750 }
1751 
1752 ////////////////////////////////////////////////////////////////////////////////
1753 /// write class info to XML
1754 
1755 void TMVA::MethodBase::AddClassesXMLTo( void* parent ) const
1756 {
1757  UInt_t nClasses=DataInfo().GetNClasses();
1758 
1759  void* classes = gTools().AddChild(parent, "Classes");
1760  gTools().AddAttr( classes, "NClass", nClasses );
1761 
1762  for (UInt_t iCls=0; iCls<nClasses; ++iCls) {
1763  ClassInfo *classInfo=DataInfo().GetClassInfo (iCls);
1764  TString className =classInfo->GetName();
1765  UInt_t classNumber=classInfo->GetNumber();
1766 
1767  void* classNode=gTools().AddChild(classes, "Class");
1768  gTools().AddAttr( classNode, "Name", className );
1769  gTools().AddAttr( classNode, "Index", classNumber );
1770  }
1771 }
1772 ////////////////////////////////////////////////////////////////////////////////
1773 /// write target info to XML
1774 
1775 void TMVA::MethodBase::AddTargetsXMLTo( void* parent ) const
1776 {
1777  void* targets = gTools().AddChild(parent, "Targets");
1778  gTools().AddAttr( targets, "NTrgt", gTools().StringFromInt(DataInfo().GetNTargets()) );
1779 
1780  for (UInt_t idx=0; idx<DataInfo().GetTargetInfos().size(); idx++) {
1781  VariableInfo& vi = DataInfo().GetTargetInfos()[idx];
1782  void* tar = gTools().AddChild( targets, "Target" );
1783  gTools().AddAttr( tar, "TargetIndex", idx );
1784  vi.AddToXML( tar );
1785  }
1786 }
1787 
1788 ////////////////////////////////////////////////////////////////////////////////
1789 /// read variable info from XML
1790 
1792 {
// Checks the variable list stored under the XML node `varnode` against the
// variables already registered in DataInfo(): the count ("NVar") and, entry
// by entry, the expressions have to agree; otherwise a kFATAL is logged.
// NOTE(review): the signature line is not part of this listing; `varnode`
// is presumably the XML node handed in by the caller.
1793  UInt_t readNVar;
1794  gTools().ReadAttr( varnode, "NVar", readNVar);
1795 
// the number of variables in the file must equal the number declared locally
1796  if (readNVar!=DataInfo().GetNVariables()) {
1797  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1798  << " while there are " << readNVar << " variables declared in the file"
1799  << Endl;
1800  }
1801 
1802  // we want to make sure all variables are read in the order they are defined
1803  VariableInfo readVarInfo, existingVarInfo;
1804  int varIdx = 0;
1805  void* ch = gTools().GetChild(varnode);
1806  while (ch) {
// "VarIndex" is taken from the file and used as vector index as is
// NOTE(review): no range check on varIdx is visible here.
1807  gTools().ReadAttr( ch, "VarIndex", varIdx);
// existingVarInfo is a by-value object, so this is a copy of the registered entry
1808  existingVarInfo = DataInfo().GetVariableInfos()[varIdx];
1809  readVarInfo.ReadFromXML(ch);
1810 
1811  if (existingVarInfo.GetExpression() == readVarInfo.GetExpression()) {
1812  readVarInfo.SetExternalLink(existingVarInfo.GetExternalLink());
// this overwrites the local copy only; the entry held by DataInfo() is not
// assigned to in this function -- NOTE(review): confirm that this is intended
1813  existingVarInfo = readVarInfo;
1814  }
1815  else {
// expression mismatch: report both expressions, then raise kFATAL
1816  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVariablesFromXML>" << Endl;
1817  Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1818  Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1819  Log() << kINFO << "correct working of the method):" << Endl;
1820  Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << existingVarInfo.GetExpression() << Endl;
1821  Log() << kINFO << " var #" << varIdx <<" declared in file : " << readVarInfo.GetExpression() << Endl;
1822  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1823  }
1824  ch = gTools().GetNextChild(ch);
1825  }
1826 }
1827 
1828 ////////////////////////////////////////////////////////////////////////////////
1829 /// read spectator info from XML
1830 
1832 {
// Checks the spectator list stored under the XML node `specnode` against the
// spectators registered in DataInfo(): the count ("NSpec") and, entry by
// entry, the expressions have to agree; otherwise a kFATAL is logged.
// NOTE(review): the signature line is not part of this listing; `specnode`
// is presumably the XML node handed in by the caller.
1833  UInt_t readNSpec;
1834  gTools().ReadAttr( specnode, "NSpec", readNSpec);
1835 
// the number of spectators in the file must equal GetNSpectators(kFALSE)
1836  if (readNSpec!=DataInfo().GetNSpectators(kFALSE)) {
1837  Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "You declared "<< DataInfo().GetNSpectators(kFALSE) << " spectators in the Reader"
1838  << " while there are " << readNSpec << " spectators declared in the file"
1839  << Endl;
1840  }
1841 
1842  // we want to make sure all spectators are read in the order they are defined
1843  VariableInfo readSpecInfo, existingSpecInfo;
1844  int specIdx = 0;
1845  void* ch = gTools().GetChild(specnode);
1846  while (ch) {
// "SpecIndex" is taken from the file and used as vector index as is
// NOTE(review): no range check on specIdx is visible here.
1847  gTools().ReadAttr( ch, "SpecIndex", specIdx);
// existingSpecInfo is a by-value object, so this is a copy of the registered entry
1848  existingSpecInfo = DataInfo().GetSpectatorInfos()[specIdx];
1849  readSpecInfo.ReadFromXML(ch);
1850 
1851  if (existingSpecInfo.GetExpression() == readSpecInfo.GetExpression()) {
1852  readSpecInfo.SetExternalLink(existingSpecInfo.GetExternalLink());
// this overwrites the local copy only; the entry held by DataInfo() is not
// assigned to in this function -- NOTE(review): confirm that this is intended
1853  existingSpecInfo = readSpecInfo;
1854  }
1855  else {
// expression mismatch: report both expressions, then raise kFATAL
1856  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadSpectatorsFromXML>" << Endl;
1857  Log() << kINFO << "The definition (or the order) of the spectators found in the input file is" << Endl;
1858  Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1859  Log() << kINFO << "correct working of the method):" << Endl;
1860  Log() << kINFO << " spec #" << specIdx <<" declared in Reader: " << existingSpecInfo.GetExpression() << Endl;
1861  Log() << kINFO << " spec #" << specIdx <<" declared in file : " << readSpecInfo.GetExpression() << Endl;
1862  Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1863  }
1864  ch = gTools().GetNextChild(ch);
1865  }
1866 }
1867 
1868 ////////////////////////////////////////////////////////////////////////////////
1869 /// read number of classes from XML
1870 
1872 {
// Registers the classes described under the XML node `clsnode` in DataInfo()
// and then sets fSignalClass / fBackgroundClass.
// NOTE(review): the signature line is not part of this listing; `clsnode`
// is presumably the XML node handed in by the caller.
1873  UInt_t readNCls;
1874  // coverity[tainted_data_argument]
1875  gTools().ReadAttr( clsnode, "NClass", readNCls);
1876 
1877  TString className="";
1878  UInt_t classIndex=0;
1879  void* ch = gTools().GetChild(clsnode);
// no child nodes: create readNCls classes with generated names "class0", "class1", ...
1880  if (!ch) {
1881  for (UInt_t icls = 0; icls<readNCls;++icls) {
1882  TString classname = Form("class%i",icls);
1883  DataInfo().AddClass(classname);
1884 
1885  }
1886  }
1887  else{
// child nodes present: add one class per child, in document order, by its "Name";
// the "Index" attribute is read into classIndex but not used further in this function
1888  while (ch) {
1889  gTools().ReadAttr( ch, "Index", classIndex);
1890  gTools().ReadAttr( ch, "Name", className );
1891  DataInfo().AddClass(className);
1892 
1893  ch = gTools().GetNextChild(ch);
1894  }
1895  }
1896 
1897  // retrieve signal and background class index
// fall back to 0 (signal) and 1 (background) when no class of that name exists
1898  if (DataInfo().GetClassInfo("Signal") != 0) {
1899  fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1900  }
1901  else
1902  fSignalClass=0;
1903  if (DataInfo().GetClassInfo("Background") != 0) {
1904  fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
1905  }
1906  else
1907  fBackgroundClass=1;
1908 }
1909 
1910 ////////////////////////////////////////////////////////////////////////////////
1911 /// read target info from XML
1912 
1914 {
// Adds one target to DataInfo() for every child of the XML node `tarnode`,
// using the child's "Expression" attribute.
// NOTE(review): the signature line is not part of this listing; `tarnode`
// is presumably the XML node handed in by the caller.
1915  UInt_t readNTar;
// "NTrgt" is read, but readNTar is not used afterwards in this function
1916  gTools().ReadAttr( tarnode, "NTrgt", readNTar);
1917 
1918  int tarIdx = 0;
1919  TString expression;
1920  void* ch = gTools().GetChild(tarnode);
1921  while (ch) {
// "TargetIndex" is read into tarIdx but not used; targets are added in document order
1922  gTools().ReadAttr( ch, "TargetIndex", tarIdx);
1923  gTools().ReadAttr( ch, "Expression", expression);
1924  DataInfo().AddTarget(expression,"","",0,0);
1925 
1926  ch = gTools().GetNextChild(ch);
1927  }
1928 }
1929 
1930 ////////////////////////////////////////////////////////////////////////////////
1931 /// returns the ROOT directory where info/histograms etc of the
1932 /// corresponding MVA method instance are stored
1933 
1935 {
// Returns fBaseDir when it is already set. Otherwise looks up (and, if
// absent, creates) the sub-directory named GetMethodName() inside the
// directory returned by MethodBaseDir() and returns it.
// NOTE(review): fBaseDir is not assigned anywhere in this function, so the
// lookup below is repeated on each call unless it is set elsewhere.
1936  if (fBaseDir != 0) return fBaseDir;
1937  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " not set yet --> check if already there.." <<Endl;
1938 
1939  TDirectory* methodDir = MethodBaseDir();
1940  if (methodDir==0)
1941  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MethodBase::BaseDir() - MethodBaseDir() return a NULL pointer!" << Endl;
1942 
1943  TString defaultDir = GetMethodName();
1944  TDirectory *sdir = methodDir->GetDirectory(defaultDir.Data());
1945  if(!sdir)
1946  {
1947  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " does not exist yet--> created it" <<Endl;
// NOTE(review): the result of mkdir() is used without a null check
1948  sdir = methodDir->mkdir(defaultDir);
1949  sdir->cd();
1950  // write weight file name into target file
// stored as two TObjStrings: the current working directory under the key
// "TrainingPath" and GetWeightFileName() under the key "WeightFileName"
1951  TObjString wfilePath( gSystem->WorkingDirectory() );
1952  TObjString wfileName( GetWeightFileName() );
1953  wfilePath.Write( "TrainingPath" );
1954  wfileName.Write( "WeightFileName" );
1955  }
1956 
// this message is logged on both paths, i.e. also right after the directory was created
1957  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " existed, return it.." <<Endl;
1958  return sdir;
1959 }
1960 
1961 ////////////////////////////////////////////////////////////////////////////////
1962 /// returns the ROOT directory where all instances of the
1963 /// corresponding MVA method are stored
1964 
1966  {
// Returns fMethodBaseDir when it is already set; otherwise resolves it below
// the directory returned by GetFile(), creating the dataset directory
// (named DataInfo().GetName()) and the "Method_<name>" directory on demand.
// NOTE(review): the signature line and original line 1981 are not part of
// this listing (the embedded numbering jumps from 1980 to 1982), so the
// step between building _methodDir and the null test below cannot be seen.
1967  if (fMethodBaseDir != 0) return fMethodBaseDir;
1968 
1969  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " not set yet --> check if already there.." <<Endl;
1970 
1971 
1972  TDirectory *fFactoryBaseDir=GetFile();
1973 
// first level: the per-dataset directory
1974  fMethodBaseDir = fFactoryBaseDir->GetDirectory(DataInfo().GetName());
1975  if(!fMethodBaseDir) //creating dataset directory
1976  {
1977  fMethodBaseDir = fFactoryBaseDir->mkdir(DataInfo().GetName(),Form("Base directory for dataset %s",DataInfo().GetName()));
// NOTE(review): this kFATAL message is streamed without a terminating Endl
1978  if(!fMethodBaseDir)Log()<<kFATAL<<"Can not create dir "<<DataInfo().GetName();
1979  }
// second level: "Method_<method name>" inside the dataset directory
1980  TString _methodDir = Form("Method_%s",GetMethodName().Data());
1982 
1983  if(!fMethodBaseDir){
1984  fMethodBaseDir = fFactoryBaseDir->GetDirectory(DataInfo().GetName())->mkdir(_methodDir.Data(),Form("Directory for all %s methods", GetMethodTypeName().Data()));
1985  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " does not exist yet--> created it" <<Endl;
1986  }
1987 
1988  Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<"Return from MethodBaseDir() after creating base directory "<<Endl;
1989  return fMethodBaseDir;
1990 }
1991 
1992 ////////////////////////////////////////////////////////////////////////////////
1993 /// set directory of weight file
1994 
1996 {
// Stores `fileDir` in fFileDir.
// NOTE(review): the signature line and original line 1998 are not part of
// this listing, so any further step taken after the assignment is not visible.
1997  fFileDir = fileDir;
1999 }
2000 
2001 ////////////////////////////////////////////////////////////////////////////////
2002 /// set the weight file name (deprecated)
2003 
2005 {
// Stores `theWeightFile` in fWeightFile; GetWeightFileName() returns this
// value unchanged whenever it is non-empty.
// NOTE(review): the signature line is not part of this listing.
2006  fWeightFile = theWeightFile;
2007 }
2008 
2009 ////////////////////////////////////////////////////////////////////////////////
2010 /// retrieve weight file name
2011 
2013 {
// Returns fWeightFile when it is non-empty; otherwise composes the default
// name <dir>/<jobname>_<methodname>.<weight-file-extension>.xml
// NOTE(review): the signature line is not part of this listing.
2014  if (fWeightFile!="") return fWeightFile;
2015 
2016  // the default consists of
2017  // directory/jobname_methodname_suffix.extension.{root/txt}
// `suffix` is always the empty string here, and the name always ends in ".xml"
2018  TString suffix = "";
2019  TString wFileDir(GetWeightFileDir());
// a '/' is inserted only when the directory does not already end with one
// NOTE(review): for an empty wFileDir this indexes position -1 -- confirm
// that GetWeightFileDir() can never be empty
2020  return ( wFileDir + (wFileDir[wFileDir.Length()-1]=='/' ? "" : "/")
2021  + GetJobName() + "_" + GetMethodName() +
2022  suffix + "." + gConfig().GetIONames().fWeightFileExtension + ".xml" );
2023 }
2024 
2025 ////////////////////////////////////////////////////////////////////////////////
2026 /// writes all MVA evaluation histograms to file
2027 
2029 {
// Changes into BaseDir() and writes the histograms of the signal/background
// MVA PDFs (when these exist) plus the stored result objects for `treetype`.
// NOTE(review): the signature line and original lines 2035-2036, 2040-2041
// and 2053 are not part of this listing; whatever they do is not documented
// here.
2030  BaseDir()->cd();
2031 
2032 
2033  // write MVA PDFs to file - if exist
2034  if (0 != fMVAPdfS) {
2037  fMVAPdfS->GetPDFHist()->Write();
2038  }
2039  if (0 != fMVAPdfB) {
2042  fMVAPdfB->GetPDFHist()->Write();
2043  }
2044 
2045  // write result-histograms
2046  Results* results = Data()->GetResults( GetMethodName(), treetype, Types::kMaxAnalysisType );
// NOTE(review): `results` is dereferenced below; this relies on the kFATAL
// log not returning when `results` is null
2047  if (!results)
2048  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<WriteEvaluationHistosToFile> Unknown result: "
2049  << GetMethodName() << (treetype==Types::kTraining?"/kTraining":"/kTesting")
2050  << "/kMaxAnalysisType" << Endl;
2051  results->GetStorage()->Write();
// the body of this branch (original line 2053) is missing from the listing
2052  if (treetype==Types::kTesting) {
2054  }
2055 }
2056 
2057 ////////////////////////////////////////////////////////////////////////////////
2058 /// write special monitoring histograms to file
2059 /// dummy implementation here -----------------
2060 
2062 {
// intentionally empty: nothing is written by this base implementation
// NOTE(review): the signature line is not part of this listing.
2063 }
2064 
2065 ////////////////////////////////////////////////////////////////////////////////
2066 /// reads one line from the input stream
2067 /// checks for certain keywords and interprets
2068 /// the line if keywords are found
2069 
2070 Bool_t TMVA::MethodBase::GetLine(std::istream& fin, char* buf )
2071 {
2072  fin.getline(buf,512);
2073  TString line(buf);
2074  if (line.BeginsWith("TMVA Release")) {
2075  Ssiz_t start = line.First('[')+1;
2076  Ssiz_t length = line.Index("]",start)-start;
2077  TString code = line(start,length);
2078  std::stringstream s(code.Data());
2079  s >> fTMVATrainingVersion;
2080  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
2081  }
2082  if (line.BeginsWith("ROOT Release")) {
2083  Ssiz_t start = line.First('[')+1;
2084  Ssiz_t length = line.Index("]",start)-start;
2085  TString code = line(start,length);
2086  std::stringstream s(code.Data());
2087  s >> fROOTTrainingVersion;
2088  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
2089  }
2090  if (line.BeginsWith("Analysis type")) {
2091  Ssiz_t start = line.First('[')+1;
2092  Ssiz_t length = line.Index("]",start)-start;
2093  TString code = line(start,length);
2094  std::stringstream s(code.Data());
2095  std::string analysisType;
2096  s >> analysisType;
2097  if (analysisType == "regression" || analysisType == "Regression") SetAnalysisType( Types::kRegression );
2098  else if (analysisType == "classification" || analysisType == "Classification") SetAnalysisType( Types::kClassification );
2099  else if (analysisType == "multiclass" || analysisType == "Multiclass") SetAnalysisType( Types::kMulticlass );
2100  else Log() << kFATAL << "Analysis type " << analysisType << " from weight-file not known!" << std::endl;
2101 
2102  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Method was trained for "
2103  << (GetAnalysisType() == Types::kRegression ? "Regression" :
2104  (GetAnalysisType() == Types::kMulticlass ? "Multiclass" : "Classification")) << Endl;
2105  }
2106 
2107  return true;
2108 }
2109 
2110 ////////////////////////////////////////////////////////////////////////////////
2111 /// Create PDFs of the MVA output variables
2112 
2114 {
// Builds the signal and background PDFs (fMVAPdfS / fMVAPdfB) of the MVA
// output: fills two temporary weighted histograms from the stored
// classification results, normalises them and hands them to PDF::BuildPDF.
// NOTE(review): the signature line and original lines 2115 and 2120 are not
// part of this listing, so the argument of the dynamic_cast below is not
// visible.
2116 
2117  // the PDF's are stored as results ONLY if the corresponding "results" are booked,
2118  // otherwise they will be only used 'online'
2119  ResultsClassification * mvaRes = dynamic_cast<ResultsClassification*>
2121 
// NOTE(review): only a kERROR is logged here and there is no return, yet
// mvaRes is dereferenced right below -- confirm that a null or empty result
// cannot reach this point
2122  if (mvaRes==0 || mvaRes->GetSize()==0) {
2123  Log() << kERROR<<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CreateMVAPdfs> No result of classifier testing available" << Endl;
2124  }
2125 
// histogram range = smallest and largest stored MVA value
2126  Double_t minVal = *std::min_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2127  Double_t maxVal = *std::max_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2128 
2129  // create histograms that serve as basis to create the MVA Pdfs
2130  TH1* histMVAPdfS = new TH1D( GetMethodTypeName() + "_tr_S", GetMethodTypeName() + "_tr_S",
2131  fMVAPdfS->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2132  TH1* histMVAPdfB = new TH1D( GetMethodTypeName() + "_tr_B", GetMethodTypeName() + "_tr_B",
2133  fMVAPdfB->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2134 
2135 
2136  // compute sum of weights properly
2137  histMVAPdfS->Sumw2();
2138  histMVAPdfB->Sumw2();
2139 
2140  // fill histograms
// each event goes, with its event weight, into the signal or the background histogram
2141  for (UInt_t ievt=0; ievt<mvaRes->GetSize(); ievt++) {
2142  Double_t theVal = mvaRes->GetValueVector()->at(ievt);
2143  Double_t theWeight = Data()->GetEvent(ievt)->GetWeight();
2144 
2145  if (DataInfo().IsSignal(Data()->GetEvent(ievt))) histMVAPdfS->Fill( theVal, theWeight );
2146  else histMVAPdfB->Fill( theVal, theWeight );
2147  }
2148 
2149  gTools().NormHist( histMVAPdfS );
2150  gTools().NormHist( histMVAPdfB );
2151 
2152  // momentary hack for ROOT problem
2153  if(!IsSilentFile())
2154  {
2155  histMVAPdfS->Write();
2156  histMVAPdfB->Write();
2157  }
2158  // create PDFs
2159  fMVAPdfS->BuildPDF ( histMVAPdfS );
2160  fMVAPdfB->BuildPDF ( histMVAPdfB );
2161  fMVAPdfS->ValidatePDF( histMVAPdfS );
2162  fMVAPdfB->ValidatePDF( histMVAPdfB );
2163 
// the separation is reported only for exactly two classes
2164  if (DataInfo().GetNClasses() == 2) { // TODO: this is an ugly hack.. adapt this to new framework
2165  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName())
2166  << Form( "<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
2167  GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
2168  << Endl;
2169  }
2170 
// the temporary histograms allocated above are released again here
2171  delete histMVAPdfS;
2172  delete histMVAPdfB;
2173 }
2174 
// NOTE(review): the line carrying this function's signature and opening
// brace, and original line 2183 (where `sigFraction` is presumably defined),
// are not part of this listing.
2176  // the simple one, automatically calculates the mvaVal and uses the
2177  // SAME sig/bkg ratio as given in the training sample (typically 50/50
2178  // .. (NormMode=EqualNumEvents) but can be different)
// the MVA PDFs are created lazily on the first call that needs them
2179  if (!fMVAPdfS || !fMVAPdfB) {
2180  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<GetProba> MVA PDFs for Signal and Background don't exist yet, we'll create them on demand" << Endl;
2181  CreateMVAPdfs();
2182  }
2184  Double_t mvaVal = GetMvaValue(ev);
2185 
// delegate to the (mvaVal, signal fraction) overload
2186  return GetProba(mvaVal,sigFraction);
2187 
2188 }
2189 ////////////////////////////////////////////////////////////////////////////////
2190 /// compute likelihood ratio
2191 
2193 {
// Returns p_s*ap_sig / (p_s*ap_sig + p_b*(1 - ap_sig)), where p_s and p_b
// are the values of the signal and background MVA PDFs at `mvaVal`.
// Returns -1 when either PDF is missing or when the denominator is not
// positive.
// NOTE(review): the signature line is not part of this listing; `ap_sig` is
// presumably the assumed signal fraction.
2194  if (!fMVAPdfS || !fMVAPdfB) {
2195  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetProba> MVA PDFs for Signal and Background don't exist" << Endl;
2196  return -1.0;
2197  }
2198  Double_t p_s = fMVAPdfS->GetVal( mvaVal );
2199  Double_t p_b = fMVAPdfB->GetVal( mvaVal );
2200 
2201  Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
2202 
// guard against division by zero (or a negative denominator)
2203  return (denom > 0) ? (p_s*ap_sig) / denom : -1;
2204 }
2205 
2206 ////////////////////////////////////////////////////////////////////////////////
2207 /// compute rarity:
2208 /// R(x) = Integrate_[-oo..x] { PDF(x') dx' }
2209 /// where PDF(x) is the PDF of the classifier's signal or background distribution
2210 
2212 {
// Integrates the reference PDF (signal PDF for Types::kSignal, background
// PDF otherwise) from its lower edge GetXmin() up to `mvaVal`.
// Returns 0.0, after a warning, when the PDF needed for a kSignal or
// kBackground `reftype` does not exist.
// NOTE(review): the signature line is not part of this listing.
2213  if ((reftype == Types::kSignal && !fMVAPdfS) || (reftype == Types::kBackground && !fMVAPdfB)) {
2214  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetRarity> Required MVA PDF for Signal or Backgroud does not exist: "
2215  << "select option \"CreateMVAPdfs\"" << Endl;
2216  return 0.0;
2217  }
2218 
// NOTE(review): any reftype other than kSignal selects fMVAPdfB, which was
// null-checked above only for reftype == kBackground
2219  PDF* thePdf = ((reftype == Types::kSignal) ? fMVAPdfS : fMVAPdfB);
2220 
2221  return thePdf->GetIntegral( thePdf->GetXmin(), mvaVal );
2222 }
2223 
2224 ////////////////////////////////////////////////////////////////////////////////
2225 /// fill background efficiency (resp. rejection) versus signal efficiency plots
2226 /// returns signal efficiency at background efficiency indicated in theString
2227 
2229 {
// Overview of what the visible code does:
//  * on the first call for a result set (no "MVA_EFF_S" stored yet) it fills
//    the signal/background efficiency histograms, derives the
//    background-efficiency, rejection and inverse-efficiency versus
//    signal-efficiency histograms, builds the fSpleffBvsS spline and sets the
//    signal reference cut;
//  * if `theString` parses into fewer than two fields it returns the area
//    under the rejection-vs-efficiency curve, otherwise the signal
//    efficiency at the background efficiency given by the second field
//    (effSerr is then set as well).
// NOTE(review): the signature line and original lines 2311-2312 and 2365 are
// not part of this listing; `theString`, `type` and `effSerr` are presumably
// parameters.
2230  Data()->SetCurrentType(type);
2231  Results* results = Data()->GetResults( GetMethodName(), type, Types::kClassification );
// NOTE(review): the dynamic_cast result is dereferenced without a null check
2232  std::vector<Float_t>* mvaRes = dynamic_cast<ResultsClassification*>(results)->GetValueVector();
2233 
2234  // parse input string for required background efficiency
2235  TList* list = gTools().ParseFormatLine( theString );
2236 
2237  // sanity check
// fewer than two fields (or no list at all) selects the area computation
2238  Bool_t computeArea = kFALSE;
2239  if (!list || list->GetSize() < 2) computeArea = kTRUE; // the area is computed
2240  else if (list->GetSize() > 2) {
2241  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Wrong number of arguments"
2242  << " in string: " << theString
2243  << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
2244  delete list;
2245  return -1;
2246  }
2247 
2248  // sanity check
2249  if ( results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2250  results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2251  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Binning mismatch between signal and background histos" << Endl;
2252  delete list;
2253  return -1.0;
2254  }
2255 
2256  // create histograms
2257 
2258  // first, get efficiency histograms for signal and background
2259  TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2260  Double_t xmin = effhist->GetXaxis()->GetXmin();
2261  Double_t xmax = effhist->GetXaxis()->GetXmax();
2262 
// nevtS is assigned only inside the first-round branch below but is read for
// effSerr on every call
2263  TTHREAD_TLS(Double_t) nevtS;
2264 
2265  // first round ? --> create histograms
2266  if (results->DoesExist("MVA_EFF_S")==0) {
2267 
2268  // for efficiency plot
2269  TH1* eff_s = new TH1D( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbinsH, xmin, xmax );
2270  TH1* eff_b = new TH1D( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbinsH, xmin, xmax );
2271  results->Store(eff_s, "MVA_EFF_S");
2272  results->Store(eff_b, "MVA_EFF_B");
2273 
2274  // sign if cut
2275  Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2276 
2277  // this method is unbinned
2278  nevtS = 0;
2279  for (UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2280 
2281  // read the tree
2282  Bool_t isSignal = DataInfo().IsSignal(GetEvent(ievt));
2283  Float_t theWeight = GetEvent(ievt)->GetWeight();
2284  Float_t theVal = (*mvaRes)[ievt];
2285 
2286  // select histogram depending on if sig or bgd
2287  TH1* theHist = isSignal ? eff_s : eff_b;
2288 
2289  // count signal and background events in tree
2290  if (isSignal) nevtS+=theWeight;
2291 
// bin that contains theVal (1-based); it may lie outside [1, fNbinsH]
2292  TAxis* axis = theHist->GetXaxis();
2293  Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2294  if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2295  if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2296  if (sign > 0 && maxbin < 1 ) maxbin = 1;
2297  if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2298 
// cumulative fill: positive orientation adds the weight to bins 1..maxbin,
// negative orientation to bins maxbin+1..fNbinsH
2299  if (sign > 0)
2300  for (Int_t ibin=1; ibin<=maxbin; ibin++) theHist->AddBinContent( ibin , theWeight);
2301  else if (sign < 0)
2302  for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->AddBinContent( ibin , theWeight );
2303  else
2304  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Mismatch in sign" << Endl;
2305  }
2306 
2307  // renormalise maximum to <=1
2308  // eff_s->Scale( 1.0/TMath::Max(1.,eff_s->GetMaximum()) );
2309  // eff_b->Scale( 1.0/TMath::Max(1.,eff_b->GetMaximum()) );
2310 
// NOTE(review): original lines 2311-2312 are missing from the listing at this point
2313 
2314  // background efficiency versus signal efficiency
2315  TH1* eff_BvsS = new TH1D( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2316  results->Store(eff_BvsS, "MVA_EFF_BvsS");
2317  eff_BvsS->SetXTitle( "Signal eff" );
2318  eff_BvsS->SetYTitle( "Backgr eff" );
2319 
2320  // background rejection (=1-eff.) versus signal efficiency
2321  TH1* rej_BvsS = new TH1D( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2322  results->Store(rej_BvsS);
2323  rej_BvsS->SetXTitle( "Signal eff" );
2324  rej_BvsS->SetYTitle( "Backgr rejection (1-eff)" );
2325 
2326  // inverse background eff (1/eff.) versus signal efficiency
2327  TH1* inveff_BvsS = new TH1D( GetTestvarName() + "_invBeffvsSeff",
2328  GetTestvarName(), fNbins, 0, 1 );
2329  results->Store(inveff_BvsS);
2330  inveff_BvsS->SetXTitle( "Signal eff" );
2331  inveff_BvsS->SetYTitle( "Inverse backgr. eff (1/eff)" );
2332 
2333  // use root finder
2334  // spline background efficiency plot
2335  // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2336  if (Use_Splines_for_Eff_) {
2337  fSplRefS = new TSpline1( "spline2_signal", new TGraph( eff_s ) );
2338  fSplRefB = new TSpline1( "spline2_background", new TGraph( eff_b ) );
2339 
2340  // verify spline sanity
2341  gTools().CheckSplines( eff_s, fSplRefS );
2342  gTools().CheckSplines( eff_b, fSplRefB );
2343  }
2344 
2345  // make the background-vs-signal efficiency plot
2346 
2347  // create root finder
2348  RootFinder rootFinder( this, fXmin, fXmax );
2349 
2350  Double_t effB = 0;
2351  fEffS = eff_s; // to be set for the root finder
2352  for (Int_t bini=1; bini<=fNbins; bini++) {
2353 
2354  // find cut value corresponding to a given signal efficiency
2355  Double_t effS = eff_BvsS->GetBinCenter( bini );
2356  Double_t cut = rootFinder.Root( effS );
2357 
2358  // retrieve background efficiency for given cut
2359  if (Use_Splines_for_Eff_) effB = fSplRefB->Eval( cut );
2360  else effB = eff_b->GetBinContent( eff_b->FindBin( cut ) );
2361 
2362  // and fill histograms
2363  eff_BvsS->SetBinContent( bini, effB );
2364  rej_BvsS->SetBinContent( bini, 1.0-effB );
// NOTE(review): original line 2365 is missing from the listing; it directly
// precedes the division by effB below
2366  inveff_BvsS->SetBinContent( bini, 1.0/effB );
2367  }
2368 
2369  // create splines for histogram
2370  fSpleffBvsS = new TSpline1( "effBvsS", new TGraph( eff_BvsS ) );
2371 
2372  // search for overlap point where, when cutting on it,
2373  // one would obtain: eff_S = rej_B = 1 - eff_B
2374  Double_t effS = 0., rejB, effS_ = 0., rejB_ = 0.;
2375  Int_t nbins_ = 5000;
2376  for (Int_t bini=1; bini<=nbins_; bini++) {
2377 
2378  // get corresponding signal and background efficiencies
2379  effS = (bini - 0.5)/Float_t(nbins_);
2380  rejB = 1.0 - fSpleffBvsS->Eval( effS );
2381 
2382  // find signal efficiency that corresponds to required background efficiency
// stop at the first sign change of (effS - rejB) between consecutive scan points
2383  if ((effS - rejB)*(effS_ - rejB_) < 0) break;
2384  effS_ = effS;
2385  rejB_ = rejB;
2386  }
2387 
2388  // find cut that corresponds to signal efficiency and update signal-like criterion
2389  Double_t cut = rootFinder.Root( 0.5*(effS + effS_) );
2390  SetSignalReferenceCut( cut );
2391  fEffS = 0;
2392  }
2393 
2394  // must exist...
2395  if (0 == fSpleffBvsS) {
2396  delete list;
2397  return 0.0;
2398  }
2399 
2400  // now find signal efficiency that corresponds to required background efficiency
2401  Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
2402  Int_t nbins_ = 1000;
2403 
2404  if (computeArea) {
2405 
2406  // compute area of rej-vs-eff plot
// midpoint sum of (1 - effB) over nbins_ equal steps in signal efficiency
2407  Double_t integral = 0;
2408  for (Int_t bini=1; bini<=nbins_; bini++) {
2409 
2410  // get corresponding signal and background efficiencies
2411  effS = (bini - 0.5)/Float_t(nbins_);
2412  effB = fSpleffBvsS->Eval( effS );
2413  integral += (1.0 - effB);
2414  }
2415  integral /= nbins_;
2416 
2417  delete list;
2418  return integral;
2419  }
2420  else {
2421 
2422  // that will be the value of the efficiency returned (does not affect
2423  // the efficiency-vs-bkg plot which is done anyway).
2424  Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2425 
2426  // find precise efficiency value
2427  for (Int_t bini=1; bini<=nbins_; bini++) {
2428 
2429  // get corresponding signal and background efficiencies
2430  effS = (bini - 0.5)/Float_t(nbins_);
2431  effB = fSpleffBvsS->Eval( effS );
2432 
2433  // find signal efficiency that corresponds to required background efficiency
2434  if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2435  effS_ = effS;
2436  effB_ = effB;
2437  }
2438 
2439  // take mean between bin above and bin below
2440  effS = 0.5*(effS + effS_);
2441 
// binomial error estimate based on the summed signal weights nevtS
2442  effSerr = 0;
2443  if (nevtS > 0) effSerr = TMath::Sqrt( effS*(1.0 - effS)/nevtS );
2444 
2445  delete list;
2446  return effS;
2447  }
2448 
// not reached: both branches of the if/else above return
2449  return -1;
2450 }
2451 
2452 ////////////////////////////////////////////////////////////////////////////////
2453 
2455 {
2457 
2459 
2460  // fill background efficiency (resp. rejection) versus signal efficiency plots
2461  // returns signal efficiency at background efficiency indicated in theString
2462 
2463  // parse input string for required background efficiency
2464  TList* list = gTools().ParseFormatLine( theString );
2465  // sanity check
2466 
2467  if (list->GetSize() != 2) {
2468  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Wrong number of arguments"
2469  << " in string: " << theString
2470  << " | required format, e.g., Efficiency:0.05" << Endl;
2471  delete list;
2472  return -1;
2473  }
2474  // that will be the value of the efficiency retured (does not affect
2475  // the efficiency-vs-bkg plot which is done anyway.
2476  Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2477 
2478  delete list;
2479 
2480  // sanity check
2481  if (results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2482  results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2483  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Binning mismatch between signal and background histos"
2484  << Endl;
2485  return -1.0;
2486  }
2487 
2488  // create histogram
2489 
2490  // first, get efficiency histograms for signal and background
2491  TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2492  Double_t xmin = effhist->GetXaxis()->GetXmin();
2493  Double_t xmax = effhist->GetXaxis()->GetXmax();
2494 
2495  // first round ? --> create and fill histograms
2496  if (results->DoesExist("MVA_TRAIN_S")==0) {
2497 
2498  // classifier response distributions for test sample
2499  Double_t sxmax = fXmax+0.00001;
2500 
2501  // MVA plots on the training sample (check for overtraining)
2502  TH1* mva_s_tr = new TH1D( GetTestvarName() + "_Train_S",GetTestvarName() + "_Train_S", fNbinsMVAoutput, fXmin, sxmax );
2503  TH1* mva_b_tr = new TH1D( GetTestvarName() + "_Train_B",GetTestvarName() + "_Train_B", fNbinsMVAoutput, fXmin, sxmax );
2504  results->Store(mva_s_tr, "MVA_TRAIN_S");
2505  results->Store(mva_b_tr, "MVA_TRAIN_B");
2506  mva_s_tr->Sumw2();
2507  mva_b_tr->Sumw2();
2508 
2509  // Training efficiency plots
2510  TH1* mva_eff_tr_s = new TH1D( GetTestvarName() + "_trainingEffS", GetTestvarName() + " (signal)",
2511  fNbinsH, xmin, xmax );
2512  TH1* mva_eff_tr_b = new TH1D( GetTestvarName() + "_trainingEffB", GetTestvarName() + " (background)",
2513  fNbinsH, xmin, xmax );
2514  results->Store(mva_eff_tr_s, "MVA_TRAINEFF_S");
2515  results->Store(mva_eff_tr_b, "MVA_TRAINEFF_B");
2516 
2517  // sign if cut
2518  Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2519 
2520  std::vector<Double_t> mvaValues = GetMvaValues(0,Data()->GetNEvents());
2521  assert( (Long64_t) mvaValues.size() == Data()->GetNEvents());
2522 
2523  // this method is unbinned
2524  for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2525 
2526  Data()->SetCurrentEvent(ievt);
2527  const Event* ev = GetEvent();
2528 
2529  Double_t theVal = mvaValues[ievt];
2530  Double_t theWeight = ev->GetWeight();
2531 
2532  TH1* theEffHist = DataInfo().IsSignal(ev) ? mva_eff_tr_s : mva_eff_tr_b;
2533  TH1* theClsHist = DataInfo().IsSignal(ev) ? mva_s_tr : mva_b_tr;
2534 
2535  theClsHist->Fill( theVal, theWeight );
2536 
2537  TAxis* axis = theEffHist->GetXaxis();
2538  Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2539  if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2540  if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2541  if (sign > 0 && maxbin < 1 ) maxbin = 1;
2542  if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2543 
2544  if (sign > 0) for (Int_t ibin=1; ibin<=maxbin; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2545  else for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2546  }
2547 
2548  // normalise output distributions
2549  // uncomment those (and several others if you want unnormalized output
2550  gTools().NormHist( mva_s_tr );
2551  gTools().NormHist( mva_b_tr );
2552 
2553  // renormalise to maximum
2554  mva_eff_tr_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_s->GetMaximum()) );
2555  mva_eff_tr_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_b->GetMaximum()) );
2556 
2557  // Training background efficiency versus signal efficiency
2558  TH1* eff_bvss = new TH1D( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2559  // Training background rejection (=1-eff.) versus signal efficiency
2560  TH1* rej_bvss = new TH1D( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2561  results->Store(eff_bvss, "EFF_BVSS_TR");
2562  results->Store(rej_bvss, "REJ_BVSS_TR");
2563 
2564  // use root finder
2565  // spline background efficiency plot
2566  // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2567  if (Use_Splines_for_Eff_) {
2568  if (fSplTrainRefS) delete fSplTrainRefS;
2569  if (fSplTrainRefB) delete fSplTrainRefB;
2570  fSplTrainRefS = new TSpline1( "spline2_signal", new TGraph( mva_eff_tr_s ) );
2571  fSplTrainRefB = new TSpline1( "spline2_background", new TGraph( mva_eff_tr_b ) );
2572 
2573  // verify spline sanity
2574  gTools().CheckSplines( mva_eff_tr_s, fSplTrainRefS );
2575  gTools().CheckSplines( mva_eff_tr_b, fSplTrainRefB );
2576  }
2577 
2578  // make the background-vs-signal efficiency plot
2579 
2580  // create root finder
2581  RootFinder rootFinder(this, fXmin, fXmax );
2582 
2583  Double_t effB = 0;
2584  fEffS = results->GetHist("MVA_TRAINEFF_S");
2585  for (Int_t bini=1; bini<=fNbins; bini++) {
2586 
2587  // find cut value corresponding to a given signal efficiency
2588  Double_t effS = eff_bvss->GetBinCenter( bini );
2589 
2590  Double_t cut = rootFinder.Root( effS );
2591 
2592  // retrieve background efficiency for given cut
2593  if (Use_Splines_for_Eff_) effB = fSplTrainRefB->Eval( cut );
2594  else effB = mva_eff_tr_b->GetBinContent( mva_eff_tr_b->FindBin( cut ) );
2595 
2596  // and fill histograms
2597  eff_bvss->SetBinContent( bini, effB );
2598  rej_bvss->SetBinContent( bini, 1.0-effB );
2599  }
2600  fEffS = 0;
2601 
2602  // create splines for histogram
2603  fSplTrainEffBvsS = new TSpline1( "effBvsS", new TGraph( eff_bvss ) );
2604  }
2605 
2606  // must exist...
2607  if (0 == fSplTrainEffBvsS) return 0.0;
2608 
2609  // now find signal efficiency that corresponds to required background efficiency
2610  Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
2611  Int_t nbins_ = 1000;
2612  for (Int_t bini=1; bini<=nbins_; bini++) {
2613 
2614  // get corresponding signal and background efficiencies
2615  effS = (bini - 0.5)/Float_t(nbins_);
2616  effB = fSplTrainEffBvsS->Eval( effS );
2617 
2618  // find signal efficiency that corresponds to required background efficiency
2619  if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2620  effS_ = effS;
2621  effB_ = effB;
2622  }
2623 
2624  return 0.5*(effS + effS_); // the mean between bin above and bin below
2625 }
2626 
2627 //_______________________________________________________________________
2628 
2629 
2630 std::vector<Float_t> TMVA::MethodBase::GetMulticlassEfficiency(std::vector<std::vector<Float_t> >& purity)
2631 {
 // Appends the achievable purities held by `resMulticlass` to `purity` and
 // returns its achievable efficiencies.
 // NOTE(review): `resMulticlass` is not declared in the lines visible here (the
 // listing jumps from 2631 to 2634) - confirm its origin against the full file.
 // NOTE(review): the pointer is dereferenced below even after the kFATAL message;
 // presumably kFATAL does not return - confirm.
2634  if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in GetMulticlassEfficiency, exiting."<<Endl;
2635 
2636  purity.push_back(resMulticlass->GetAchievablePur());
2637  return resMulticlass->GetAchievableEff();
2638 }
2639 
2640 //_______________________________________________________________________
2641 
2642 std::vector<Float_t> TMVA::MethodBase::GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity)
2643 {
 // Asks `resMulticlass` for the best multiclass cuts of every class, then appends
 // the achievable purities to `purity` and returns the achievable efficiencies.
 // NOTE(review): `resMulticlass` is not declared in the lines visible here (the
 // listing jumps from 2643 to 2646) - confirm its origin against the full file.
 // NOTE(review): unlike the other messages in this file, the kFATAL text below
 // carries no "Dataset[...]" prefix.
2646  if (!resMulticlass) Log() << kFATAL<< "unable to create pointer in GetMulticlassTrainingEfficiency, exiting."<<Endl;
2647 
2648  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for training data..." << Endl;
 // one call per class known to the data set info
2649  for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
2650  resMulticlass->GetBestMultiClassCuts(icls);
2651  }
2652 
2653  purity.push_back(resMulticlass->GetAchievablePur());
2654  return resMulticlass->GetAchievableEff();
2655 }
2656 
2657 
2658 ////////////////////////////////////////////////////////////////////////////////
2659 /// compute significance of mean difference
2660 /// significance = |<S> - <B>|/Sqrt(RMS_S2 + RMS_B2)
2661 
2663 {
2664  Double_t rms = sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
2665 
2666  return (rms > 0) ? TMath::Abs(fMeanS - fMeanB)/rms : 0;
2667 }
2668 
2669 ////////////////////////////////////////////////////////////////////////////////
2670 /// compute "separation" defined as
2671 /// <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx }
2672 
2674 {
 // Forwards both histograms unchanged to gTools().GetSeparation and returns
 // whatever that helper computes; no checks are done here.
2675  return gTools().GetSeparation( histoS, histoB );
2676 }
2677 
2678 ////////////////////////////////////////////////////////////////////////////////
2679 /// compute "separation" defined as
2680 /// <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx }
2681 
2683 {
2684  // note, if zero pointers given, use internal pdf
2685  // sanity check first
2686  if ((!pdfS && pdfB) || (pdfS && !pdfB))
2687  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2688  if (!pdfS) pdfS = fSplS;
2689  if (!pdfB) pdfB = fSplB;
2690 
2691  if (!fSplS || !fSplB) {
2692  Log()<<kDEBUG<<Form("[%s] : ",DataInfo().GetName())<< "could not calculate the separation, distributions"
2693  << " fSplS or fSplB are not yet filled" << Endl;
2694  return 0;
2695  }else{
2696  return gTools().GetSeparation( *pdfS, *pdfB );
2697  }
2698 }
2699 
2700 ////////////////////////////////////////////////////////////////////////////////
2701 /// calculate the area (integral) under the ROC curve as a
2702 /// overall quality measure of the classification
2703 
2705 {
2706  // note, if zero pointers given, use internal pdf
2707  // sanity check first
2708  if ((!histS && histB) || (histS && !histB))
2709  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetROCIntegral(TH1D*, TH1D*)> Mismatch in hists" << Endl;
2710 
2711  if (histS==0 || histB==0) return 0.;
2712 
2713  TMVA::PDF *pdfS = new TMVA::PDF( " PDF Sig", histS, TMVA::PDF::kSpline3 );
2714  TMVA::PDF *pdfB = new TMVA::PDF( " PDF Bkg", histB, TMVA::PDF::kSpline3 );
2715 
2716 
2717  Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2718  Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2719 
2720  Double_t integral = 0;
2721  UInt_t nsteps = 1000;
2722  Double_t step = (xmax-xmin)/Double_t(nsteps);
2723  Double_t cut = xmin;
2724  for (UInt_t i=0; i<nsteps; i++) {
2725  integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2726  cut+=step;
2727  }
2728  return integral*step;
2729 }
2730 
2731 
2732 ////////////////////////////////////////////////////////////////////////////////
2733 /// calculate the area (integral) under the ROC curve as a
2734 /// overall quality measure of the classification
2735 
2737 {
2738  // note, if zero pointers given, use internal pdf
2739  // sanity check first
2740  if ((!pdfS && pdfB) || (pdfS && !pdfB))
2741  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2742  if (!pdfS) pdfS = fSplS;
2743  if (!pdfB) pdfB = fSplB;
2744 
2745  if (pdfS==0 || pdfB==0) return 0.;
2746 
2747  Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2748  Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2749 
2750  Double_t integral = 0;
2751  UInt_t nsteps = 1000;
2752  Double_t step = (xmax-xmin)/Double_t(nsteps);
2753  Double_t cut = xmin;
2754  for (UInt_t i=0; i<nsteps; i++) {
2755  integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2756  cut+=step;
2757  }
2758  return integral*step;
2759 }
2760 
2761 ////////////////////////////////////////////////////////////////////////////////
2762 /// plot significance, S/Sqrt(S + B), curve for given number
2763 /// of signal and background events; returns cut for maximum significance
2764 /// also returned via reference is the maximum significance
2765 
2767  Double_t BackgroundEvents,
2768  Double_t& max_significance_value ) const
2769 {
 // Scans the efficiency histograms "MVA_EFF_S" / "MVA_EFF_B" bin by bin, computes
 // the significance for the given expected event numbers in each bin, and returns
 // the bin centre (cut value) of the most significant bin. The significance found
 // there is written to max_significance_value.
 // NOTE(review): `results` is used below but not declared in the lines visible
 // here (the listing jumps from 2769 to 2771) - confirm against the full file.
2771 
 // despite its name, max_significance holds the cut value at the maximum
2772  Double_t max_significance(0);
2773  Double_t effS(0),effB(0),significance(0);
 // NOTE(review): allocated before the checks below; the early "return 0" further
 // down leaves this histogram undeleted.
2774  TH1D *temp_histogram = new TH1D("temp", "temp", fNbinsH, fXmin, fXmax );
2775 
2776  if (SignalEvents <= 0 || BackgroundEvents <= 0) {
2777  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetMaximumSignificance> "
2778  << "Number of signal or background events is <= 0 ==> abort"
2779  << Endl;
2780  }
2781 
2782  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Using ratio SignalEvents/BackgroundEvents = "
2783  << SignalEvents/BackgroundEvents << Endl;
2784 
2785  TH1* eff_s = results->GetHist("MVA_EFF_S");
2786  TH1* eff_b = results->GetHist("MVA_EFF_B");
2787 
 // without both efficiency histograms no scan is possible; on this path
 // max_significance_value is left untouched
2788  if ( (eff_s==0) || (eff_b==0) ) {
2789  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Efficiency histograms empty !" << Endl;
2790  Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "no maximum cut found, return 0" << Endl;
2791  return 0;
2792  }
2793 
2794  for (Int_t bin=1; bin<=fNbinsH; bin++) {
2795  effS = eff_s->GetBinContent( bin );
2796  effB = eff_b->GetBinContent( bin );
2797 
2798  // put significance into a histogram
 // algebraically S*effS / sqrt( S*effS + B*effB )
 // NOTE(review): 0/0 if both efficiencies are zero in a bin - confirm this cannot occur
2799  significance = sqrt(SignalEvents)*( effS )/sqrt( effS + ( BackgroundEvents / SignalEvents) * effB );
2800 
2801  temp_histogram->SetBinContent(bin,significance);
2802  }
2803 
2804  // find maximum in histogram
2805  max_significance = temp_histogram->GetBinCenter( temp_histogram->GetMaximumBin() );
2806  max_significance_value = temp_histogram->GetBinContent( temp_histogram->GetMaximumBin() );
2807 
2808  // delete
2809  delete temp_histogram;
2810 
2811  Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Optimal cut at : " << max_significance << Endl;
2812  Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Maximum significance: " << max_significance_value << Endl;
2813 
2814  return max_significance;
2815 }
2816 
2817 ////////////////////////////////////////////////////////////////////////////////
2818 /// calculates rms,mean, xmin, xmax of the event variable
2819 /// this can be either done for the variables as they are or for
2820 /// normalised variables (in the range of 0-1) if "norm" is set to kTRUE
2821 
2822 void TMVA::MethodBase::Statistics( Types::ETreeType treeType, const TString& theVarName,
2823  Double_t& meanS, Double_t& meanB,
2824  Double_t& rmsS, Double_t& rmsB,
2825  Double_t& xmin, Double_t& xmax )
2826 {
2827  Types::ETreeType previousTreeType = Data()->GetCurrentType();
2828  Data()->SetCurrentType(treeType);
2829 
2830  Long64_t entries = Data()->GetNEvents();
2831 
2832  // sanity check
2833  if (entries <=0)
2834  Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CalculateEstimator> Wrong tree type: " << treeType << Endl;
2835 
2836  // index of the wanted variable
2837  UInt_t varIndex = DataInfo().FindVarIndex( theVarName );
2838 
2839  // first fill signal and background in arrays before analysis
2840  xmin = +DBL_MAX;
2841  xmax = -DBL_MAX;
2842  Long64_t nEventsS = -1;
2843  Long64_t nEventsB = -1;
2844 
2845  // take into account event weights
2846  meanS = 0;
2847  meanB = 0;
2848  rmsS = 0;
2849  rmsB = 0;
2850  Double_t sumwS = 0, sumwB = 0;
2851 
2852  // loop over all training events
2853  for (Int_t ievt = 0; ievt < entries; ievt++) {
2854 
2855  const Event* ev = GetEvent(ievt);
2856 
2857  Double_t theVar = ev->GetValue(varIndex);
2858  Double_t weight = ev->GetWeight();
2859 
2860  if (DataInfo().IsSignal(ev)) {
2861  sumwS += weight;
2862  meanS += weight*theVar;
2863  rmsS += weight*theVar*theVar;
2864  }
2865  else {
2866  sumwB += weight;
2867  meanB += weight*theVar;
2868  rmsB += weight*theVar*theVar;
2869  }
2870  xmin = TMath::Min( xmin, theVar );
2871  xmax = TMath::Max( xmax, theVar );
2872  }
2873  ++nEventsS;
2874  ++nEventsB;
2875 
2876  meanS = meanS/sumwS;
2877  meanB = meanB/sumwB;
2878  rmsS = TMath::Sqrt( rmsS/sumwS - meanS*meanS );
2879  rmsB = TMath::Sqrt( rmsB/sumwB - meanB*meanB );
2880 
2881  Data()->SetCurrentType(previousTreeType);
2882 }
2883 
2884 ////////////////////////////////////////////////////////////////////////////////
2885 /// create reader class for method (classification only at present)
2886 
2887 void TMVA::MethodBase::MakeClass( const TString& theClassFileName ) const
2888 {
2889  // the default consists of
2890  TString classFileName = "";
2891  if (theClassFileName == "")
2892  classFileName = GetWeightFileDir() + "/" + GetJobName() + "_" + GetMethodName() + ".class.C";
2893  else
2894  classFileName = theClassFileName;
2895 
2896  TString className = TString("Read") + GetMethodName();
2897 
2898  TString tfname( classFileName );
2899  Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
2900  << "Creating standalone class: "
2901  << gTools().Color("lightblue") << classFileName << gTools().Color("reset") << Endl;
2902 
2903  std::ofstream fout( classFileName );
2904  if (!fout.good()) { // file could not be opened --> Error
2905  Log() << kFATAL << "<MakeClass> Unable to open file: " << classFileName << Endl;
2906  }
2907 
2908  // now create the class
2909  // preamble
2910  fout << "// Class: " << className << std::endl;
2911  fout << "// Automatically generated by MethodBase::MakeClass" << std::endl << "//" << std::endl;
2912 
2913  // print general information and configuration state
2914  fout << std::endl;
2915  fout << "/* configuration options =====================================================" << std::endl << std::endl;
2916  WriteStateToStream( fout );
2917  fout << std::endl;
2918  fout << "============================================================================ */" << std::endl;
2919 
2920  // generate the class
2921  fout << "" << std::endl;
2922  fout << "#include <vector>" << std::endl;
2923  fout << "#include <cmath>" << std::endl;
2924  fout << "#include <string>" << std::endl;
2925  fout << "#include <iostream>" << std::endl;
2926  fout << "" << std::endl;
2927  // now if the classifier needs to write some additional classes for its response implementation
2928  // this code goes here: (at least the header declarations need to come before the main class
2929  this->MakeClassSpecificHeader( fout, className );
2930 
2931  fout << "#ifndef IClassifierReader__def" << std::endl;
2932  fout << "#define IClassifierReader__def" << std::endl;
2933  fout << std::endl;
2934  fout << "class IClassifierReader {" << std::endl;
2935  fout << std::endl;
2936  fout << " public:" << std::endl;
2937  fout << std::endl;
2938  fout << " // constructor" << std::endl;
2939  fout << " IClassifierReader() : fStatusIsClean( true ) {}" << std::endl;
2940  fout << " virtual ~IClassifierReader() {}" << std::endl;
2941  fout << std::endl;
2942  fout << " // return classifier response" << std::endl;
2943  fout << " virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << std::endl;
2944  fout << std::endl;
2945  fout << " // returns classifier status" << std::endl;
2946  fout << " bool IsStatusClean() const { return fStatusIsClean; }" << std::endl;
2947  fout << std::endl;
2948  fout << " protected:" << std::endl;
2949  fout << std::endl;
2950  fout << " bool fStatusIsClean;" << std::endl;
2951  fout << "};" << std::endl;
2952  fout << std::endl;
2953  fout << "#endif" << std::endl;
2954  fout << std::endl;
2955  fout << "class " << className << " : public IClassifierReader {" << std::endl;
2956  fout << std::endl;
2957  fout << " public:" << std::endl;
2958  fout << std::endl;
2959  fout << " // constructor" << std::endl;
2960  fout << " " << className << "( std::vector<std::string>& theInputVars ) " << std::endl;
2961  fout << " : IClassifierReader()," << std::endl;
2962  fout << " fClassName( \"" << className << "\" )," << std::endl;
2963  fout << " fNvars( " << GetNvar() << " )," << std::endl;
2964  fout << " fIsNormalised( " << (IsNormalised() ? "true" : "false") << " )" << std::endl;
2965  fout << " { " << std::endl;
2966  fout << " // the training input variables" << std::endl;
2967  fout << " const char* inputVars[] = { ";
2968  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
2969  fout << "\"" << GetOriginalVarName(ivar) << "\"";
2970  if (ivar<GetNvar()-1) fout << ", ";
2971  }
2972  fout << " };" << std::endl;
2973  fout << std::endl;
2974  fout << " // sanity checks" << std::endl;
2975  fout << " if (theInputVars.size() <= 0) {" << std::endl;
2976  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << std::endl;
2977  fout << " fStatusIsClean = false;" << std::endl;
2978  fout << " }" << std::endl;
2979  fout << std::endl;
2980  fout << " if (theInputVars.size() != fNvars) {" << std::endl;
2981  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << std::endl;
2982  fout << " << theInputVars.size() << \" != \" << fNvars << std::endl;" << std::endl;
2983  fout << " fStatusIsClean = false;" << std::endl;
2984  fout << " }" << std::endl;
2985  fout << std::endl;
2986  fout << " // validate input variables" << std::endl;
2987  fout << " for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << std::endl;
2988  fout << " if (theInputVars[ivar] != inputVars[ivar]) {" << std::endl;
2989  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << std::endl;
2990  fout << " << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << std::endl;
2991  fout << " fStatusIsClean = false;" << std::endl;
2992  fout << " }" << std::endl;
2993  fout << " }" << std::endl;
2994  fout << std::endl;
2995  fout << " // initialize min and max vectors (for normalisation)" << std::endl;
2996  for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
2997  fout << " fVmin[" << ivar << "] = " << std::setprecision(15) << GetXmin( ivar ) << ";" << std::endl;
2998  fout << " fVmax[" << ivar << "] = " << std::setprecision(15) << GetXmax( ivar ) << ";" << std::endl;
2999  }
3000  fout << std::endl;
3001  fout << " // initialize input variable types" << std::endl;
3002  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3003  fout << " fType[" << ivar << "] = \'" << DataInfo().GetVariableInfo(ivar).GetVarType() << "\';" << std::endl;
3004  }
3005  fout << std::endl;
3006  fout << " // initialize constants" << std::endl;
3007  fout << " Initialize();" << std::endl;
3008  fout << std::endl;
3009  if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
3010  fout << " // initialize transformation" << std::endl;
3011  fout << " InitTransform();" << std::endl;
3012  }
3013  fout << " }" << std::endl;
3014  fout << std::endl;
3015  fout << " // destructor" << std::endl;
3016  fout << " virtual ~" << className << "() {" << std::endl;
3017  fout << " Clear(); // method-specific" << std::endl;
3018  fout << " }" << std::endl;
3019  fout << std::endl;
3020  fout << " // the classifier response" << std::endl;
3021  fout << " // \"inputValues\" is a vector of input values in the same order as the " << std::endl;
3022  fout << " // variables given to the constructor" << std::endl;
3023  fout << " double GetMvaValue( const std::vector<double>& inputValues ) const;" << std::endl;
3024  fout << std::endl;
3025  fout << " private:" << std::endl;
3026  fout << std::endl;
3027  fout << " // method-specific destructor" << std::endl;
3028  fout << " void Clear();" << std::endl;
3029  fout << std::endl;
3030  if (GetTransformationHandler().GetTransformationList().GetSize()!=0) {
3031  fout << " // input variable transformation" << std::endl;
3032  GetTransformationHandler().MakeFunction(fout, className,1);
3033  fout << " void InitTransform();" << std::endl;
3034  fout << " void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
3035  fout << std::endl;
3036  }
3037  fout << " // common member variables" << std::endl;
3038  fout << " const char* fClassName;" << std::endl;
3039  fout << std::endl;
3040  fout << " const size_t fNvars;" << std::endl;
3041  fout << " size_t GetNvar() const { return fNvars; }" << std::endl;
3042  fout << " char GetType( int ivar ) const { return fType[ivar]; }" << std::endl;
3043  fout << std::endl;
3044  fout << " // normalisation of input variables" << std::endl;
3045  fout << " const bool fIsNormalised;" << std::endl;
3046  fout << " bool IsNormalised() const { return fIsNormalised; }" << std::endl;
3047  fout << " double fVmin[" << GetNvar() << "];" << std::endl;
3048  fout << " double fVmax[" << GetNvar() << "];" << std::endl;
3049  fout << " double NormVariable( double x, double xmin, double xmax ) const {" << std::endl;
3050  fout << " // normalise to output range: [-1, 1]" << std::endl;
3051  fout << " return 2*(x - xmin)/(xmax - xmin) - 1.0;" << std::endl;
3052  fout << " }" << std::endl;
3053  fout << std::endl;
3054  fout << " // type of input variable: 'F' or 'I'" << std::endl;
3055  fout << " char fType[" << GetNvar() << "];" << std::endl;
3056  fout << std::endl;
3057  fout << " // initialize internal variables" << std::endl;
3058  fout << " void Initialize();" << std::endl;
3059  fout << " double GetMvaValue__( const std::vector<double>& inputValues ) const;" << std::endl;
3060  fout << "" << std::endl;
3061  fout << " // private members (method specific)" << std::endl;
3062 
3063  // call the classifier specific output (the classifier must close the class !)
3064  MakeClassSpecific( fout, className );
3065 
3066  fout << " inline double " << className << "::GetMvaValue( const std::vector<double>& inputValues ) const" << std::endl;
3067  fout << " {" << std::endl;
3068  fout << " // classifier response value" << std::endl;
3069  fout << " double retval = 0;" << std::endl;
3070  fout << std::endl;
3071  fout << " // classifier response, sanity check first" << std::endl;
3072  fout << " if (!IsStatusClean()) {" << std::endl;
3073  fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << std::endl;
3074  fout << " << \" because status is dirty\" << std::endl;" << std::endl;
3075  fout << " retval = 0;" << std::endl;
3076  fout << " }" << std::endl;
3077  fout << " else {" << std::endl;
3078  fout << " if (IsNormalised()) {" << std::endl;
3079  fout << " // normalise variables" << std::endl;
3080  fout << " std::vector<double> iV;" << std::endl;
3081  fout << " iV.reserve(inputValues.size());" << std::endl;
3082  fout << " int ivar = 0;" << std::endl;
3083  fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3084  fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3085  fout << " iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << std::endl;
3086  fout << " }" << std::endl;
3087  if (GetTransformationHandler().GetTransformationList().GetSize()!=0 &&
3090  fout << " Transform( iV, -1 );" << std::endl;
3091  }
3092  fout << " retval = GetMvaValue__( iV );" << std::endl;
3093  fout << " }" << std::endl;
3094  fout << " else {" << std::endl;
3095  if (GetTransformationHandler().GetTransformationList().GetSize()!=0 &&
3098  fout << " std::vector<double> iV;" << std::endl;
3099  fout << " int ivar = 0;" << std::endl;
3100  fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3101  fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3102  fout << " iV.push_back(*varIt);" << std::endl;
3103  fout << " }" << std::endl;
3104  fout << " Transform( iV, -1 );" << std::endl;
3105  fout << " retval = GetMvaValue__( iV );" << std::endl;
3106  }
3107  else {
3108  fout << " retval = GetMvaValue__( inputValues );" << std::endl;
3109  }
3110  fout << " }" << std::endl;
3111  fout << " }" << std::endl;
3112  fout << std::endl;
3113  fout << " return retval;" << std::endl;
3114  fout << " }" << std::endl;
3115 
3116  // create output for transformation - if any
3117  if (GetTransformationHandler().GetTransformationList().GetSize()!=0)
3118  GetTransformationHandler().MakeFunction(fout, className,2);
3119 
3120  // close the file
3121  fout.close();
3122 }
3123 
3124 ////////////////////////////////////////////////////////////////////////////////
3125 /// prints out method-specific help method
3126 
3128 {
3129  // if options are written to reference file, also append help info
3130  std::streambuf* cout_sbuf = std::cout.rdbuf(); // save original sbuf
3131  std::ofstream* o = 0;
3132  if (gConfig().WriteOptionsReference()) {
3133  Log() << kINFO << "Print Help message for class " << GetName() << " into file: " << GetReferenceFile() << Endl;
3134  o = new std::ofstream( GetReferenceFile(), std::ios::app );
3135  if (!o->good()) { // file could not be opened --> Error
3136  Log() << kFATAL << "<PrintHelpMessage> Unable to append to output file: " << GetReferenceFile() << Endl;
3137  }
3138  std::cout.rdbuf( o->rdbuf() ); // redirect 'std::cout' to file
3139  }
3140 
3141  // "|--------------------------------------------------------------|"
3142  if (!o) {
3143  Log() << kINFO << Endl;
3144  Log() << gTools().Color("bold")
3145  << "================================================================"
3146  << gTools().Color( "reset" )
3147  << Endl;
3148  Log() << gTools().Color("bold")
3149  << "H e l p f o r M V A m e t h o d [ " << GetName() << " ] :"
3150  << gTools().Color( "reset" )
3151  << Endl;
3152  }
3153  else {
3154  Log() << "Help for MVA method [ " << GetName() << " ] :" << Endl;
3155  }
3156 
3157  // print method-specific help message
3158  GetHelpMessage();
3159 
3160  if (!o) {
3161  Log() << Endl;
3162  Log() << "<Suppress this message by specifying \"!H\" in the booking option>" << Endl;
3163  Log() << gTools().Color("bold")
3164  << "================================================================"
3165  << gTools().Color( "reset" )
3166  << Endl;
3167  Log() << Endl;
3168  }
3169  else {
3170  // indicate END
3171  Log() << "# End of Message___" << Endl;
3172  }
3173 
3174  std::cout.rdbuf( cout_sbuf ); // restore the original stream buffer
3175  if (o) o->close();
3176 }
3177 
3178 // ----------------------- r o o t f i n d i n g ----------------------------
3179 
3180 ////////////////////////////////////////////////////////////////////////////////
3181 /// returns efficiency as function of cut
3182 
3184 {
3185  Double_t retval=0;
3186 
3187  // retrieve the class object
3188  if (Use_Splines_for_Eff_) {
3189  retval = fSplRefS->Eval( theCut );
3190  }
3191  else retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) );
3192 
3193  // caution: here we take some "forbidden" action to hide a problem:
3194  // in some cases, in particular for likelihood, the binned efficiency distributions
3195  // do not equal 1, at xmin, and 0 at xmax; of course, in principle we have the
3196  // unbinned information available in the trees, but the unbinned minimization is
3197  // too slow, and we don't need to do a precision measurement here. Hence, we force
3198  // this property.
3199  Double_t eps = 1.0e-5;
3200  if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
3201  else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
3202 
3203  return retval;
3204 }
3205 
3206 ////////////////////////////////////////////////////////////////////////////////
3207 /// returns the event collection (i.e. the dataset) TRANSFORMED using the
3208 /// classifiers specific Variable Transformation (e.g. Decorr or Decorr:Gauss:Decorr)
3209 
3211 {
 // Returns the event collection for the tree type `type`; pointers to collections
 // that were already requested are cached in fEventCollections, indexed by
 // Data()->TreeIndex(type).
3212  // if there's no variable transformation for this classifier, just hand back the
3213  // event collection of the data set
3214  if (GetTransformationHandler().GetTransformationList().GetEntries() <= 0) {
3215  return (Data()->GetEventCollection(type));
3216  }
3217 
3218  // otherwise, transform ALL the events and hand back the vector of the pointers to the
3219  // transformed events. If the pointer is already != 0, i.e. the whole thing has been
3220  // done before, I don't need to do it again, but just "hand over" the pointer to those events.
3221  Int_t idx = Data()->TreeIndex(type); //index indicating Training,Testing,... events/datasets
3222  if (fEventCollections.at(idx) == 0) {
3223  fEventCollections.at(idx) = &(Data()->GetEventCollection(type));
 // NOTE(review): the listing skips line 3224 here. As shown, only the address of
 // the data set's own collection is cached and no transformation is applied,
 // which contradicts the comment above - confirm against the full file.
3225  }
3226  return *(fEventCollections.at(idx));
3227 }
3228 
3229 ////////////////////////////////////////////////////////////////////////////////
3230 /// calculates the TMVA version string from the training version code on the fly
3231 
3233 {
3234  UInt_t a = GetTrainingTMVAVersionCode() & 0xff0000; a>>=16;
3235  UInt_t b = GetTrainingTMVAVersionCode() & 0x00ff00; b>>=8;
3236  UInt_t c = GetTrainingTMVAVersionCode() & 0x0000ff;
3237 
3238  return TString(Form("%i.%i.%i",a,b,c));
3239 }
3240 
3241 ////////////////////////////////////////////////////////////////////////////////
3242 /// calculates the ROOT version string from the training version code on the fly
3243 
3245 {
3246  UInt_t a = GetTrainingROOTVersionCode() & 0xff0000; a>>=16;
3247  UInt_t b = GetTrainingROOTVersionCode() & 0x00ff00; b>>=8;
3248  UInt_t c = GetTrainingROOTVersionCode() & 0x0000ff;
3249 
3250  return TString(Form("%i.%02i/%02i",a,b,c));
3251 }
3252 
3253 ////////////////////////////////////////////////////////////////////////////////
3254 
3256  ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
3258 
3259  if (mvaRes != NULL) {
3260  TH1D *mva_s = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_S"));
3261  TH1D *mva_b = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_B"));
3262  TH1D *mva_s_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_S"));
3263  TH1D *mva_b_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_B"));
3264 
3265  if ( !mva_s || !mva_b || !mva_s_tr || !mva_b_tr) return -1;
3266 
3267  if (SorB == 's' || SorB == 'S')
3268  return mva_s->KolmogorovTest( mva_s_tr, opt.Data() );
3269  else
3270  return mva_b->KolmogorovTest( mva_b_tr, opt.Data() );
3271  }
3272  return -1;
3273 }
virtual void DeclareOptions()=0
Bool_t HasMVAPdfs() const
Definition: MethodBase.h:431
Types::EAnalysisType fAnalysisType
Definition: MethodBase.h:589
Config & gConfig()
Definition: Config.cxx:43
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:51
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:830
TString fMethodName
Definition: MethodBase.h:609
virtual void AddClassifierOutputProb(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:918
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
Definition: TH1.cxx:3440
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition: TH1.cxx:5936
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3125
void WriteStateToXML(void *parent) const
general method used in writing the header of the weight files where the used variables, variable transformation type etc.
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters according to the argument. This is just a dummy.
Definition: MethodBase.cxx:638
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
virtual Double_t GetMaximum(Double_t maxval=FLT_MAX) const
Return maximum value smaller than maxval of bins in the range, unless the value has been overridden b...
Definition: TH1.cxx:7664
UInt_t GetNVariables() const
Definition: DataSetInfo.h:128
TXMLEngine & xmlengine()
Definition: Tools.h:278
float xmin
Definition: THbookFile.cxx:93
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Definition: TH1.cxx:8251
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodBase.h:230
#define TMVA_VERSION_CODE
Definition: Version.h:47
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:113
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
Bool_t GetLine(std::istream &fin, char *buf)
reads one line from the input stream checks for certain keywords and interprets the line if keywords ...
void ReadOptionsFromXML(void *node)
void ReadXML(void *pdfnode)
XML file reading.
Definition: PDF.cxx:957
long long Long64_t
Definition: RtypesCore.h:69
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
VariableInfo & AddTarget(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, Bool_t normalized=kTRUE, void *external=0)
add a variable (can be a complex expression) to the set of variables used in the MV analysis ...
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs.
Definition: MethodBase.cxx:206
Bool_t fIgnoreNegWeightsInTraining
Definition: MethodBase.h:676
virtual const char * WorkingDirectory()
Return working directory.
Definition: TSystem.cxx:866
void ReadStateFromXML(void *parent)
std::vector< VariableInfo > & GetSpectatorInfos()
Definition: DataSetInfo.h:122
void WriteVarsToStream(std::ostream &tf, const TString &prefix="") const
write the list of variables (name, min, max) for a given data transformation method to the stream ...
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
TLine * line
virtual void MakeClassSpecificHeader(std::ostream &, const TString &="") const
Definition: MethodBase.h:517
Collectable string class.
Definition: TObjString.h:32
TSpline1 * fSplTrainRefS
Definition: MethodBase.h:699
float Float_t
Definition: RtypesCore.h:53
return c
virtual Double_t GetValueForRoot(Double_t)
returns efficiency as function of cut
std::vector< TGraph * > fGraphs
Definition: MethodBase.h:114
void ReadOptionsFromStream(std::istream &istr)
read option back from the weight file
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:635
UInt_t GetNvar() const
Definition: MethodBase.h:340
TH1 * GetSmoothedHist() const
Definition: PDF.h:103
virtual const char * GetBuildNode() const
Return the build node name.
Definition: TSystem.cxx:3777
void BuildPDF(const TH1 *theHist)
Definition: PDF.cxx:254
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
const TString & GetOriginalVarName(Int_t ivar) const
Definition: MethodBase.h:505
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
Definition: MethodBase.cxx:617
TString fWeightFile
Definition: MethodBase.h:632
TString fVariableTransformTypeString
Definition: MethodBase.h:718
void PlotVariables(const std::vector< Event *> &events, TDirectory *theDirectory=0)
create histograms from the input variables
MsgLogger & Log() const
Definition: Configurable.h:128
XMLDocPointer_t NewDoc(const char *version="1.0")
creates new xml document with provided version
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
TransformationHandler * fTransformationPointer
Definition: MethodBase.h:665
Types::ESBType fVariableTransformType
Definition: MethodBase.h:605
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4638
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:50
virtual Double_t Eval(Double_t x) const
returns linearly interpolated TGraph entry around x
Definition: TSpline1.cxx:61
EAnalysisType
Definition: Types.h:129
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:37
virtual Int_t GetQuantiles(Int_t nprobSum, Double_t *q, const Double_t *probSum=0)
Compute Quantiles for this histogram Quantile x_q of a probability distribution Function F is defined...
Definition: TH1.cxx:4190
TH1 * GetPDFHist() const
Definition: PDF.h:100
virtual int MakeDirectory(const char *name)
Make a directory.
Definition: TSystem.cxx:822
virtual TObject * Get(const char *namecycle)
Return pointer to object identified by namecycle.
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition: Timer.cxx:186
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition: TH1.cxx:6760
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:582
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodBase.h:224
const TString & GetReferenceFile() const
Definition: Configurable.h:108
Basic string class.
Definition: TString.h:137
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition: TH1.cxx:700
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:575
void SetTrainTime(Double_t trainTime)
Definition: MethodBase.h:165
TMultiGraph * fMultiGraph
Definition: MethodBase.h:111
Double_t GetMutualInformation(const TH2F &)
Mutual Information method for non-linear correlations estimates in 2D histogram Author: Moritz Backes...
Definition: Tools.cxx:598
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:390
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:170
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1089
virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X")
int Int_t
Definition: RtypesCore.h:41
virtual void SetYTitle(const char *title)
Definition: TH1.h:414
virtual TDirectory * mkdir(const char *name, const char *title="")
Create a sub-directory and return a pointer to the created directory.
Definition: TDirectory.cxx:957
bool Bool_t
Definition: RtypesCore.h:59
TArc * a
Definition: textangle.C:12
const Bool_t kFALSE
Definition: Rtypes.h:92
virtual void TestMulticlass()
test multiclass classification
TString fJobName
Definition: MethodBase.h:608
TString GetTrainingROOTVersionString() const
calculates the ROOT version string from the training version code on the fly
UInt_t GetNClasses() const
Definition: DataSetInfo.h:154
TSpline1 * fSplRefB
Definition: MethodBase.h:697
UInt_t GetNTargets() const
Definition: MethodBase.h:342
TSpline1 * fSplRefS
Definition: MethodBase.h:696
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:239
std::vector< TString > * fInputVars
Definition: MethodBase.h:582
virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t &stddev, Double_t &stddev90Percent) const
Definition: MethodBase.cxx:718
#define ROOT_VERSION_CODE
Definition: RVersion.h:21
void ReadTargetsFromXML(void *tarnode)
read target info from XML
virtual void Init()=0
virtual Double_t GetMaximumSignificance(Double_t SignalEvents, Double_t BackgroundEvents, Double_t &optimal_significance_value) const
plot significance, S/Sqrt(S^2 + B^2), curve for given number of signal and background events; returns...
void AddInfoItem(void *gi, const TString &name, const TString &value) const
xml writing
TString GetElapsedTime(Bool_t Scientific=kTRUE)
Definition: Timer.cxx:129
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:309
Double_t fTrainTime
Definition: MethodBase.h:689
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as an overall quality measure of the classification ...
TDirectory * MethodBaseDir() const
returns the ROOT directory where all instances of the corresponding MVA method are stored ...
Double_t GetTrainingSumSignalWeights()
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
Double_t fTestTime
Definition: MethodBase.h:690
void FreeDoc(XMLDocPointer_t xmldoc)
frees allocated document data and deletes document itself
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
MsgLogger * fLogger
Definition: Configurable.h:134
Double_t GetTrainTime() const
Definition: MethodBase.h:166
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Definition: MethodBase.cxx:837
void CreateMVAPdfs()
Create PDFs of the MVA output variables.
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1218
void ReadVariablesFromXML(void *varnode)
read variable info from XML
const TString & GetExpression() const
Definition: VariableInfo.h:65
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3907
const TString & GetWeightFileDir() const
Definition: MethodBase.h:486
UInt_t fSignalClass
Definition: MethodBase.h:683
void WriteStateToFile() const
write options and weights to file note that each one text file for the main configuration information...
double sqrt(double)
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:388
Tools & gTools()
Definition: Tools.cxx:79
Double_t GetXmin() const
Definition: TAxis.h:139
char GetVarType() const
Definition: VariableInfo.h:69
TString GetTrainingTMVAVersionString() const
calculates the TMVA version string from the training version code on the fly
TStopwatch timer
Definition: pirndm.C:37
Double_t x[n]
Definition: legend1.C:17
DataSetInfo & fDataSetInfo
Definition: MethodBase.h:601
UInt_t TreeIndex(Types::ETreeType type) const
Definition: DataSet.h:204
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString...
Definition: TString.cxx:2335
ECutOrientation fCutOrientation
Definition: MethodBase.h:693
virtual ~MethodBase()
destructor
Definition: MethodBase.cxx:361
void DocSetRootElement(XMLDocPointer_t xmldoc, XMLNodePointer_t xmlnode)
set main (root) node for document
UInt_t GetTrainingTMVAVersionCode() const
Definition: MethodBase.h:385
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
Definition: MethodBase.cxx:877
void AddXMLTo(void *parent=0) const
XML node describing the transformation.
const Event * GetEvent() const
Definition: MethodBase.h:745
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
Definition: MethodBase.cxx:234
DataSet * Data() const
Definition: MethodBase.h:405
void ClearGraphs()
This function sets the point number to 0 for all graphs.
Definition: MethodBase.cxx:194
Bool_t CheckSplines(const TH1 *, const TSpline *)
Definition: Tools.cxx:487
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
void ReadStateFromFile()
Function to read options and weights from file.
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:217
std::vector< Float_t > * GetValueVector()
bool BeginsWith(const std::string &theString, const std::string &theSubstring)
void SetCallerName(const TString &name)
~IPythonInteractive()
standard destructor
Definition: MethodBase.cxx:158
virtual void AddClassifierOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:851
virtual Double_t GetRarity(Double_t mvaVal, Types::ESBType reftype=Types::kBackground) const
compute rarity: R(x) = Integrate_[-oo..x] { PDF(x') dx' } where PDF(x) is the PDF of the classifier's...
void PrintHelpMessage() const
prints out method-specific help method
Double_t fMeanB
Definition: MethodBase.h:656
void ReadClassesFromXML(void *clsnode)
read number of classes from XML
Bool_t EndsWith(const char *pat, ECaseCompare cmp=kExact) const
Return true if string ends with the specified string.
Definition: TString.cxx:2221
IONames & GetIONames()
Definition: Config.h:78
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:395
virtual void ParseOptions()
options parser
Double_t GetXmin(Int_t ivar) const
Definition: MethodBase.h:352
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:403
void Init(std::vector< TString > &graphTitles)
This function gets some title and it creates a TGraph for every title.
Definition: MethodBase.cxx:171
DataSetInfo & DataInfo() const
Definition: MethodBase.h:406
void SetOptions(const TString &s)
Definition: Configurable.h:91
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
Definition: TSystem.cxx:1564
Bool_t DoRegression() const
Definition: MethodBase.h:434
XMLDocPointer_t ParseString(const char *xmlstring)
parses content of string and tries to produce xml structures
void SetMinType(EMsgType minType)
Definition: MsgLogger.h:76
TString fTestvar
Definition: MethodBase.h:611
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:467
Bool_t DoesExist(const TString &alias) const
Definition: Results.cxx:118
TFile * GetFile() const
Definition: MethodBase.h:366
virtual void ProcessOptions()=0
virtual Double_t GetProba(const Event *ev)
Definition: PDF.h:71
TH1F * h1
Definition: legend1.C:5
TSpline * fSpleffBvsS
Definition: MethodBase.h:646
virtual void AddBinContent(Int_t bin)
Increment bin content by 1.
Definition: TH1.cxx:1193
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:378
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:93
std::vector< VariableInfo > & GetTargetInfos()
Definition: DataSetInfo.h:117
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
Bool_t fModelPersistence
Definition: MethodBase.h:627
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
Double_t Root(Double_t refValue)
Root finding using Brents algorithm; taken from CERNLIB function RZERO.
Definition: RootFinder.cxx:68
virtual void AddWeightsXMLTo(void *parent) const =0
UInt_t fTMVATrainingVersion
Definition: MethodBase.h:612
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:413
const std::vector< Event * > * CalcTransformations(const std::vector< Event *> &, Bool_t createNewVector=kFALSE)
computation of transformation
A doubly linked list.
Definition: TList.h:47
Double_t GetXmax(Int_t ivar) const
Definition: MethodBase.h:353
TransformationHandler fTransformation
Definition: MethodBase.h:666
void ReadStateFromXMLString(const char *xmlstr)
for reading from memory
Bool_t DoMulticlass() const
Definition: MethodBase.h:435
virtual Double_t KolmogorovTest(const TH1 *h2, Option_t *option="") const
Statistical test of compatibility in shape between this histogram and h2, using Kolmogorov test...
Definition: TH1.cxx:7359
virtual void MakeClassSpecific(std::ostream &, const TString &="") const
Definition: MethodBase.h:514
virtual void ReadWeightsFromXML(void *wghtnode)=0
const int nEvents
Definition: testRooFit.cxx:42
Int_t GetHistNBins(Int_t evtNum=0)
Definition: PDF.cxx:298
void SaveDoc(XMLDocPointer_t xmldoc, const char *filename, Int_t layout=1)
store document content to file if layout<=0, no any spaces or newlines will be placed between xmlnode...
TString fWeightFileExtension
Definition: Config.h:101
TString fUser
Definition: TSystem.h:152
virtual void Train()=0
void * GetExternalLink() const
Definition: VariableInfo.h:89
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:104
void WriteStateToStream(std::ostream &tf) const
general method used in writing the header of the weight files where the used variables, variable transformation type etc.
Double_t fRmsB
Definition: MethodBase.h:658
Double_t fXmin
Definition: MethodBase.h:659
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
TString info(resultsName+"/"); switch(type) { case Types::kTraining: info += "kTraining/"; break; cas...
Definition: DataSet.cxx:286
TSpline1 * fSplTrainRefB
Definition: MethodBase.h:700
TRandom2 r(17)
Class to manage histogram axis.
Definition: TAxis.h:36
R__EXTERN TSystem * gSystem
Definition: TSystem.h:549
TDirectory * fMethodBaseDir
Definition: MethodBase.h:620
SVector< double, 2 > v
Definition: Dict.h:5
UInt_t fROOTTrainingVersion
Definition: MethodBase.h:613
const char * GetName() const
Definition: MethodBase.h:330
ClassInfo * GetClassInfo(Int_t clNum) const
void ReadVarsFromStream(std::istream &istr)
Read the variables (name, min, max) for a given data transformation method from the stream...
void AddClassesXMLTo(void *parent) const
write class info to XML
virtual void ReadTransformationFromStream(std::istream &istr, const TString &classname="")=0
const Int_t NBIN_HIST_HIGH
Definition: MethodBase.cxx:137
2-D histogram with a float per channel (see TH1 documentation)
Definition: TH2.h:255
class TMVA::Config::VariablePlotting fVariablePlotting
void Statistics(Types::ETreeType treeType, const TString &theVarName, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &)
calculates rms,mean, xmin, xmax of the event variable this can be either done for the variables as th...
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:558
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8323
Float_t GetAchievablePur(UInt_t cls)
void SetReadingVersion(UInt_t rv)
Definition: PDF.h:127
void SetValue(Float_t value, Int_t ievt)
set MVA response
UInt_t GetTrainingROOTVersionCode() const
Definition: MethodBase.h:386
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t fMeanS
Definition: MethodBase.h:655
char * Form(const char *fmt,...)
Ssiz_t Length() const
Definition: TString.h:390
void ReadFromXML(void *varnode)
read VariableInfo from stream
Bool_t Help() const
Definition: MethodBase.h:498
Int_t fNsmoothMVAPdf
Definition: MethodBase.h:721
Bool_t fTxtWeightsOnly
Definition: MethodBase.h:719
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition: TString.cxx:1070
const TString & GetJobName() const
Definition: MethodBase.h:326
Double_t GetXmin() const
Definition: PDF.h:112
const TString & GetMethodName() const
Definition: MethodBase.h:327
TDirectory * fBaseDir
Definition: MethodBase.h:619
UInt_t GetNSpectators(bool all=kTRUE) const
virtual Double_t Eval(Double_t x) const =0
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:296
Bool_t fHasMVAPdfs
Definition: MethodBase.h:674
TSpline * fSplTrainEffBvsS
Definition: MethodBase.h:650
float xmax
Definition: THbookFile.cxx:93
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:311
void DeclareBaseOptions()
define the options (their key words) that can be set in the option string here the options valid for ...
Definition: MethodBase.cxx:503
void BuildTransformationFromVarInfo(const std::vector< TMVA::VariableInfo > &var)
this method is only used when building a normalization transformation from old text files in this cas...
1-D histogram with a double per channel (see TH1 documentation)
Definition: TH1.h:618
Bool_t IsSilentFile()
Definition: MethodBase.h:375
virtual Double_t GetSignificance() const
compute significance of mean difference: significance = |<S> - <B>|/Sqrt(RMS_S^2 + RMS_B^2) ...
void MakeFunction(std::ostream &fout, const TString &fncName, Int_t part) const
create transformation function
TString GetWeightFileName() const
retrieve weight file name
Double_t GetSignalReferenceCutOrientation() const
Definition: MethodBase.h:357
REAL epsilon
Definition: triangle.c:617
void ProcessBaseOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodBase.cxx:534
Double_t ElapsedSeconds(void)
computes elapsed time in seconds
Definition: Timer.cxx:122
Int_t FindVarIndex(const TString &) const
find variable by name
UInt_t GetNVariables() const
Definition: MethodBase.h:341
std::vector< const std::vector< TMVA::Event * > * > fEventCollections
Definition: MethodBase.h:702
void AddSpectatorsXMLTo(void *parent) const
write spectator info to XML
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:233
TString fVerbosityLevelString
Definition: MethodBase.h:671
Double_t fRmsS
Definition: MethodBase.h:657
UInt_t fBackgroundClass
Definition: MethodBase.h:684
void DeclareOptions()
define the options (their key words) that can be set in the option string know options: PDFInterpol[i...
Definition: PDF.cxx:816
TList * GetStorage() const
Definition: Results.h:79
static void SetIgnoreNegWeightsInTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:397
int Ssiz_t
Definition: RtypesCore.h:63
XMLDocPointer_t ParseFile(const char *filename, Int_t maxbuf=100000)
Parses content of file and tries to produce xml structures.
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodBase.cxx:430
#define ClassImp(name)
Definition: Rtypes.h:279
void SetTestTime(Double_t testTime)
Definition: MethodBase.h:169
virtual void AddRegressionOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:738
double Double_t
Definition: RtypesCore.h:55
std::vector< Double_t > GetBestMultiClassCuts(UInt_t targetClass)
void SetWeightFileName(TString)
set the weight file name (deprecated)
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
Double_t GetXmax() const
Definition: PDF.h:113
Describe directory structure in memory.
Definition: TDirectory.h:44
std::vector< Float_t > * fMulticlassReturnVal
Definition: MethodBase.h:592
Bool_t IsNormalised() const
Definition: MethodBase.h:490
Double_t GetTrainingSumBackgrWeights()
TH1 * GetHist(const TString &alias) const
Definition: Results.cxx:127
int type
Definition: TGX11.cxx:120
static RooMathCoreReg dummy
virtual void GetHelpMessage() const =0
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:114
void AddVarsXMLTo(void *parent) const
write variable info to XML
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:567
The TH1 histogram class.
Definition: TH1.h:80
const Bool_t Use_Splines_for_Eff_
Definition: MethodBase.cxx:134
const char * AsString() const
Return the date & time as a string (ctime() format).
Definition: TDatime.cxx:101
VariableInfo & GetVariableInfo(Int_t i)
Definition: DataSetInfo.h:114
void AddPreDefVal(const T &)
Definition: Configurable.h:174
IPythonInteractive()
standard constructor
Definition: MethodBase.cxx:150
virtual void WriteMonitoringHistosToFile() const
write special monitoring histograms to file; dummy implementation here ...
ClassInfo * AddClass(const TString &className)
void AddXMLTo(void *parent)
XML file writing.
Definition: PDF.cxx:915
UInt_t GetNumber() const
Definition: ClassInfo.h:73
Bool_t fConstructedFromWeightFile
Definition: MethodBase.h:614
virtual const char * GetName() const
Returns name of object.
Definition: DataSetInfo.h:85
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
Definition: MethodBase.cxx:420
TString fVarTransformString
Definition: MethodBase.h:663
virtual void AddMulticlassOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:777
void ComputeStat(const std::vector< TMVA::Event *> &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition: Tools.cxx:215
const TString & GetOptions() const
Definition: Configurable.h:90
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
void SetConfigName(const char *n)
Definition: Configurable.h:69
void ValidatePDF(TH1 *original=0) const
comparison of original histogram with reference PDF
Definition: PDF.cxx:576
Float_t GetAchievableEff(UInt_t cls)
void SetSource(const std::string &source)
Definition: MsgLogger.h:74
Types::EMVA fMethodType
Definition: MethodBase.h:610
char Char_t
Definition: RtypesCore.h:29
virtual std::vector< Float_t > GetMulticlassTrainingEfficiency(std::vector< std::vector< Float_t > > &purity)
Ranking * fRanking
Definition: MethodBase.h:581
virtual void SetXTitle(const char *title)
Definition: TH1.h:413
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample ...
Definition: MethodBase.cxx:960
virtual Bool_t cd(const char *path=0)
Change current directory to "this" directory.
Definition: TDirectory.cxx:435
void ReadFromStream(std::istream &istr)
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
TString GetMethodTypeName() const
Definition: MethodBase.h:328
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:590
void ProcessOptions()
Definition: PDF.cxx:866
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:202
void AddToXML(void *varnode)
write class to XML
Double_t fSignalReferenceCut
the data set information (sometimes needed)
Definition: MethodBase.h:603
const Event * fTmpEvent
Definition: MethodBase.h:408
void SetWeightFileDir(TString fileDir)
set directory of weight file
XMLNodePointer_t DocGetRootElement(XMLDocPointer_t xmldoc)
returns root node of document
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:356
virtual void Sumw2(Bool_t flag=kTRUE)
Create structure to store sum of squares of weights.
Definition: TH1.cxx:8130
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:53
TH1 * GetOriginalHist() const
Definition: PDF.h:102
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using apath.
Definition: TDirectory.cxx:338
you should not use this method at all Int_t Int_t Double_t Double_t Double_t Int_t Double_t Double_t Double_t Double_t b
Definition: TRolke.cxx:630
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:413
#define NULL
Definition: Rtypes.h:82
Int_t fNbinsMVAoutput
Definition: MethodBase.h:586
THist< 1, double, THistStatContent, THistStatUncertainty > TH1D
Definition: THist.hxx:301
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:229
virtual Double_t GetTrainingEfficiency(const TString &)
void AddOptionsXMLTo(void *parent) const
write options to XML file
Bool_t fSilentFile
Definition: MethodBase.h:625
Double_t GetIntegral(Double_t xmin, Double_t xmax)
computes PDF integral within given ranges
Definition: PDF.cxx:652
Double_t fXmax
Definition: MethodBase.h:660
void ReadSpectatorsFromXML(void *specnode)
read spectator info from XML
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1965
Bool_t IsSignal(const Event *ev) const
std::vector< Bool_t > * GetValueVectorTypes()
ECutOrientation GetCutOrientation() const
Definition: MethodBase.h:546
void InitBase()
default initialization called by all constructors
Definition: MethodBase.cxx:438
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:591
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:433
void AddTargetsXMLTo(void *parent) const
write target info to XML
void Store(TObject *obj, const char *alias=0)
Definition: Results.cxx:83
virtual Int_t GetNbinsX() const
Definition: TH1.h:301
virtual void Add(TGraph *graph, Option_t *chopt="")
Add a new graph to the list of graphs.
Double_t Sqrt(Double_t x)
Definition: TMath.h:464
const TString & GetTestvarName() const
Definition: MethodBase.h:331
virtual Int_t GetSize() const
Definition: TCollection.h:95
virtual void ReadWeightsFromStream(std::istream &)=0
const Bool_t kTRUE
Definition: Rtypes.h:91
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:292
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Definition: THist.hxx:308
void SetTestvarName(const TString &v="")
Definition: MethodBase.h:337
TString fFileDir
Definition: MethodBase.h:631
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
Types::EMVA GetMethodType() const
Definition: MethodBase.h:329
Double_t GetXmax() const
Definition: TAxis.h:140
void CheckForUnusedOptions() const
checks for unused options in option string
virtual Int_t GetMaximumBin() const
Return location of bin with maximum value in the range.
Definition: TH1.cxx:7694
virtual void TestClassification()
initialization
void ReadStateFromStream(std::istream &tf)
read the header from the weight files of the different MVA methods
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write all objects in this collection.
const Event * GetEvent() const
Definition: DataSet.cxx:211
std::vector< VariableInfo > & GetVariableInfos()
Definition: DataSetInfo.h:112
void SetExternalLink(void *p)
Definition: VariableInfo.h:81
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:432
char name[80]
Definition: TGX11.cxx:109
Bool_t fSetupCompleted
Definition: MethodBase.h:705
TAxis * GetXaxis()
Definition: TH1.h:324
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:819
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:360
void WriteOptionsToStream(std::ostream &o, const TString &prefix) const
write options to output stream (e.g. in writing the MVA weight files)
void Resize(Ssiz_t n)
Resize the string. Truncate or add blanks as necessary.
Definition: TString.cxx:1059
This class stores the date and time with a precision of one second in an unsigned 32 bit word (950130...
Definition: TDatime.h:39
Double_t GetVal(Double_t x) const
returns value PDF(x)
Definition: PDF.cxx:699
VariableTransformBase * AddTransformation(VariableTransformBase *, Int_t cls)
void SetConfigDescription(const char *d)
Definition: Configurable.h:70
Double_t GetSeparation(TH1 *S, TH1 *B) const
compute "separation" defined as <s2> = (1/2) Int_-oo..+oo { (S(x) - B(x))^2/(S(x) + B(x)) dx } ...
Definition: Tools.cxx:136
virtual void Close(Option_t *option="")
Close a file.
Definition: TFile.cxx:904
const char * Data() const
Definition: TString.h:349