Logo ROOT   6.14/05
Reference Guide
MethodBase.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Virtual base class for all MVA methods *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * *
22  * Copyright (c) 2005-2011: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * U. of Bonn, Germany *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  **********************************************************************************/
32 
33 #ifndef ROOT_TMVA_MethodBase
34 #define ROOT_TMVA_MethodBase
35 
36 //////////////////////////////////////////////////////////////////////////
37 // //
38 // MethodBase //
39 // //
40 // Virtual base class for all TMVA methods //
41 // //
42 //////////////////////////////////////////////////////////////////////////
43 
44 #include <iosfwd>
45 #include <vector>
46 #include <map>
47 #include "assert.h"
48 
49 #include "TString.h"
50 
51 #include "TMVA/IMethod.h"
52 #include "TMVA/Configurable.h"
53 #include "TMVA/Types.h"
54 #include "TMVA/DataSet.h"
55 #include "TMVA/Event.h"
57 #include <TMVA/Results.h>
58 
59 #include <TFile.h>
60 
61 class TGraph;
62 class TTree;
63 class TDirectory;
64 class TSpline;
65 class TH1F;
66 class TH1D;
67 class TMultiGraph;
68 
69 /*! \class TMVA::IPythonInteractive
70 \ingroup TMVA
71 
72 This class is needed by JsMVA; it is a helper class for tracking errors during
73 training in a Jupyter notebook. It is only initialized in a Jupyter notebook context.
74 At initialization we specify a list of titles, and a TGraph is created for each title.
75 We can add new data points easily to all TGraphs. These graphs are added to a
76 TMultiGraph, and during an interactive training we get this TMultiGraph object
77 and plot it with JsROOT.
78 */
79 
80 namespace TMVA {
81 
82  class Ranking;
83  class PDF;
84  class TSpline1;
85  class MethodCuts;
86  class MethodBoost;
87  class DataSetInfo;
88  namespace Experimental {
89  class Classification;
90  }
91 
93  public:
96  void Init(std::vector<TString>& graphTitles);
97  void ClearGraphs();
98  void AddPoint(Double_t x, Double_t y1, Double_t y2);
99  void AddPoint(std::vector<Double_t>& dat);
100  inline TMultiGraph* Get() {return fMultiGraph;}
101  inline bool NotInitialized(){ return fNumGraphs==0;};
102  private:
103  TMultiGraph* fMultiGraph;
104  std::vector<TGraph*> fGraphs;
107  };
108 
109  class MethodBase : virtual public IMethod, public Configurable {
110 
111  friend class CrossValidation;
112  friend class Factory;
113  friend class RootFinder;
114  friend class MethodBoost;
115  friend class MethodCrossValidation;
117 
118  public:
119 
120  enum EWeightFileType { kROOT=0, kTEXT };
121 
122  // default constructor
123  MethodBase( const TString& jobName,
124  Types::EMVA methodType,
125  const TString& methodTitle,
126  DataSetInfo& dsi,
127  const TString& theOption = "" );
128 
129  // constructor used for Testing + Application of the MVA, only (no training),
130  // using given weight file
131  MethodBase( Types::EMVA methodType,
132  DataSetInfo& dsi,
133  const TString& weightFile );
134 
135  // default destructor
136  virtual ~MethodBase();
137 
138  // declaration, processing and checking of configuration options
139  void SetupMethod();
140  void ProcessSetup();
141  virtual void CheckSetup(); // may be overwritten by derived classes
142 
143  // ---------- main training and testing methods ------------------------------
144 
145  // prepare tree branch with the method's discriminating variable
146  void AddOutput( Types::ETreeType type, Types::EAnalysisType analysisType );
147 
148  // performs classifier training
149  // calls methods Train() implemented by derived classes
150  void TrainMethod();
151 
152  // optimize tuning parameters
153  virtual std::map<TString,Double_t> OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA");
154  virtual void SetTuneParameters(std::map<TString,Double_t> tuneParameters);
155 
156  virtual void Train() = 0;
157 
158  // store and retrieve time used for training
159  void SetTrainTime( Double_t trainTime ) { fTrainTime = trainTime; }
160  Double_t GetTrainTime() const { return fTrainTime; }
161 
162  // store and retrieve time used for testing
163  void SetTestTime ( Double_t testTime ) { fTestTime = testTime; }
164  Double_t GetTestTime () const { return fTestTime; }
165 
166  // performs classifier testing
167  virtual void TestClassification();
168  virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X");
169 
170  // performs multiclass classifier testing
171  virtual void TestMulticlass();
172 
173  // performs regression testing
174  virtual void TestRegression( Double_t& bias, Double_t& biasT,
175  Double_t& dev, Double_t& devT,
176  Double_t& rms, Double_t& rmsT,
177  Double_t& mInf, Double_t& mInfT, // mutual information
178  Double_t& corr,
179  Types::ETreeType type );
180 
181  // options treatment
182  virtual void Init() = 0;
183  virtual void DeclareOptions() = 0;
184  virtual void ProcessOptions() = 0;
185  virtual void DeclareCompatibilityOptions(); // declaration of past options
186 
187  // reset the Method --> As if it was not yet trained, just instantiated
188  // virtual void Reset() = 0;
189  //for the moment, I provide a dummy (that would not work) default, just to make
190  // compilation/running w/o parameter optimisation still possible
191  virtual void Reset(){return;}
192 
193  // classifier response:
194  // some methods may return a per-event error estimate
195  // error calculation is skipped if err==0
196  virtual Double_t GetMvaValue( Double_t* errLower = 0, Double_t* errUpper = 0) = 0;
197 
198  // signal/background classification response
199  Double_t GetMvaValue( const TMVA::Event* const ev, Double_t* err = 0, Double_t* errUpper = 0 );
200 
201  protected:
202  // helper function to set errors to -1
203  void NoErrorCalc(Double_t* const err, Double_t* const errUpper);
204 
205  // signal/background classification response for all current set of data
206  virtual std::vector<Double_t> GetMvaValues(Long64_t firstEvt = 0, Long64_t lastEvt = -1, Bool_t logProgress = false);
207 
208 
209  public:
210  // regression response
211  const std::vector<Float_t>& GetRegressionValues(const TMVA::Event* const ev){
212  fTmpEvent = ev;
213  const std::vector<Float_t>* ptr = &GetRegressionValues();
214  fTmpEvent = 0;
215  return (*ptr);
216  }
217 
218  virtual const std::vector<Float_t>& GetRegressionValues() {
219  std::vector<Float_t>* ptr = new std::vector<Float_t>(0);
220  return (*ptr);
221  }
222 
223  // multiclass classification response
224  virtual const std::vector<Float_t>& GetMulticlassValues() {
225  std::vector<Float_t>* ptr = new std::vector<Float_t>(0);
226  return (*ptr);
227  }
228 
229  // probability of classifier response (mvaval) to be signal (requires "CreateMvaPdf" option set)
230  virtual Double_t GetProba( const Event *ev); // the simple one, automatically calculates the mvaVal and uses the SAME sig/bkg ratio as given in the training sample (typically 50/50 .. (NormMode=EqualNumEvents) but can be different)
231  virtual Double_t GetProba( Double_t mvaVal, Double_t ap_sig );
232 
233  // Rarity of classifier response (signal or background (default) is uniform in [0,1])
234  virtual Double_t GetRarity( Double_t mvaVal, Types::ESBType reftype = Types::kBackground ) const;
235 
236  // create ranking
237  virtual const Ranking* CreateRanking() = 0;
238 
239  // make ROOT-independent C++ class
240  virtual void MakeClass( const TString& classFileName = TString("") ) const;
241 
242  // print help message
243  void PrintHelpMessage() const;
244 
245  //
246  // streamer methods for training information (creates "weight" files) --------
247  //
248  public:
249  void WriteStateToFile () const;
250  void ReadStateFromFile ();
251 
252  protected:
253  // the actual "weights"
254  virtual void AddWeightsXMLTo ( void* parent ) const = 0;
255  virtual void ReadWeightsFromXML ( void* wghtnode ) = 0;
256  virtual void ReadWeightsFromStream( std::istream& ) = 0; // backward compatibility
257  virtual void ReadWeightsFromStream( TFile& ) {} // backward compatibility
258 
259  private:
260  friend class MethodCategory;
261  friend class MethodCompositeBase;
262  void WriteStateToXML ( void* parent ) const;
263  void ReadStateFromXML ( void* parent );
264  void WriteStateToStream ( std::ostream& tf ) const; // needed for MakeClass
265  void WriteVarsToStream ( std::ostream& tf, const TString& prefix = "" ) const; // needed for MakeClass
266 
267 
268  public: // these two need to be public, they are used to read in-memory weight-files
269  void ReadStateFromStream ( std::istream& tf ); // backward compatibility
270  void ReadStateFromStream ( TFile& rf ); // backward compatibility
271  void ReadStateFromXMLString( const char* xmlstr ); // for reading from memory
272 
273  private:
274  // the variable information
275  void AddVarsXMLTo ( void* parent ) const;
276  void AddSpectatorsXMLTo ( void* parent ) const;
277  void AddTargetsXMLTo ( void* parent ) const;
278  void AddClassesXMLTo ( void* parent ) const;
279  void ReadVariablesFromXML ( void* varnode );
280  void ReadSpectatorsFromXML( void* specnode);
281  void ReadTargetsFromXML ( void* tarnode );
282  void ReadClassesFromXML ( void* clsnode );
283  void ReadVarsFromStream ( std::istream& istr ); // backward compatibility
284 
285  public:
286  // ---------------------------------------------------------------------------
287 
288  // write evaluation histograms into target file
289  virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype);
290 
291  // write classifier-specific monitoring information to target file
292  virtual void WriteMonitoringHistosToFile() const;
293 
294  // ---------- public evaluation methods --------------------------------------
295 
296  // individual initialization for testing of each method
297  // overload this one for individual initialisation of the testing,
298  // it is then called automatically within the global "TestInit"
299 
300  // variables (and private member functions) for the Evaluation:
301  // get the efficiency. It fills a histogram for efficiency/vs/bkg
302  // and returns the single efficiency value requested
303  // in the TString argument. (Watch the string format)
304  virtual Double_t GetEfficiency( const TString&, Types::ETreeType, Double_t& err );
305  virtual Double_t GetTrainingEfficiency(const TString& );
306  virtual std::vector<Float_t> GetMulticlassEfficiency( std::vector<std::vector<Float_t> >& purity );
307  virtual std::vector<Float_t> GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity );
308  virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type);
309  virtual Double_t GetSignificance() const;
310  virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const;
311  virtual Double_t GetROCIntegral(PDF *pdfS=0, PDF *pdfB=0) const;
312  virtual Double_t GetMaximumSignificance( Double_t SignalEvents, Double_t BackgroundEvents,
313  Double_t& optimal_significance_value ) const;
314  virtual Double_t GetSeparation( TH1*, TH1* ) const;
315  virtual Double_t GetSeparation( PDF* pdfS = 0, PDF* pdfB = 0 ) const;
316 
317  virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t& stddev,Double_t& stddev90Percent ) const;
318  // ---------- public accessors -----------------------------------------------
319 
320  // classifier naming (a lot of names ... aren't they ;-)
321  const TString& GetJobName () const { return fJobName; }
322  const TString& GetMethodName () const { return fMethodName; }
323  TString GetMethodTypeName() const { return Types::Instance().GetMethodName(fMethodType); }
324  Types::EMVA GetMethodType () const { return fMethodType; }
325  const char* GetName () const { return fMethodName.Data(); }
326  const TString& GetTestvarName () const { return fTestvar; }
327  const TString GetProbaName () const { return fTestvar + "_Proba"; }
328  TString GetWeightFileName() const;
329 
330  // build classifier name in Test tree
331  // MVA prefix (e.g., "MVA_")
332  void SetTestvarName ( const TString & v="" ) { fTestvar = (v=="") ? ("MVA_" + GetMethodName()) : v; }
333 
334  // number of input variable used by classifier
335  UInt_t GetNvar() const { return DataInfo().GetNVariables(); }
336  UInt_t GetNVariables() const { return DataInfo().GetNVariables(); }
337  UInt_t GetNTargets() const { return DataInfo().GetNTargets(); };
338 
339  // internal names and expressions of input variables
340  const TString& GetInputVar ( Int_t i ) const { return DataInfo().GetVariableInfo(i).GetInternalName(); }
341  const TString& GetInputLabel( Int_t i ) const { return DataInfo().GetVariableInfo(i).GetLabel(); }
342  const char * GetInputTitle( Int_t i ) const { return DataInfo().GetVariableInfo(i).GetTitle(); }
343 
344  // normalisation and limit accessors
345  Double_t GetMean( Int_t ivar ) const { return GetTransformationHandler().GetMean(ivar); }
346  Double_t GetRMS ( Int_t ivar ) const { return GetTransformationHandler().GetRMS(ivar); }
347  Double_t GetXmin( Int_t ivar ) const { return GetTransformationHandler().GetMin(ivar); }
348  Double_t GetXmax( Int_t ivar ) const { return GetTransformationHandler().GetMax(ivar); }
349 
350  // sets the minimum requirement on the MVA output to declare an event signal-like
351  Double_t GetSignalReferenceCut() const { return fSignalReferenceCut; }
352  Double_t GetSignalReferenceCutOrientation() const { return fSignalReferenceCutOrientation; }
353 
354  // sets the minimum requirement on the MVA output to declare an event signal-like
355  void SetSignalReferenceCut( Double_t cut ) { fSignalReferenceCut = cut; }
356  void SetSignalReferenceCutOrientation( Double_t cutOrientation ) { fSignalReferenceCutOrientation = cutOrientation; }
357 
358  // pointers to ROOT directories
359  TDirectory* BaseDir() const;
360  TDirectory* MethodBaseDir() const;
361  TFile* GetFile() const {return fFile;}
362 
363  void SetMethodDir ( TDirectory* methodDir ) { fBaseDir = fMethodBaseDir = methodDir; }
364  void SetBaseDir( TDirectory* methodDir ){ fBaseDir = methodDir; }
365  void SetMethodBaseDir( TDirectory* methodDir ){ fMethodBaseDir = methodDir; }
366  void SetFile(TFile* file){fFile=file;}
367 
368  //Silent file
369  void SetSilentFile(Bool_t status){fSilentFile=status;}
370  Bool_t IsSilentFile(){return fSilentFile;}
371 
372  //Model Persistence
373  void SetModelPersistence(Bool_t status){fModelPersistence=status;}//added support to create/remove dir here if exits or not
374  Bool_t IsModelPersistence(){return fModelPersistence;}
375 
376  // the TMVA version can be obtained and checked using
377  // if (GetTrainingTMVAVersionCode()>TMVA_VERSION(3,7,2)) {...}
378  // or
379  // if (GetTrainingROOTVersionCode()>ROOT_VERSION(5,15,5)) {...}
380  UInt_t GetTrainingTMVAVersionCode() const { return fTMVATrainingVersion; }
381  UInt_t GetTrainingROOTVersionCode() const { return fROOTTrainingVersion; }
382  TString GetTrainingTMVAVersionString() const;
383  TString GetTrainingROOTVersionString() const;
384 
386  {
387  if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation;
388  }
389  const TransformationHandler& GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) const
390  {
391  if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation;
392  }
393 
394  void RerouteTransformationHandler (TransformationHandler* fTargetTransformation) { fTransformationPointer=fTargetTransformation; }
395 
396  // ---------- event accessors ------------------------------------------------
397 
398  // returns reference to data set
399  // NOTE: this DataSet is the "original" dataset, i.e. the one seen by ALL Classifiers WITHOUT transformation
400  DataSet* Data() const { return DataInfo().GetDataSet(); }
401  DataSetInfo& DataInfo() const { return fDataSetInfo; }
402 
403  mutable const Event* fTmpEvent; //! temporary event when testing on a different DataSet than the own one
404 
405  // event reference and update
406  // NOTE: these Event accessors make sure that you get the events transformed according to the
407  // particular classifiers transformation chosen
408  UInt_t GetNEvents () const { return Data()->GetNEvents(); }
409  const Event* GetEvent () const;
410  const Event* GetEvent ( const TMVA::Event* ev ) const;
411  const Event* GetEvent ( Long64_t ievt ) const;
412  const Event* GetEvent ( Long64_t ievt , Types::ETreeType type ) const;
413  const Event* GetTrainingEvent( Long64_t ievt ) const;
414  const Event* GetTestingEvent ( Long64_t ievt ) const;
415  const std::vector<TMVA::Event*>& GetEventCollection( Types::ETreeType type );
416 
417  // ---------- public auxiliary methods ---------------------------------------
418 
419  // this method is used to decide whether an event is signal- or background-like
420  // the reference cut "xC" is taken to be where
421  // Int_[-oo,xC] { PDF_S(x) dx } = Int_[xC,+oo] { PDF_B(x) dx }
422  virtual Bool_t IsSignalLike();
423  virtual Bool_t IsSignalLike(Double_t mvaVal);
424 
425 
426  Bool_t HasMVAPdfs() const { return fHasMVAPdfs; }
427  virtual void SetAnalysisType( Types::EAnalysisType type ) { fAnalysisType = type; }
428  Types::EAnalysisType GetAnalysisType() const { return fAnalysisType; }
429  Bool_t DoRegression() const { return fAnalysisType == Types::kRegression; }
430  Bool_t DoMulticlass() const { return fAnalysisType == Types::kMulticlass; }
431 
432  // setter method for suppressing writing to XML and writing of standalone classes
433  void DisableWriting(Bool_t setter){ fModelPersistence = setter?kFALSE:kTRUE; }//DEPRECATED
434 
435  protected:
436  // helper variables for JsMVA
437  IPythonInteractive *fInteractive = nullptr;
438  bool fExitFromTraining = false;
439  UInt_t fIPyMaxIter = 0, fIPyCurrentIter = 0;
440 
441  public:
442 
443  // initializing IPythonInteractive class (for JsMVA only)
444  inline void InitIPythonInteractive(){
445  if (fInteractive) delete fInteractive;
446  fInteractive = new IPythonInteractive();
447  }
448 
449  // get training errors (for JsMVA only)
450  inline TMultiGraph* GetInteractiveTrainingError(){return fInteractive->Get();}
451 
452  // stops the training process (for JsMVA only)
453  inline void ExitFromTraining(){
454  fExitFromTraining = true;
455  }
456 
457  // checks if the training ended (for JsMVA only)
458  inline bool TrainingEnded(){
459  if (fExitFromTraining && fInteractive){
460  delete fInteractive;
461  fInteractive = nullptr;
462  }
463  return fExitFromTraining;
464  }
465 
466  // get fIPyMaxIter
467  inline UInt_t GetMaxIter(){ return fIPyMaxIter; }
468 
469  // get fIPyCurrentIter
470  inline UInt_t GetCurrentIter(){ return fIPyCurrentIter; }
471 
472  protected:
473 
474  // ---------- protected accessors -------------------------------------------
475 
476  //TDirectory* LocalTDir() const { return Data().LocalRootDir(); }
477 
478  // weight file name and directory (given by global config variable)
479  void SetWeightFileName( TString );
480 
481  const TString& GetWeightFileDir() const { return fFileDir; }
482  void SetWeightFileDir( TString fileDir );
483 
484  // are input variables normalised ?
485  Bool_t IsNormalised() const { return fNormalise; }
486  void SetNormalised( Bool_t norm ) { fNormalise = norm; }
487 
488  // set number of input variables (only used by MethodCuts, could perhaps be removed)
489  // void SetNvar( Int_t n ) { fNvar = n; }
490 
491  // verbose and help flags
492  Bool_t Verbose() const { return fVerbose; }
493  Bool_t Help () const { return fHelp; }
494 
495  // ---------- protected event and tree accessors -----------------------------
496 
497  // names of input variables (if the original names are expressions, they are
498  // transformed into regexps)
499  const TString& GetInternalVarName( Int_t ivar ) const { return (*fInputVars)[ivar]; }
500  const TString& GetOriginalVarName( Int_t ivar ) const { return DataInfo().GetVariableInfo(ivar).GetExpression(); }
501 
502  Bool_t HasTrainingTree() const { return Data()->GetNTrainingEvents() != 0; }
503 
504  // ---------- protected auxiliary methods ------------------------------------
505 
506  protected:
507 
508  // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
509  virtual void MakeClassSpecific( std::ostream&, const TString& = "" ) const {}
510 
511  // header and auxiliary classes
512  virtual void MakeClassSpecificHeader( std::ostream&, const TString& = "" ) const {}
513 
514  // static pointer to this object - required for ROOT finder (to be solved differently)(solved by Omar)
515  //static MethodBase* GetThisBase();
516 
517  // some basic statistical analysis
518  void Statistics( Types::ETreeType treeType, const TString& theVarName,
520  Double_t&, Double_t&, Double_t& );
521 
522  // if TRUE, write weights only to text files
523  Bool_t TxtWeightsOnly() const { return kTRUE; }
524 
525  protected:
526 
527  // access to event information that needs method-specific information
528 
529  Bool_t IsConstructedFromWeightFile() const { return fConstructedFromWeightFile; }
530 
531  private:
532 
533  // ---------- private definitions --------------------------------------------
534  // Initialisation
535  void InitBase();
536  void DeclareBaseOptions();
537  void ProcessBaseOptions();
538 
539  // used in efficiency computation
540  enum ECutOrientation { kNegative = -1, kPositive = +1 };
541  ECutOrientation GetCutOrientation() const { return fCutOrientation; }
542 
543  // ---------- private accessors ---------------------------------------------
544 
545  // reset required for RootFinder
546  void ResetThisBase();
547 
548  // ---------- private auxiliary methods --------------------------------------
549 
550  // PDFs for classifier response (required to compute signal probability and Rarity)
551  void CreateMVAPdfs();
552 
553  // for root finder
554  //virtual method to find ROOT
555  virtual Double_t GetValueForRoot ( Double_t ); // implementation
556 
557  // used for file parsing
558  Bool_t GetLine( std::istream& fin, char * buf );
559 
560  // fill test tree with classification or regression results
561  virtual void AddClassifierOutput ( Types::ETreeType type );
562  virtual void AddClassifierOutputProb( Types::ETreeType type );
563  virtual void AddRegressionOutput ( Types::ETreeType type );
564  virtual void AddMulticlassOutput ( Types::ETreeType type );
565 
566  private:
567 
568  void AddInfoItem( void* gi, const TString& name,
569  const TString& value) const;
570 
571  // ========== class members ==================================================
572 
573  protected:
574 
575  // direct accessors
576  Ranking* fRanking; // pointer to ranking object (created by derived classifiers)
577  std::vector<TString>* fInputVars; // vector of input variables used in MVA
578 
579  // histogram binning
580  Int_t fNbins; // number of bins in input variable histograms
581  Int_t fNbinsMVAoutput; // number of bins in MVA output histograms
582  Int_t fNbinsH; // number of bins in evaluation histograms
583 
584  Types::EAnalysisType fAnalysisType; // method-mode : true --> regression, false --> classification
585 
586  std::vector<Float_t>* fRegressionReturnVal; // holds the return-values for the regression
587  std::vector<Float_t>* fMulticlassReturnVal; // holds the return-values for the multiclass classification
588 
589  private:
590 
591  // MethodCuts redefines some of the evaluation variables and histograms -> must access private members
592  friend class MethodCuts;
593 
594 
595  // data sets
596  DataSetInfo& fDataSetInfo; //! the data set information (sometimes needed)
597 
598  Double_t fSignalReferenceCut; // minimum requirement on the MVA output to declare an event signal-like
599  Double_t fSignalReferenceCutOrientation; // minimum requirement on the MVA output to declare an event signal-like
600  Types::ESBType fVariableTransformType; // this is the event type (sig or bgd) assumed for variable transform
601 
602  // naming and versioning
603  TString fJobName; // name of job -> user defined, appears in weight files
604  TString fMethodName; // name of the method (set in derived class)
605  Types::EMVA fMethodType; // type of method (set in derived class)
606  TString fTestvar; // variable used in evaluation, etc (mostly the MVA)
607  UInt_t fTMVATrainingVersion; // TMVA version used for training
608  UInt_t fROOTTrainingVersion; // ROOT version used for training
609  Bool_t fConstructedFromWeightFile; // is it obtained from weight file?
610 
611  // Directory structure: dataloader/fMethodBaseDir/fBaseDir
612  // where the first directory name is defined by the method type
613  // and the second is user supplied (the title given in Factory::BookMethod())
614  TDirectory* fBaseDir; // base directory for the instance, needed to know where to jump back from localDir
615  mutable TDirectory* fMethodBaseDir; // base directory for the method
616  //this will be the next way to save results
618 
619  //SilentFile
621  //Model Persistence
623 
624  TString fParentDir; // method parent name, like booster name
625 
626  TString fFileDir; // unix sub-directory for weight files (default: DataLoader's Name + "weights")
627  TString fWeightFile; // weight file name
628 
629  private:
630 
631  TH1* fEffS; // efficiency histogram for rootfinder
632 
633  PDF* fDefaultPDF; // default PDF definitions
634  PDF* fMVAPdfS; // signal MVA PDF
635  PDF* fMVAPdfB; // background MVA PDF
636 
637  // TH1D* fmvaS; // PDFs of MVA distribution (signal)
638  // TH1D* fmvaB; // PDFs of MVA distribution (background)
639  PDF* fSplS; // PDFs of MVA distribution (signal)
640  PDF* fSplB; // PDFs of MVA distribution (background)
641  TSpline* fSpleffBvsS; // splines for signal eff. versus background eff.
642 
643  PDF* fSplTrainS; // PDFs of training MVA distribution (signal)
644  PDF* fSplTrainB; // PDFs of training MVA distribution (background)
645  TSpline* fSplTrainEffBvsS; // splines for training signal eff. versus background eff.
646 
647  private:
648 
649  // basic statistics quantities of MVA
650  Double_t fMeanS; // mean (signal)
651  Double_t fMeanB; // mean (background)
652  Double_t fRmsS; // RMS (signal)
653  Double_t fRmsB; // RMS (background)
654  Double_t fXmin; // minimum (signal and background)
655  Double_t fXmax; // maximum (signal and background)
656 
657  // variable preprocessing
658  TString fVarTransformString; // labels variable transform method
659 
660  TransformationHandler* fTransformationPointer; // pointer to the rest of transformations
661  TransformationHandler fTransformation; // the list of transformations
662 
663 
664  // help and verbosity
665  Bool_t fVerbose; // verbose flag
666  TString fVerbosityLevelString; // verbosity level (user input string)
667  EMsgType fVerbosityLevel; // verbosity level
668  Bool_t fHelp; // help flag
669  Bool_t fHasMVAPdfs; // MVA Pdfs are created for this classifier
670 
671  Bool_t fIgnoreNegWeightsInTraining;// If true, events with negative weights are not used in training
672 
673  protected:
674 
675  Bool_t IgnoreEventsWithNegWeightsInTraining() const { return fIgnoreNegWeightsInTraining; }
676 
677  // for signal/background
678  UInt_t fSignalClass; // index of the Signal-class
679  UInt_t fBackgroundClass; // index of the Background-class
680 
681  private:
682 
683  // timing variables
684  Double_t fTrainTime; // for timing measurements
685  Double_t fTestTime; // for timing measurements
686 
687  // orientation of cut: depends on signal and background mean values
688  ECutOrientation fCutOrientation; // +1 if Sig>Bkg, -1 otherwise
689 
690  // for root finder
691  TSpline1* fSplRefS; // helper splines for RootFinder (signal)
692  TSpline1* fSplRefB; // helper splines for RootFinder (background)
693 
694  TSpline1* fSplTrainRefS; // helper splines for RootFinder (signal)
695  TSpline1* fSplTrainRefB; // helper splines for RootFinder (background)
696 
697  mutable std::vector<const std::vector<TMVA::Event*>*> fEventCollections; // if the method needs the complete event-collection, the transformed event coll. ist stored here.
698 
699  public:
700  Bool_t fSetupCompleted; // is method setup
701 
702  private:
703 
704  // This is a workaround for OSx where static thread_local data members are
705  // not supported. The C++ solution would indeed be the following:
706 // static MethodBase*& GetThisBaseThreadLocal() {TTHREAD_TLS(MethodBase*) fgThisBase(nullptr); return fgThisBase; };
707 
708  // ===== deprecated options, kept for backward compatibility =====
709  private:
710 
711  Bool_t fNormalise; // normalise input variables
712  Bool_t fUseDecorr; // synonymous for decorrelation
713  TString fVariableTransformTypeString; // labels variable transform type
714  Bool_t fTxtWeightsOnly; // if TRUE, write weights only to text files
715  Int_t fNbinsMVAPdf; // number of bins used in histogram that creates PDF
716  Int_t fNsmoothMVAPdf; // number of times a histogram is smoothed before creating the PDF
717 
718  protected:
720  ClassDef(MethodBase,0); // Virtual base class for all TMVA method
721 
722  };
723 } // namespace TMVA
724 
725 
726 
727 
728 
729 
730 
731 // ========== INLINE FUNCTIONS =========================================================
732 
733 
734 //_______________________________________________________________________
735 inline const TMVA::Event* TMVA::MethodBase::GetEvent( const TMVA::Event* ev ) const
736 {
737  return GetTransformationHandler().Transform(ev);
738 }
739 
741 {
742  if(fTmpEvent)
743  return GetTransformationHandler().Transform(fTmpEvent);
744  else
745  return GetTransformationHandler().Transform(Data()->GetEvent());
746 }
747 
749 {
750  assert(fTmpEvent==0);
751  return GetTransformationHandler().Transform(Data()->GetEvent(ievt));
752 }
753 
755 {
756  assert(fTmpEvent==0);
757  return GetTransformationHandler().Transform(Data()->GetEvent(ievt, type));
758 }
759 
761 {
762  assert(fTmpEvent==0);
763  return GetEvent(ievt, Types::kTraining);
764 }
765 
767 {
768  assert(fTmpEvent==0);
769  return GetEvent(ievt, Types::kTesting);
770 }
771 
772 #endif
Bool_t HasMVAPdfs() const
Definition: MethodBase.h:426
Types::EAnalysisType fAnalysisType
Definition: MethodBase.h:584
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:373
TString fMethodName
Definition: MethodBase.h:604
virtual void ReadWeightsFromStream(TFile &)
Definition: MethodBase.h:257
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodBase.h:224
long long Long64_t
Definition: RtypesCore.h:69
TString GetMethodName(Types::EMVA method) const
Definition: Types.cxx:136
Bool_t fIgnoreNegWeightsInTraining
Definition: MethodBase.h:671
Bool_t IsConstructedFromWeightFile() const
Definition: MethodBase.h:529
virtual void MakeClassSpecificHeader(std::ostream &, const TString &="") const
Definition: MethodBase.h:512
TSpline1 * fSplTrainRefS
Definition: MethodBase.h:694
const TString GetProbaName() const
Definition: MethodBase.h:327
std::vector< TGraph * > fGraphs
Definition: MethodBase.h:104
const TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) const
Definition: MethodBase.h:389
UInt_t GetNvar() const
Definition: MethodBase.h:335
static Types & Instance()
Return the single instance of "Types" if it exists already, or create it (Singleton)
Definition: Types.cxx:70
const TString & GetOriginalVarName(Int_t ivar) const
Definition: MethodBase.h:500
TString fWeightFile
Definition: MethodBase.h:627
TString fVariableTransformTypeString
Definition: MethodBase.h:713
void SetMethodBaseDir(TDirectory *methodDir)
Definition: MethodBase.h:365
Base class for spline implementation containing the Draw/Paint methods.
Definition: TSpline.h:20
TransformationHandler * fTransformationPointer
Definition: MethodBase.h:660
Types::ESBType fVariableTransformType
Definition: MethodBase.h:600
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:47
EAnalysisType
Definition: Types.h:127
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:35
void InitIPythonInteractive()
Definition: MethodBase.h:444
Virtual base class for all MVA methods.
Definition: MethodBase.h:109
const std::vector< Float_t > & GetRegressionValues(const TMVA::Event *const ev)
Definition: MethodBase.h:211
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
Definition: MethodBase.h:356
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodBase.h:218
Basic string class.
Definition: TString.h:131
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:567
void SetTrainTime(Double_t trainTime)
Definition: MethodBase.h:159
const TString & GetInternalVarName(Int_t ivar) const
Definition: MethodBase.h:499
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:385
Ranking for variables in method (implementation)
Definition: Ranking.h:48
int Int_t
Definition: RtypesCore.h:41
TMultiGraph * Get()
Definition: MethodBase.h:100
bool Bool_t
Definition: RtypesCore.h:59
Results * fResults
Definition: MethodBase.h:719
TString fJobName
Definition: MethodBase.h:603
TSpline1 * fSplRefB
Definition: MethodBase.h:692
UInt_t GetNTargets() const
Definition: MethodBase.h:337
TSpline1 * fSplRefS
Definition: MethodBase.h:691
std::vector< TString > * fInputVars
Definition: MethodBase.h:577
const char * GetInputTitle(Int_t i) const
Definition: MethodBase.h:342
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:369
Double_t fTrainTime
Definition: MethodBase.h:684
Double_t fTestTime
Definition: MethodBase.h:685
Double_t GetMean(Int_t ivar) const
Definition: MethodBase.h:345
Double_t GetTrainTime() const
Definition: MethodBase.h:160
const TString & GetInputLabel(Int_t i) const
Definition: MethodBase.h:341
void SetMethodDir(TDirectory *methodDir)
Definition: MethodBase.h:363
const TString & GetWeightFileDir() const
Definition: MethodBase.h:481
UInt_t fSignalClass
Definition: MethodBase.h:678
const TString & GetInputVar(Int_t i) const
Definition: MethodBase.h:340
Double_t x[n]
Definition: legend1.C:17
DataSetInfo & fDataSetInfo
Definition: MethodBase.h:596
#define ClassDef(name, id)
Definition: Rtypes.h:320
ECutOrientation fCutOrientation
Definition: MethodBase.h:688
Bool_t TxtWeightsOnly() const
Definition: MethodBase.h:523
UInt_t GetTrainingTMVAVersionCode() const
Definition: MethodBase.h:380
const Event * GetEvent() const
Definition: MethodBase.h:740
DataSet * Data() const
Definition: MethodBase.h:400
void Init(TClassEdit::TInterpreterLookupHelper *helper)
Definition: TClassEdit.cxx:121
Virtual base class for combining several TMVA methods.
Double_t fMeanB
Definition: MethodBase.h:651
Double_t GetXmin(Int_t ivar) const
Definition: MethodBase.h:347
DataSetInfo & DataInfo() const
Definition: MethodBase.h:401
Bool_t DoRegression() const
Definition: MethodBase.h:429
TString fTestvar
Definition: MethodBase.h:606
Class that contains all the data information.
Definition: DataSetInfo.h:60
TFile * GetFile() const
Definition: MethodBase.h:361
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
TSpline * fSpleffBvsS
Definition: MethodBase.h:641
Bool_t fModelPersistence
Definition: MethodBase.h:622
const Event * GetTrainingEvent(Long64_t ievt) const
Definition: MethodBase.h:760
Bool_t Verbose() const
Definition: MethodBase.h:492
UInt_t fTMVATrainingVersion
Definition: MethodBase.h:607
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:408
Class for boosting a TMVA method.
Definition: MethodBoost.h:58
Double_t GetXmax(Int_t ivar) const
Definition: MethodBase.h:348
TransformationHandler fTransformation
Definition: MethodBase.h:661
Bool_t DoMulticlass() const
Definition: MethodBase.h:430
Class that contains all the data information.
Definition: DataSet.h:69
virtual void MakeClassSpecific(std::ostream &, const TString &="") const
Definition: MethodBase.h:509
const Event * GetTestingEvent(Long64_t ievt) const
Definition: MethodBase.h:766
Bool_t HasTrainingTree() const
Definition: MethodBase.h:502
Double_t fRmsB
Definition: MethodBase.h:653
Double_t fXmin
Definition: MethodBase.h:654
std::string GetMethodName(TCppMethod_t)
Definition: Cppyy.cxx:733
TSpline1 * fSplTrainRefB
Definition: MethodBase.h:695
TDirectory * fMethodBaseDir
Definition: MethodBase.h:615
SVector< double, 2 > v
Definition: Dict.h:5
UInt_t fROOTTrainingVersion
Definition: MethodBase.h:608
const char * GetName() const
Definition: MethodBase.h:325
UInt_t GetTrainingROOTVersionCode() const
Definition: MethodBase.h:381
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t fMeanS
Definition: MethodBase.h:650
Bool_t Help() const
Definition: MethodBase.h:493
Int_t fNsmoothMVAPdf
Definition: MethodBase.h:716
Bool_t fTxtWeightsOnly
Definition: MethodBase.h:714
const TString & GetJobName() const
Definition: MethodBase.h:321
const TString & GetMethodName() const
Definition: MethodBase.h:322
TDirectory * fBaseDir
Definition: MethodBase.h:614
Bool_t fHasMVAPdfs
Definition: MethodBase.h:669
TSpline * fSplTrainEffBvsS
Definition: MethodBase.h:645
Class that contains all the data information.
This is the main MVA steering class.
Definition: Factory.h:81
1-D histogram with a double per channel (see TH1 documentation)
Definition: TH1.h:610
Bool_t IsSilentFile()
Definition: MethodBase.h:370
Linear interpolation of TGraph.
Definition: TSpline1.h:43
Double_t GetSignalReferenceCutOrientation() const
Definition: MethodBase.h:352
void SetNormalised(Bool_t norm)
Definition: MethodBase.h:486
Double_t GetTestTime() const
Definition: MethodBase.h:164
UInt_t GetNVariables() const
Definition: MethodBase.h:336
std::vector< const std::vector< TMVA::Event * > * > fEventCollections
Definition: MethodBase.h:697
const Bool_t kFALSE
Definition: RtypesCore.h:88
TString fVerbosityLevelString
Definition: MethodBase.h:666
Class for categorizing the phase space.
Double_t fRmsS
Definition: MethodBase.h:652
UInt_t fBackgroundClass
Definition: MethodBase.h:679
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:675
void RerouteTransformationHandler(TransformationHandler *fTargetTransformation)
Definition: MethodBase.h:394
void SetTestTime(Double_t testTime)
Definition: MethodBase.h:163
Multivariate optimisation of signal efficiency for given background efficiency, applying rectangular ...
Definition: MethodCuts.h:61
UInt_t GetMaxIter()
Definition: MethodBase.h:467
double Double_t
Definition: RtypesCore.h:55
EMsgType fVerbosityLevel
Definition: MethodBase.h:667
Describe directory structure in memory.
Definition: TDirectory.h:34
Class to perform cross validation, splitting the dataloader into folds.
std::vector< Float_t > * fMulticlassReturnVal
Definition: MethodBase.h:587
Bool_t IsNormalised() const
Definition: MethodBase.h:485
int type
Definition: TGX11.cxx:120
void SetFile(TFile *file)
Definition: MethodBase.h:366
virtual void Reset()
Definition: MethodBase.h:191
The TH1 histogram class.
Definition: TH1.h:56
void ExitFromTraining()
Definition: MethodBase.h:453
TString fParentDir
Definition: MethodBase.h:624
Bool_t fConstructedFromWeightFile
Definition: MethodBase.h:609
TString fVarTransformString
Definition: MethodBase.h:658
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
Types::EMVA fMethodType
Definition: MethodBase.h:605
char Char_t
Definition: RtypesCore.h:29
Double_t GetRMS(Int_t ivar) const
Definition: MethodBase.h:346
Root finding using Brent's algorithm (translated from CERNLIB function RZERO)
Definition: RootFinder.h:48
This class is needed by JsMVA, and it's a helper class for tracking errors during the training in Jup...
Definition: MethodBase.h:92
Abstract ClassifierFactory template that handles arbitrary types.
Ranking * fRanking
Definition: MethodBase.h:576
TString GetMethodTypeName() const
Definition: MethodBase.h:323
Definition: file.py:1
bool TrainingEnded()
Definition: MethodBase.h:458
Class that is the base class for a vector of results.
Definition: Results.h:57
Double_t fSignalReferenceCut
the data set information (sometimes needed)
Definition: MethodBase.h:598
const Event * fTmpEvent
Definition: MethodBase.h:403
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:351
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
Int_t fNbinsMVAoutput
Definition: MethodBase.h:581
Bool_t fSilentFile
Definition: MethodBase.h:620
UInt_t GetCurrentIter()
Definition: MethodBase.h:470
Double_t fXmax
Definition: MethodBase.h:655
void DisableWriting(Bool_t setter)
Definition: MethodBase.h:433
ECutOrientation GetCutOrientation() const
Definition: MethodBase.h:541
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:586
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:428
A TTree object has a header with a name and a title.
Definition: TTree.h:70
const TString & GetTestvarName() const
Definition: MethodBase.h:326
void SetTestvarName(const TString &v="")
Definition: MethodBase.h:332
TString fFileDir
Definition: MethodBase.h:626
TMultiGraph * GetInteractiveTrainingError()
Definition: MethodBase.h:450
const Bool_t kTRUE
Definition: RtypesCore.h:87
Types::EMVA GetMethodType() const
Definition: MethodBase.h:324
void SetBaseDir(TDirectory *methodDir)
Definition: MethodBase.h:364
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:427
char name[80]
Definition: TGX11.cxx:109
Bool_t fSetupCompleted
Definition: MethodBase.h:700
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:355
Double_t fSignalReferenceCutOrientation
Definition: MethodBase.h:599
Bool_t IsModelPersistence()
Definition: MethodBase.h:374