Logo ROOT   6.08/07
Reference Guide
MethodBase.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodBase *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Virtual base class for all MVA methods *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <peter.speckmayer@cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
17  * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
18  * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
19  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
20  * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
21  * *
22  * Copyright (c) 2005-2011: *
23  * CERN, Switzerland *
24  * U. of Victoria, Canada *
25  * MPI-K Heidelberg, Germany *
26  * U. of Bonn, Germany *
27  * *
28  * Redistribution and use in source and binary forms, with or without *
29  * modification, are permitted according to the terms listed in LICENSE *
30  * (http://tmva.sourceforge.net/LICENSE) *
31  **********************************************************************************/
32 
33 #ifndef ROOT_TMVA_MethodBase
34 #define ROOT_TMVA_MethodBase
35 
36 //////////////////////////////////////////////////////////////////////////
37 // //
38 // MethodBase //
39 // //
40 // Virtual base class for all TMVA methods //
41 // //
42 //////////////////////////////////////////////////////////////////////////
43 
44 #include <iosfwd>
45 #include <vector>
46 #include <map>
47 #include "assert.h"
48 
49 #ifndef ROOT_TString
50 #include "TString.h"
51 #endif
52 
53 #ifndef ROOT_TMVA_IMethod
54 #include "TMVA/IMethod.h"
55 #endif
56 #ifndef ROOT_TMVA_Configurable
57 #include "TMVA/Configurable.h"
58 #endif
59 #ifndef ROOT_TMVA_Types
60 #include "TMVA/Types.h"
61 #endif
62 #ifndef ROOT_TMVA_DataSet
63 #include "TMVA/DataSet.h"
64 #endif
65 #ifndef ROOT_TMVA_Event
66 #include "TMVA/Event.h"
67 #endif
68 #ifndef ROOT_TMVA_TransformationHandler
70 #endif
71 #ifndef ROOT_TMVA_Results
72 #include<TMVA/Results.h>
73 #endif
74 
75 #ifndef ROOT_TFile
76 #include<TFile.h>
77 #endif
78 
79 class TGraph;
80 class TTree;
81 class TDirectory;
82 class TSpline;
83 class TH1F;
84 class TH1D;
85 class TMultiGraph;
86 
87 namespace TMVA {
88 
89  class Ranking;
90  class PDF;
91  class TSpline1;
92  class MethodCuts;
93  class MethodBoost;
94  class DataSetInfo;
95 
96  /** \class IPythonInteractive
97 This class is needed by JsMVA; it is a helper class for tracking errors during training in a Jupyter notebook.
98 It is only initialized in a Jupyter notebook context. On initialization we specify some titles,
99 and a TGraph is created for every title. New data points can then easily be added to all TGraphs.
100 These graphs are added to a TMultiGraph; during interactive training this TMultiGraph object is retrieved and plotted with JsROOT.
101 */
103  public:
106  void Init(std::vector<TString>& graphTitles);
107  void ClearGraphs();
108  void AddPoint(Double_t x, Double_t y1, Double_t y2);
109  void AddPoint(std::vector<Double_t>& dat);
110  inline TMultiGraph* Get() {return fMultiGraph;}
111  inline bool NotInitialized(){ return fNumGraphs==0;};
112  private:
114  std::vector<TGraph*> fGraphs;
117  };
118 
119  class MethodBase : virtual public IMethod, public Configurable {
120 
121  friend class Factory;
122  friend class RootFinder;
123  friend class MethodBoost;
124  public:
125 
126  enum EWeightFileType { kROOT=0, kTEXT };
127 
128  // standard constructor
129  MethodBase( const TString& jobName,
130  Types::EMVA methodType,
131  const TString& methodTitle,
132  DataSetInfo& dsi,
133  const TString& theOption = "" );
134 
135  // constructor used for Testing + Application of the MVA, only (no training),
136  // using given weight file
137  MethodBase( Types::EMVA methodType,
138  DataSetInfo& dsi,
139  const TString& weightFile );
140 
141  // destructor
142  virtual ~MethodBase();
143 
144  // declaration, processing and checking of configuration options
145  void SetupMethod();
146  void ProcessSetup();
147  virtual void CheckSetup(); // may be overwritten by derived classes
148 
149  // ---------- main training and testing methods ------------------------------
150 
151  // prepare tree branch with the method's discriminating variable
152  void AddOutput( Types::ETreeType type, Types::EAnalysisType analysisType );
153 
154  // performs classifier training
155  // calls methods Train() implemented by derived classes
156  void TrainMethod();
157 
158  // optimize tuning parameters
159  virtual std::map<TString,Double_t> OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA");
160  virtual void SetTuneParameters(std::map<TString,Double_t> tuneParameters);
161 
162  virtual void Train() = 0;
163 
164  // store and retrieve time used for training
165  void SetTrainTime( Double_t trainTime ) { fTrainTime = trainTime; }
166  Double_t GetTrainTime() const { return fTrainTime; }
167 
168  // store and retrieve time used for testing
169  void SetTestTime ( Double_t testTime ) { fTestTime = testTime; }
170  Double_t GetTestTime () const { return fTestTime; }
171 
172  // performs classifier testing
173  virtual void TestClassification();
174  virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X");
175 
176  // performs multiclass classifier testing
177  virtual void TestMulticlass();
178 
179  // performs regression testing
180  virtual void TestRegression( Double_t& bias, Double_t& biasT,
181  Double_t& dev, Double_t& devT,
182  Double_t& rms, Double_t& rmsT,
183  Double_t& mInf, Double_t& mInfT, // mutual information
184  Double_t& corr,
185  Types::ETreeType type );
186 
187  // options treatment
188  virtual void Init() = 0;
189  virtual void DeclareOptions() = 0;
190  virtual void ProcessOptions() = 0;
191  virtual void DeclareCompatibilityOptions(); // declaration of past options
192 
193  // reset the Method --> As if it was not yet trained, just instantiated
194  // virtual void Reset() = 0;
195  //for the moment, I provide a dummy (that would not work) default, just to make
196  // compilation/running w/o parameter optimisation still possible
197  virtual void Reset(){return;}
198 
199  // classifier response:
200  // some methods may return a per-event error estimate
201  // error calculation is skipped if err==0
202  virtual Double_t GetMvaValue( Double_t* errLower = 0, Double_t* errUpper = 0) = 0;
203 
204  // signal/background classification response
205  Double_t GetMvaValue( const TMVA::Event* const ev, Double_t* err = 0, Double_t* errUpper = 0 );
206 
207  protected:
208  // helper function to set errors to -1
209  void NoErrorCalc(Double_t* const err, Double_t* const errUpper);
210 
211  // signal/background classification response for all current set of data
212  virtual std::vector<Double_t> GetMvaValues(Long64_t firstEvt = 0, Long64_t lastEvt = -1, Bool_t logProgress = false);
213 
214 
215  public:
216  // regression response
217  const std::vector<Float_t>& GetRegressionValues(const TMVA::Event* const ev){ // regression response for an explicitly supplied event
218  fTmpEvent = ev; // temporarily install ev as the event to evaluate
219  const std::vector<Float_t>* ptr = &GetRegressionValues(); // virtual no-argument overload; remember the address of its result
220  fTmpEvent = 0; // clear the temporary event again before returning
221  return (*ptr);
222  }
223 
224  virtual const std::vector<Float_t>& GetRegressionValues() { // base-class default: returns an empty vector (virtual, so derived methods may override)
225  static const std::vector<Float_t> kEmptyResponse; // shared empty result; previously a new heap vector was allocated and leaked on every call
226  return kEmptyResponse;
227  }
228 
229  // multiclass classification response
230  virtual const std::vector<Float_t>& GetMulticlassValues() { // base-class default: returns an empty vector (virtual, so derived methods may override)
231  static const std::vector<Float_t> kEmptyResponse; // shared empty result; previously a new heap vector was allocated and leaked on every call
232  return kEmptyResponse;
233  }
234 
235  // probability of classifier response (mvaval) to be signal (requires "CreateMvaPdf" option set)
236  virtual Double_t GetProba( const Event *ev); // the simple one, automatically calculates the mvaVal and uses the SAME sig/bkg ratio as given in the training sample (typically 50/50 .. (NormMode=EqualNumEvents) but can be different)
237  virtual Double_t GetProba( Double_t mvaVal, Double_t ap_sig );
238 
239  // Rarity of classifier response (signal or background (default) is uniform in [0,1])
240  virtual Double_t GetRarity( Double_t mvaVal, Types::ESBType reftype = Types::kBackground ) const;
241 
242  // create ranking
243  virtual const Ranking* CreateRanking() = 0;
244 
245  // make ROOT-independent C++ class
246  virtual void MakeClass( const TString& classFileName = TString("") ) const;
247 
248  // print help message
249  void PrintHelpMessage() const;
250 
251  //
252  // streamer methods for training information (creates "weight" files) --------
253  //
254  public:
255  void WriteStateToFile () const;
256  void ReadStateFromFile ();
257 
258  protected:
259  // the actual "weights"
260  virtual void AddWeightsXMLTo ( void* parent ) const = 0;
261  virtual void ReadWeightsFromXML ( void* wghtnode ) = 0;
262  virtual void ReadWeightsFromStream( std::istream& ) = 0; // backward compatibility
263  virtual void ReadWeightsFromStream( TFile& ) {} // backward compatibility
264 
265  private:
266  friend class MethodCategory;
267  friend class MethodCompositeBase;
268  void WriteStateToXML ( void* parent ) const;
269  void ReadStateFromXML ( void* parent );
270  void WriteStateToStream ( std::ostream& tf ) const; // needed for MakeClass
271  void WriteVarsToStream ( std::ostream& tf, const TString& prefix = "" ) const; // needed for MakeClass
272 
273 
274  public: // these two need to be public, they are used to read in-memory weight-files
275  void ReadStateFromStream ( std::istream& tf ); // backward compatibility
276  void ReadStateFromStream ( TFile& rf ); // backward compatibility
277  void ReadStateFromXMLString( const char* xmlstr ); // for reading from memory
278 
279  private:
280  // the variable information
281  void AddVarsXMLTo ( void* parent ) const;
282  void AddSpectatorsXMLTo ( void* parent ) const;
283  void AddTargetsXMLTo ( void* parent ) const;
284  void AddClassesXMLTo ( void* parent ) const;
285  void ReadVariablesFromXML ( void* varnode );
286  void ReadSpectatorsFromXML( void* specnode);
287  void ReadTargetsFromXML ( void* tarnode );
288  void ReadClassesFromXML ( void* clsnode );
289  void ReadVarsFromStream ( std::istream& istr ); // backward compatibility
290 
291  public:
292  // ---------------------------------------------------------------------------
293 
294  // write evaluation histograms into target file
295  virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype);
296 
297  // write classifier-specific monitoring information to target file
298  virtual void WriteMonitoringHistosToFile() const;
299 
300  // ---------- public evaluation methods --------------------------------------
301 
302  // individual initialisation for testing of each method
303  // overload this one for individual initialisation of the testing,
304  // it is then called automatically within the global "TestInit"
305 
306  // variables (and private member functions) for the evaluation:
307  // get the efficiency. It fills a histogram for efficiency/vs/bkg
308  // and returns the one value of the efficiency demanded
309  // in the TString argument. (Watch the string format)
310  virtual Double_t GetEfficiency( const TString&, Types::ETreeType, Double_t& err );
311  virtual Double_t GetTrainingEfficiency(const TString& );
312  virtual std::vector<Float_t> GetMulticlassEfficiency( std::vector<std::vector<Float_t> >& purity );
313  virtual std::vector<Float_t> GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity );
314  virtual Double_t GetSignificance() const;
315  virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const;
316  virtual Double_t GetROCIntegral(PDF *pdfS=0, PDF *pdfB=0) const;
317  virtual Double_t GetMaximumSignificance( Double_t SignalEvents, Double_t BackgroundEvents,
318  Double_t& optimal_significance_value ) const;
319  virtual Double_t GetSeparation( TH1*, TH1* ) const;
320  virtual Double_t GetSeparation( PDF* pdfS = 0, PDF* pdfB = 0 ) const;
321 
322  virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t& stddev,Double_t& stddev90Percent ) const;
323  // ---------- public accessors -----------------------------------------------
324 
325  // classifier naming (a lot of names ... aren't they ;-)
326  const TString& GetJobName () const { return fJobName; }
327  const TString& GetMethodName () const { return fMethodName; }
328  TString GetMethodTypeName() const { return Types::Instance().GetMethodName(fMethodType); }
329  Types::EMVA GetMethodType () const { return fMethodType; }
330  const char* GetName () const { return fMethodName.Data(); }
331  const TString& GetTestvarName () const { return fTestvar; }
332  const TString GetProbaName () const { return fTestvar + "_Proba"; }
333  TString GetWeightFileName() const;
334 
335  // build classifier name in Test tree
336  // default name is "MVA_" + method name (used when no explicit name is given)
337  void SetTestvarName ( const TString & v="" ) { fTestvar = (v=="") ? ("MVA_" + GetMethodName()) : v; }
338 
339  // number of input variable used by classifier
340  UInt_t GetNvar() const { return DataInfo().GetNVariables(); }
341  UInt_t GetNVariables() const { return DataInfo().GetNVariables(); }
342  UInt_t GetNTargets() const { return DataInfo().GetNTargets(); };
343 
344  // internal names and expressions of input variables
345  const TString& GetInputVar ( Int_t i ) const { return DataInfo().GetVariableInfo(i).GetInternalName(); }
346  const TString& GetInputLabel( Int_t i ) const { return DataInfo().GetVariableInfo(i).GetLabel(); }
347  const char * GetInputTitle( Int_t i ) const { return DataInfo().GetVariableInfo(i).GetTitle(); }
348 
349  // normalisation and limit accessors
350  Double_t GetMean( Int_t ivar ) const { return GetTransformationHandler().GetMean(ivar); }
351  Double_t GetRMS ( Int_t ivar ) const { return GetTransformationHandler().GetRMS(ivar); }
352  Double_t GetXmin( Int_t ivar ) const { return GetTransformationHandler().GetMin(ivar); }
353  Double_t GetXmax( Int_t ivar ) const { return GetTransformationHandler().GetMax(ivar); }
354 
355  // returns the minimum requirement on the MVA output to declare an event signal-like
356  Double_t GetSignalReferenceCut() const { return fSignalReferenceCut; }
357  Double_t GetSignalReferenceCutOrientation() const { return fSignalReferenceCutOrientation; }
358 
359  // sets the minimum requirement on the MVA output to declare an event signal-like
360  void SetSignalReferenceCut( Double_t cut ) { fSignalReferenceCut = cut; }
361  void SetSignalReferenceCutOrientation( Double_t cutOrientation ) { fSignalReferenceCutOrientation = cutOrientation; }
362 
363  // pointers to ROOT directories
364  TDirectory* BaseDir() const;
365  TDirectory* MethodBaseDir() const;
366  TFile* GetFile() const {return fFile;}
367 
368  void SetMethodDir ( TDirectory* methodDir ) { fBaseDir = fMethodBaseDir = methodDir; }
369  void SetBaseDir( TDirectory* methodDir ){ fBaseDir = methodDir; }
370  void SetMethodBaseDir( TDirectory* methodDir ){ fMethodBaseDir = methodDir; }
371  void SetFile(TFile* file){fFile=file;}
372 
373  //Silent file
374  void SetSilentFile(Bool_t status){fSilentFile=status;}
375  Bool_t IsSilentFile(){return fSilentFile;}
376 
377  //Model Persistence
378  void SetModelPersistence(Bool_t status){fModelPersistence=status;}//added support to create/remove dir here if exits or not
379  Bool_t IsModelPersistence(){return fModelPersistence;}
380 
381  // the TMVA version can be obtained and checked using
382  // if (GetTrainingTMVAVersionCode()>TMVA_VERSION(3,7,2)) {...}
383  // or
384  // if (GetTrainingROOTVersionCode()>ROOT_VERSION(5,15,5)) {...}
385  UInt_t GetTrainingTMVAVersionCode() const { return fTMVATrainingVersion; }
386  UInt_t GetTrainingROOTVersionCode() const { return fROOTTrainingVersion; }
387  TString GetTrainingTMVAVersionString() const;
388  TString GetTrainingROOTVersionString() const;
389 
391  {
392  if(fTransformationPointer && takeReroutedIfAvailable) return *fTransformationPointer; else return fTransformation;
393  }
394  const TransformationHandler& GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) const
395  {
396  return (takeReroutedIfAvailable && fTransformationPointer) ? *fTransformationPointer : fTransformation; // rerouted handler only if requested and set, otherwise the method's own
397  }
398 
399  void RerouteTransformationHandler (TransformationHandler* fTargetTransformation) { fTransformationPointer=fTargetTransformation; }
400 
401  // ---------- event accessors ------------------------------------------------
402 
403  // returns reference to data set
404  // NOTE: this DataSet is the "original" dataset, i.e. the one seen by ALL Classifiers WITHOUT transformation
405  DataSet* Data() const { return DataInfo().GetDataSet(); }
406  DataSetInfo& DataInfo() const { return fDataSetInfo; }
407 
408  mutable const Event* fTmpEvent; //! temporary event when testing on a different DataSet than the own one
409 
410  // event reference and update
411  // NOTE: these Event accessors make sure that you get the events transformed according to the
412  // particular classifier's chosen transformation
413  UInt_t GetNEvents () const { return Data()->GetNEvents(); }
414  const Event* GetEvent () const;
415  const Event* GetEvent ( const TMVA::Event* ev ) const;
416  const Event* GetEvent ( Long64_t ievt ) const;
417  const Event* GetEvent ( Long64_t ievt , Types::ETreeType type ) const;
418  const Event* GetTrainingEvent( Long64_t ievt ) const;
419  const Event* GetTestingEvent ( Long64_t ievt ) const;
420  const std::vector<TMVA::Event*>& GetEventCollection( Types::ETreeType type );
421 
422  // ---------- public auxiliary methods ---------------------------------------
423 
424  // this method is used to decide whether an event is signal- or background-like
425  // the reference cut "xC" is taken to be where
426  // Int_[-oo,xC] { PDF_S(x) dx } = Int_[xC,+oo] { PDF_B(x) dx }
427  virtual Bool_t IsSignalLike();
428  virtual Bool_t IsSignalLike(Double_t mvaVal);
429 
430 
431  Bool_t HasMVAPdfs() const { return fHasMVAPdfs; }
432  virtual void SetAnalysisType( Types::EAnalysisType type ) { fAnalysisType = type; }
433  Types::EAnalysisType GetAnalysisType() const { return fAnalysisType; }
434  Bool_t DoRegression() const { return fAnalysisType == Types::kRegression; }
435  Bool_t DoMulticlass() const { return fAnalysisType == Types::kMulticlass; }
436 
437  // setter method for suppressing writing to XML and writing of standalone classes
438  void DisableWriting(Bool_t setter){ fModelPersistence = setter?kFALSE:kTRUE; }//DEPRECATED
439 
440  protected:
441  // helper variables for JsMVA
442  IPythonInteractive *fInteractive = nullptr;
443  bool fExitFromTraining = false;
444  UInt_t fIPyMaxIter = 0, fIPyCurrentIter = 0;
445 
446  public:
447 
448  // initializing IPythonInteractive class (for JsMVA only)
449  inline void InitIPythonInteractive(){
450  delete fInteractive; // deleting a null pointer is a no-op, so no guard is needed
451  fInteractive = new IPythonInteractive(); // always start from a fresh helper object
452  }
453 
454  // get training errors (for JsMVA only); returns nullptr while fInteractive is null (it defaults to nullptr and is reset by TrainingEnded())
455  inline TMultiGraph* GetInteractiveTrainingError(){return fInteractive ? fInteractive->Get() : nullptr;}
456 
457  // stops the training process (for JsMVA only)
458  inline void ExitFromTraining(){
459  fExitFromTraining = true;
460  }
461 
462  // checks if the training ended (for JsMVA only)
463  inline bool TrainingEnded(){
464  if (fExitFromTraining && fInteractive){ // exit was requested and the helper is still allocated
465  delete fInteractive; // release the interactive helper once the exit flag is set
466  fInteractive = nullptr; // reset so later calls neither double-delete nor use a dangling pointer
467  }
468  return fExitFromTraining; // reports the exit flag; the cleanup above is a side effect
469  }
470 
471  // get fIPyMaxIter
472  inline UInt_t GetMaxIter(){ return fIPyMaxIter; }
473 
474  // get fIPyCurrentIter
475  inline UInt_t GetCurrentIter(){ return fIPyCurrentIter; }
476 
477  protected:
478 
479  // ---------- protected accessors -------------------------------------------
480 
481  //TDirectory* LocalTDir() const { return Data().LocalRootDir(); }
482 
483  // weight file name and directory (given by global config variable)
484  void SetWeightFileName( TString );
485 
486  const TString& GetWeightFileDir() const { return fFileDir; }
487  void SetWeightFileDir( TString fileDir );
488 
489  // are input variables normalised ?
490  Bool_t IsNormalised() const { return fNormalise; }
491  void SetNormalised( Bool_t norm ) { fNormalise = norm; }
492 
493  // set number of input variables (only used by MethodCuts, could perhaps be removed)
494  // void SetNvar( Int_t n ) { fNvar = n; }
495 
496  // verbose and help flags
497  Bool_t Verbose() const { return fVerbose; }
498  Bool_t Help () const { return fHelp; }
499 
500  // ---------- protected event and tree accessors -----------------------------
501 
502  // names of input variables (if the original names are expressions, they are
503  // transformed into regexps)
504  const TString& GetInternalVarName( Int_t ivar ) const { return (*fInputVars)[ivar]; }
505  const TString& GetOriginalVarName( Int_t ivar ) const { return DataInfo().GetVariableInfo(ivar).GetExpression(); }
506 
507  Bool_t HasTrainingTree() const { return Data()->GetNTrainingEvents() != 0; }
508 
509  // ---------- protected auxiliary methods ------------------------------------
510 
511  protected:
512 
513  // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
514  virtual void MakeClassSpecific( std::ostream&, const TString& = "" ) const {}
515 
516  // header and auxiliary classes
517  virtual void MakeClassSpecificHeader( std::ostream&, const TString& = "" ) const {}
518 
519  // static pointer to this object - required for ROOT finder (to be solved differently)(solved by Omar)
520  //static MethodBase* GetThisBase();
521 
522  // some basic statistical analysis
523  void Statistics( Types::ETreeType treeType, const TString& theVarName,
525  Double_t&, Double_t&, Double_t& );
526 
527  // if TRUE, write weights only to text files
528  Bool_t TxtWeightsOnly() const { return kTRUE; }
529 
530  protected:
531 
532  // access to event information that needs method-specific information
533 
534  Bool_t IsConstructedFromWeightFile() const { return fConstructedFromWeightFile; }
535 
536  private:
537 
538  // ---------- private definitions --------------------------------------------
539  // Initialisation
540  void InitBase();
541  void DeclareBaseOptions();
542  void ProcessBaseOptions();
543 
544  // used in efficiency computation
545  enum ECutOrientation { kNegative = -1, kPositive = +1 };
546  ECutOrientation GetCutOrientation() const { return fCutOrientation; }
547 
548  // ---------- private accessors ---------------------------------------------
549 
550  // reset required for RootFinder
551  void ResetThisBase();
552 
553  // ---------- private auxiliary methods --------------------------------------
554 
555  // PDFs for classifier response (required to compute signal probability and Rarity)
556  void CreateMVAPdfs();
557 
558  // for root finder
559  //virtual method to find ROOT
560  virtual Double_t GetValueForRoot ( Double_t ); // implementation
561 
562  // used for file parsing
563  Bool_t GetLine( std::istream& fin, char * buf );
564 
565  // fill test tree with classification or regression results
566  virtual void AddClassifierOutput ( Types::ETreeType type );
567  virtual void AddClassifierOutputProb( Types::ETreeType type );
568  virtual void AddRegressionOutput ( Types::ETreeType type );
569  virtual void AddMulticlassOutput ( Types::ETreeType type );
570 
571  private:
572 
573  void AddInfoItem( void* gi, const TString& name,
574  const TString& value) const;
575 
576  // ========== class members ==================================================
577 
578  protected:
579 
580  // direct accessors
581  Ranking* fRanking; // pointer to ranking object (created by derived classifiers)
582  std::vector<TString>* fInputVars; // vector of input variables used in MVA
583 
584  // histogram binning
585  Int_t fNbins; // number of bins in input variable histograms
586  Int_t fNbinsMVAoutput; // number of bins in MVA output histograms
587  Int_t fNbinsH; // number of bins in evaluation histograms
588 
589  Types::EAnalysisType fAnalysisType; // analysis type of this method (e.g. Types::kRegression, Types::kMulticlass); see DoRegression()/DoMulticlass()
590 
591  std::vector<Float_t>* fRegressionReturnVal; // holds the return-values for the regression
592  std::vector<Float_t>* fMulticlassReturnVal; // holds the return-values for the multiclass classification
593 
594  private:
595 
596  // MethodCuts redefines some of the evaluation variables and histograms -> must access private members
597  friend class MethodCuts;
598 
599 
600  // data sets
601  DataSetInfo& fDataSetInfo; //! the data set information (sometimes needed)
602 
603  Double_t fSignalReferenceCut; // minimum requirement on the MVA output to declare an event signal-like
604  Double_t fSignalReferenceCutOrientation; // orientation used together with fSignalReferenceCut - NOTE(review): previous comment duplicated fSignalReferenceCut's; confirm exact semantics in IsSignalLike()
605  Types::ESBType fVariableTransformType; // this is the event type (sig or bgd) assumed for variable transform
606 
607  // naming and versioning
608  TString fJobName; // name of job -> user defined, appears in weight files
609  TString fMethodName; // name of the method (set in derived class)
610  Types::EMVA fMethodType; // type of method (set in derived class)
611  TString fTestvar; // variable used in evaluation, etc (mostly the MVA)
612  UInt_t fTMVATrainingVersion; // TMVA version used for training
613  UInt_t fROOTTrainingVersion; // ROOT version used for training
614  Bool_t fConstructedFromWeightFile; // is it obtained from weight file?
615 
616  // Directory structure: dataloader/fMethodBaseDir/fBaseDir
617  // where the first directory name is defined by the method type
618  // and the second is user supplied (the title given in Factory::BookMethod())
619  TDirectory* fBaseDir; // base directory for the instance, needed to know where to jump back from localDir
620  mutable TDirectory* fMethodBaseDir; // base directory for the method
621  //this will be the next way to save results
623 
624  //SilentFile
626  //Model Persistence
628 
629  TString fParentDir; // method parent name, like booster name
630 
631  TString fFileDir; // unix sub-directory for weight files (default: DataLoader's Name + "weights")
632  TString fWeightFile; // weight file name
633 
634  private:
635 
636  TH1* fEffS; // efficiency histogram for rootfinder
637 
638  PDF* fDefaultPDF; // default PDF definitions
639  PDF* fMVAPdfS; // signal MVA PDF
640  PDF* fMVAPdfB; // background MVA PDF
641 
642  // TH1D* fmvaS; // PDFs of MVA distribution (signal)
643  // TH1D* fmvaB; // PDFs of MVA distribution (background)
644  PDF* fSplS; // PDFs of MVA distribution (signal)
645  PDF* fSplB; // PDFs of MVA distribution (background)
646  TSpline* fSpleffBvsS; // splines for signal eff. versus background eff.
647 
648  PDF* fSplTrainS; // PDFs of training MVA distribution (signal)
649  PDF* fSplTrainB; // PDFs of training MVA distribution (background)
650  TSpline* fSplTrainEffBvsS; // splines for training signal eff. versus background eff.
651 
652  private:
653 
654  // basic statistics quantities of MVA
655  Double_t fMeanS; // mean (signal)
656  Double_t fMeanB; // mean (background)
657  Double_t fRmsS; // RMS (signal)
658  Double_t fRmsB; // RMS (background)
659  Double_t fXmin; // minimum (signal and background)
660  Double_t fXmax; // maximum (signal and background)
661 
662  // variable preprocessing
663  TString fVarTransformString; // labels variable transform method
664 
665  TransformationHandler* fTransformationPointer; // pointer to the rest of transformations
666  TransformationHandler fTransformation; // the list of transformations
667 
668 
669  // help and verbosity
670  Bool_t fVerbose; // verbose flag
671  TString fVerbosityLevelString; // verbosity level (user input string)
672  EMsgType fVerbosityLevel; // verbosity level
673  Bool_t fHelp; // help flag
674  Bool_t fHasMVAPdfs; // MVA Pdfs are created for this classifier
675 
676  Bool_t fIgnoreNegWeightsInTraining;// If true, events with negative weights are not used in training
677 
678  protected:
679 
680  Bool_t IgnoreEventsWithNegWeightsInTraining() const { return fIgnoreNegWeightsInTraining; }
681 
682  // for signal/background
683  UInt_t fSignalClass; // index of the Signal-class
684  UInt_t fBackgroundClass; // index of the Background-class
685 
686  private:
687 
688  // timing variables
689  Double_t fTrainTime; // for timing measurements
690  Double_t fTestTime; // for timing measurements
691 
692  // orientation of cut: depends on signal and background mean values
693  ECutOrientation fCutOrientation; // +1 if Sig>Bkg, -1 otherwise
694 
695  // for root finder
696  TSpline1* fSplRefS; // helper splines for RootFinder (signal)
697  TSpline1* fSplRefB; // helper splines for RootFinder (background)
698 
699  TSpline1* fSplTrainRefS; // helper splines for RootFinder (signal)
700  TSpline1* fSplTrainRefB; // helper splines for RootFinder (background)
701 
702  mutable std::vector<const std::vector<TMVA::Event*>*> fEventCollections; // if the method needs the complete event-collection, the transformed event coll. is stored here.
703 
704  public:
705  Bool_t fSetupCompleted; // is method setup
706 
707  private:
708 
709  // This is a workaround for OSx where static thread_local data members are
710  // not supported. The C++ solution would indeed be the following:
711 // static MethodBase*& GetThisBaseThreadLocal() {TTHREAD_TLS(MethodBase*) fgThisBase(nullptr); return fgThisBase; };
712 
713  // ===== deprecated options, kept for backward compatibility =====
714  private:
715 
716  Bool_t fNormalise; // normalise input variables
717  Bool_t fUseDecorr; // synonymous for decorrelation
718  TString fVariableTransformTypeString; // labels variable transform type
719  Bool_t fTxtWeightsOnly; // if TRUE, write weights only to text files
720  Int_t fNbinsMVAPdf; // number of bins used in histogram that creates PDF
721  Int_t fNsmoothMVAPdf; // number of times a histogram is smoothed before creating the PDF
722 
723  protected:
725  ClassDef(MethodBase,0); // Virtual base class for all TMVA method
726 
727  };
728 } // namespace TMVA
729 
730 
731 
732 
733 
734 
735 
736 // ========== INLINE FUNCTIONS =========================================================
737 
738 
739 //_______________________________________________________________________
740 inline const TMVA::Event* TMVA::MethodBase::GetEvent( const TMVA::Event* ev ) const
741 {
742  return GetTransformationHandler().Transform(ev); // pass ev through the active handler (the rerouted one if set, else this method's own)
743 }
744 
746 {
747  if(fTmpEvent)
748  return GetTransformationHandler().Transform(fTmpEvent);
749  else
750  return GetTransformationHandler().Transform(Data()->GetEvent());
751 }
752 
754 {
755  assert(fTmpEvent==0);
756  return GetTransformationHandler().Transform(Data()->GetEvent(ievt));
757 }
758 
760 {
761  assert(fTmpEvent==0);
762  return GetTransformationHandler().Transform(Data()->GetEvent(ievt, type));
763 }
764 
766 {
767  assert(fTmpEvent==0);
768  return GetEvent(ievt, Types::kTraining);
769 }
770 
772 {
773  assert(fTmpEvent==0);
774  return GetEvent(ievt, Types::kTesting);
775 }
776 
777 #endif
Bool_t HasMVAPdfs() const
Definition: MethodBase.h:431
Types::EAnalysisType fAnalysisType
Definition: MethodBase.h:589
void SetModelPersistence(Bool_t status)
Definition: MethodBase.h:378
TString fMethodName
Definition: MethodBase.h:609
virtual void ReadWeightsFromStream(TFile &)
Definition: MethodBase.h:263
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodBase.h:230
long long Long64_t
Definition: RtypesCore.h:69
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in the two-TGraph case; it adds new data points to the graphs.
Definition: MethodBase.cxx:206
TString GetMethodName(Types::EMVA method) const
Definition: Types.cxx:130
Bool_t fIgnoreNegWeightsInTraining
Definition: MethodBase.h:676
Bool_t IsConstructedFromWeightFile() const
Definition: MethodBase.h:534
virtual void MakeClassSpecificHeader(std::ostream &, const TString &="") const
Definition: MethodBase.h:517
TSpline1 * fSplTrainRefS
Definition: MethodBase.h:699
const TString GetProbaName() const
Definition: MethodBase.h:332
std::vector< TGraph * > fGraphs
Definition: MethodBase.h:114
const TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true) const
Definition: MethodBase.h:394
UInt_t GetNvar() const
Definition: MethodBase.h:340
static Types & Instance()
Return the single instance of "Types" if it already exists, or create it (singleton).
Definition: Types.cxx:64
const TString & GetOriginalVarName(Int_t ivar) const
Definition: MethodBase.h:505
TString fWeightFile
Definition: MethodBase.h:632
TString fVariableTransformTypeString
Definition: MethodBase.h:718
void SetMethodBaseDir(TDirectory *methodDir)
Definition: MethodBase.h:370
Base class for spline implementation containing the Draw/Paint methods.
Definition: TSpline.h:22
TransformationHandler * fTransformationPointer
Definition: MethodBase.h:665
Types::ESBType fVariableTransformType
Definition: MethodBase.h:605
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:50
EAnalysisType
Definition: Types.h:129
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:37
void InitIPythonInteractive()
Definition: MethodBase.h:449
const std::vector< Float_t > & GetRegressionValues(const TMVA::Event *const ev)
Definition: MethodBase.h:217
void SetSignalReferenceCutOrientation(Double_t cutOrientation)
Definition: MethodBase.h:361
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodBase.h:224
Basic string class.
Definition: TString.h:137
1-D histogram with a float per channel (see TH1 documentation).
Definition: TH1.h:575
void SetTrainTime(Double_t trainTime)
Definition: MethodBase.h:165
TMultiGraph * fMultiGraph
Definition: MethodBase.h:111
const TString & GetInternalVarName(Int_t ivar) const
Definition: MethodBase.h:504
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:390
int Int_t
Definition: RtypesCore.h:41
TMultiGraph * Get()
Definition: MethodBase.h:110
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
Results * fResults
Definition: MethodBase.h:724
TString fJobName
Definition: MethodBase.h:608
TSpline1 * fSplRefB
Definition: MethodBase.h:697
UInt_t GetNTargets() const
Definition: MethodBase.h:342
TSpline1 * fSplRefS
Definition: MethodBase.h:696
std::vector< TString > * fInputVars
Definition: MethodBase.h:582
const char * GetInputTitle(Int_t i) const
Definition: MethodBase.h:347
void SetSilentFile(Bool_t status)
Definition: MethodBase.h:374
Double_t fTrainTime
Definition: MethodBase.h:689
Double_t fTestTime
Definition: MethodBase.h:690
Double_t GetMean(Int_t ivar) const
Definition: MethodBase.h:350
Double_t GetTrainTime() const
Definition: MethodBase.h:166
const TString & GetInputLabel(Int_t i) const
Definition: MethodBase.h:346
void SetMethodDir(TDirectory *methodDir)
Definition: MethodBase.h:368
const TString & GetWeightFileDir() const
Definition: MethodBase.h:486
UInt_t fSignalClass
Definition: MethodBase.h:683
const TString & GetInputVar(Int_t i) const
Definition: MethodBase.h:345
Double_t x[n]
Definition: legend1.C:17
DataSetInfo & fDataSetInfo
Definition: MethodBase.h:601
#define ClassDef(name, id)
Definition: Rtypes.h:254
ECutOrientation fCutOrientation
Definition: MethodBase.h:693
Bool_t TxtWeightsOnly() const
Definition: MethodBase.h:528
UInt_t GetTrainingTMVAVersionCode() const
Definition: MethodBase.h:385
const Event * GetEvent() const
Definition: MethodBase.h:745
DataSet * Data() const
Definition: MethodBase.h:405
void ClearGraphs()
This function sets the point number to 0 for all graphs.
Definition: MethodBase.cxx:194
~IPythonInteractive()
standard destructor
Definition: MethodBase.cxx:158
Double_t fMeanB
Definition: MethodBase.h:656
std::vector< std::vector< double > > Data
Double_t GetXmin(Int_t ivar) const
Definition: MethodBase.h:352
void Init(std::vector< TString > &graphTitles)
This function takes a list of titles and creates a TGraph for every title.
Definition: MethodBase.cxx:171
DataSetInfo & DataInfo() const
Definition: MethodBase.h:406
Bool_t DoRegression() const
Definition: MethodBase.h:434
TString fTestvar
Definition: MethodBase.h:611
TFile * GetFile() const
Definition: MethodBase.h:366
Definition: PDF.h:71
TSpline * fSpleffBvsS
Definition: MethodBase.h:646
Bool_t fModelPersistence
Definition: MethodBase.h:627
const Event * GetTrainingEvent(Long64_t ievt) const
Definition: MethodBase.h:765
Bool_t Verbose() const
Definition: MethodBase.h:497
UInt_t fTMVATrainingVersion
Definition: MethodBase.h:612
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:413
Double_t GetXmax(Int_t ivar) const
Definition: MethodBase.h:353
TransformationHandler fTransformation
Definition: MethodBase.h:666
Bool_t DoMulticlass() const
Definition: MethodBase.h:435
virtual void MakeClassSpecific(std::ostream &, const TString &="") const
Definition: MethodBase.h:514
const Event * GetTestingEvent(Long64_t ievt) const
Definition: MethodBase.h:771
Bool_t HasTrainingTree() const
Definition: MethodBase.h:507
Double_t fRmsB
Definition: MethodBase.h:658
Double_t fXmin
Definition: MethodBase.h:659
std::string GetMethodName(TCppMethod_t)
Definition: Cppyy.cxx:728
TSpline1 * fSplTrainRefB
Definition: MethodBase.h:700
TDirectory * fMethodBaseDir
Definition: MethodBase.h:620
SVector< double, 2 > v
Definition: Dict.h:5
UInt_t fROOTTrainingVersion
Definition: MethodBase.h:613
const char * GetName() const
Definition: MethodBase.h:330
EMsgType
Definition: Types.h:61
UInt_t GetTrainingROOTVersionCode() const
Definition: MethodBase.h:386
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t fMeanS
Definition: MethodBase.h:655
Bool_t Help() const
Definition: MethodBase.h:498
Int_t fNsmoothMVAPdf
Definition: MethodBase.h:721
Bool_t fTxtWeightsOnly
Definition: MethodBase.h:719
const TString & GetJobName() const
Definition: MethodBase.h:326
const TString & GetMethodName() const
Definition: MethodBase.h:327
TDirectory * fBaseDir
Definition: MethodBase.h:619
Bool_t fHasMVAPdfs
Definition: MethodBase.h:674
TSpline * fSplTrainEffBvsS
Definition: MethodBase.h:650
1-D histogram with a double per channel (see TH1 documentation).
Definition: TH1.h:618
Bool_t IsSilentFile()
Definition: MethodBase.h:375
Double_t GetSignalReferenceCutOrientation() const
Definition: MethodBase.h:357
void SetNormalised(Bool_t norm)
Definition: MethodBase.h:491
Double_t GetTestTime() const
Definition: MethodBase.h:170
UInt_t GetNVariables() const
Definition: MethodBase.h:341
std::vector< const std::vector< TMVA::Event * > * > fEventCollections
Definition: MethodBase.h:702
TString fVerbosityLevelString
Definition: MethodBase.h:671
Double_t fRmsS
Definition: MethodBase.h:657
UInt_t fBackgroundClass
Definition: MethodBase.h:684
Bool_t IgnoreEventsWithNegWeightsInTraining() const
Definition: MethodBase.h:680
void RerouteTransformationHandler(TransformationHandler *fTargetTransformation)
Definition: MethodBase.h:399
void SetTestTime(Double_t testTime)
Definition: MethodBase.h:169
UInt_t GetMaxIter()
Definition: MethodBase.h:472
double Double_t
Definition: RtypesCore.h:55
EMsgType fVerbosityLevel
Definition: MethodBase.h:672
Describe directory structure in memory.
Definition: TDirectory.h:44
std::vector< Float_t > * fMulticlassReturnVal
Definition: MethodBase.h:592
Bool_t IsNormalised() const
Definition: MethodBase.h:490
int type
Definition: TGX11.cxx:120
void SetFile(TFile *file)
Definition: MethodBase.h:371
virtual void Reset()
Definition: MethodBase.h:197
The TH1 histogram class.
Definition: TH1.h:80
IPythonInteractive()
standard constructor
Definition: MethodBase.cxx:150
void ExitFromTraining()
Definition: MethodBase.h:458
TString fParentDir
Definition: MethodBase.h:629
Bool_t fConstructedFromWeightFile
Definition: MethodBase.h:614
TString fVarTransformString
Definition: MethodBase.h:663
Types::EMVA fMethodType
Definition: MethodBase.h:610
char Char_t
Definition: RtypesCore.h:29
Double_t GetRMS(Int_t ivar) const
Definition: MethodBase.h:351
This class is needed by JsMVA, and it's a helper class for tracking errors during the training in Jup...
Definition: MethodBase.h:102
Abstract ClassifierFactory template that handles arbitrary types.
Ranking * fRanking
Definition: MethodBase.h:581
TString GetMethodTypeName() const
Definition: MethodBase.h:328
Definition: file.py:1
bool TrainingEnded()
Definition: MethodBase.h:463
Double_t fSignalReferenceCut
the data set information (sometimes needed)
Definition: MethodBase.h:603
const Event * fTmpEvent
Definition: MethodBase.h:408
Double_t GetSignalReferenceCut() const
Definition: MethodBase.h:356
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:53
Int_t fNbinsMVAoutput
Definition: MethodBase.h:586
Bool_t fSilentFile
Definition: MethodBase.h:625
UInt_t GetCurrentIter()
Definition: MethodBase.h:475
Double_t fXmax
Definition: MethodBase.h:660
void DisableWriting(Bool_t setter)
Definition: MethodBase.h:438
ECutOrientation GetCutOrientation() const
Definition: MethodBase.h:546
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:591
Types::EAnalysisType GetAnalysisType() const
Definition: MethodBase.h:433
A TTree object has a header with a name and a title.
Definition: TTree.h:98
const TString & GetTestvarName() const
Definition: MethodBase.h:331
const Bool_t kTRUE
Definition: Rtypes.h:91
void SetTestvarName(const TString &v="")
Definition: MethodBase.h:337
TString fFileDir
Definition: MethodBase.h:631
TMultiGraph * GetInteractiveTrainingError()
Definition: MethodBase.h:455
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
Types::EMVA GetMethodType() const
Definition: MethodBase.h:329
void SetBaseDir(TDirectory *methodDir)
Definition: MethodBase.h:369
virtual void SetAnalysisType(Types::EAnalysisType type)
Definition: MethodBase.h:432
char name[80]
Definition: TGX11.cxx:109
Bool_t fSetupCompleted
Definition: MethodBase.h:705
void SetSignalReferenceCut(Double_t cut)
Definition: MethodBase.h:360
Double_t fSignalReferenceCutOrientation
Definition: MethodBase.h:604
Bool_t IsModelPersistence()
Definition: MethodBase.h:379