Logo ROOT   6.10/09
Reference Guide
VariableImportance.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Omar Zapata and Sergei Gleyzer. 2016
3 
4 
5 #ifndef ROOT_TMVA_VariableImportance
6 #define ROOT_TMVA_VariableImportance
7 
8 
9 #include "TString.h"
10 
11 
12 #include "TMVA/Configurable.h"
13 #include "TMVA/Types.h"
14 
15 #include <TMVA/Factory.h>
16 
17 #include <TMVA/DataLoader.h>
18 
19 #include <TMVA/OptionMap.h>
20 
21 #include <TMVA/Envelope.h>
22 
23 namespace TMVA {
24 
26  { // body of VariableImportanceResult (class name as spelled by the destructor below)
27  friend class VariableImportance; // lets VariableImportance access the private members below
28  private:
30  std::shared_ptr<TH1F> fImportanceHist; // shared handle to the importance histogram
31  VIType fType {kShort}; // defaults to kShort
32  public:
35  ~VariableImportanceResult(){fImportanceHist=nullptr;} // resets the handle, dropping this object's reference to the histogram
36 
38  TH1F *GetImportanceHist(){return fImportanceHist.get();} // non-owning raw pointer; null when no histogram is held
39  void Print() const ;
40 
41  TCanvas* Draw(const TString name="VariableImportance") const; // name defaults to "VariableImportance"; NOTE(review): presumably used as the canvas name, confirm in the implementation
42  };
43 
44 
// Envelope subclass that evaluates variable importance and exposes the outcome
// as a VariableImportanceResult through GetResults().
45  class VariableImportance : public Envelope {
46  private:
47  UInt_t fNumFolds = 0; // initialized to 0
49  VIType fType {kShort}; // evaluation type; defaults to kShort, changed via SetType()
50  public:
51  explicit VariableImportance(DataLoader *loader);
53 
54  virtual void Evaluate();
55 
56  void SetType(VIType type){fType=type;} // select the evaluation type
57  VIType GetType(){return fType;} // currently selected evaluation type
58 
59  const VariableImportanceResult& GetResults() const {return fResults;} // returns a const reference to fResults; open question from the authors: is this the best way to expose the results?
60  protected:
61  // Evaluate the simple case: remove one variable at a time.
62  void EvaluateImportanceShort();
63  // Evaluate all variable combinations. NOTE: use with care on huge datasets with a huge number of variables.
64  void EvaluateImportanceAll();
65  // Evaluate randomly, given a number of seeds.
66  void EvaluateImportanceRandom(UInt_t nseeds);
67 
68  // Return a histogram presenting the results.
69  TH1F* GetImportance(const UInt_t nbits,std::vector<Float_t> &importances,std::vector<TString> &varNames);
70 
71  // Compute the range (total number of operations for every bit configuration).
72  ULong_t Sum(ULong_t i);
73 
74  private:
75  std::unique_ptr<Factory> fClassifier; // uniquely owned Factory instance
77  };
78 }
79 
80 
81 #endif
Class to store options for the different methods.
Definition: OptionMap.h:35
Basic string class.
Definition: TString.h:129
1-D histogram with a float per channel (see TH1 documentation)
Definition: TH1.h:551
std::shared_ptr< TH1F > fImportanceHist
TCanvas * Draw(const TString name="VariableImportance") const
#define ClassDef(name, id)
Definition: Rtypes.h:297
std::unique_ptr< Factory > fClassifier
Base class for all machine learning algorithms.
Definition: Envelope.h:35
const VariableImportanceResult & GetResults() const
unsigned int UInt_t
Definition: RtypesCore.h:42
Double_t Sum(const double *x, const double *p)
The Canvas class.
Definition: TCanvas.h:31
int type
Definition: TGX11.cxx:120
unsigned long ULong_t
Definition: RtypesCore.h:51
Abstract ClassifierFactory template that handles arbitrary types.
VariableImportanceResult fResults