Logo ROOT  
Reference Guide
Classification.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$ 2017
2 // Authors: Omar Zapata, Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne,
3 // Jan Therhaag
4 
5 #ifndef ROOT_TMVA_Classification
6 #define ROOT_TMVA_Classification
7 
8 #include <TString.h>
9 #include <TMultiGraph.h>
10 #include <vector>
11 #include <map>
12 
13 #include <TMVA/IMethod.h>
14 #include <TMVA/MethodBase.h>
15 #include <TMVA/Configurable.h>
16 #include <TMVA/Types.h>
17 #include <TMVA/DataSet.h>
18 #include <TMVA/Event.h>
19 #include <TMVA/Results.h>
21 #include <TMVA/ResultsMulticlass.h>
22 #include <TMVA/Factory.h>
23 #include <TMVA/DataLoader.h>
24 #include <TMVA/OptionMap.h>
25 #include <TMVA/Envelope.h>
26 
27 /*! \class TMVA::ClassificationResult
28  * Class to save the results of the classifier.
29  * Every booked machine learning method has an object holding its results
30  * from the classification process; this class stores the mvas,
31  * the data loader name and the ml method name and title.
32  * You can display the results by calling the method Show, get the ROC-integral with the
33  * method GetROCIntegral or get the TMVA::ROCCurve object by calling GetROC.
34 \ingroup TMVA
35 */
36 
37 /*! \class TMVA::Classification
38  * Class to perform two class classification.
39  * The first step before any analysis is to prepare the data;
40  * to do that you need to create an object of TMVA::DataLoader,
41  * in this object you need to configure the variables and the number of events
42  * to train/test.
43  * The class TMVA::Experimental::Classification needs a TMVA::DataLoader object,
44  * optionally a TFile object to save the results and some extra options in a string
45  * like "V:Color:Transformations=I;D;P;U;G:Silent:DrawProgressBar:ModelPersistence:Jobs=2" where:
46  * V = verbose output
47  * Color = coloured screen output
48  * Silent = batch mode: boolean silent flag inhibiting any output from TMVA
49  * Transformations = list of transformations to test.
50  * DrawProgressBar = draw progress bar to display training and testing.
51  * ModelPersistence = to save the trained model in xml or serialized files.
52  * Jobs = number of ml methods to test/train in parallel using MultiProc; requires calling the Evaluate method.
53  * Basic example.
54  * \code
55 void classification(UInt_t jobs = 2)
56 {
57  TMVA::Tools::Instance();
58 
59  TFile *input(0);
60  TString fname = "./tmva_class_example.root";
61  if (!gSystem->AccessPathName(fname)) {
62  input = TFile::Open(fname); // check if file in local directory exists
63  } else {
64  TFile::SetCacheFileDir(".");
65  input = TFile::Open("http://root.cern.ch/files/tmva_class_example.root", "CACHEREAD");
66  }
67  if (!input) {
68  std::cout << "ERROR: could not open data file" << std::endl;
69  exit(1);
70  }
71 
72  // Register the training and test trees
73 
74  TTree *signalTree = (TTree *)input->Get("TreeS");
75  TTree *background = (TTree *)input->Get("TreeB");
76 
77  TMVA::DataLoader *dataloader = new TMVA::DataLoader("dataset");
78 
79  dataloader->AddVariable("myvar1 := var1+var2", 'F');
80  dataloader->AddVariable("myvar2 := var1-var2", "Expression 2", "", 'F');
81  dataloader->AddVariable("var3", "Variable 3", "units", 'F');
82  dataloader->AddVariable("var4", "Variable 4", "units", 'F');
83 
84  dataloader->AddSpectator("spec1 := var1*2", "Spectator 1", "units", 'F');
85  dataloader->AddSpectator("spec2 := var1*3", "Spectator 2", "units", 'F');
86 
87  // global event weights per tree (see below for setting event-wise weights)
88  Double_t signalWeight = 1.0;
89  Double_t backgroundWeight = 1.0;
90 
91  dataloader->SetBackgroundWeightExpression("weight");
92 
93  TMVA::Experimental::Classification *cl = new TMVA::Experimental::Classification(dataloader, Form("Jobs=%d", jobs));
94 
95  cl->BookMethod(TMVA::Types::kBDT, "BDTG", "!H:!V:NTrees=2000:MinNodeSize=2.5%:BoostType=Grad:Shrinkage=0.10:"
96  "UseBaggedBoost:BaggedSampleFraction=0.5:nCuts=20:MaxDepth=2");
97  cl->BookMethod(TMVA::Types::kSVM, "SVM", "Gamma=0.25:Tol=0.001:VarTransform=Norm");
98 
99  cl->Evaluate(); // Train and Test all methods
100 
101  auto &results = cl->GetResults();
102 
103  TCanvas *c = new TCanvas(Form("ROC"));
104  c->SetTitle("ROC-Integral Curve");
105 
106  auto mg = new TMultiGraph();
107  for (UInt_t i = 0; i < results.size(); i++) {
108  auto roc = results[i].GetROCGraph();
109  roc->SetLineColorAlpha(i + 1, 0.1);
110  mg->Add(roc);
111  }
112  mg->Draw("AL");
113  mg->GetXaxis()->SetTitle(" Signal Efficiency ");
114  mg->GetYaxis()->SetTitle(" Background Rejection ");
115  c->BuildLegend(0.15, 0.15, 0.3, 0.3);
116  c->Draw();
117 
118  delete cl;
119 }
120  * \endcode
121  *
122 \ingroup TMVA
123 */
124 
125 namespace TMVA {
126 class ResultsClassification;
127 namespace Experimental {
129  friend class Classification;
130 
131 private:
134  std::map<UInt_t, std::vector<std::tuple<Float_t, Float_t, Bool_t>>> fMvaTrain; // Mvas for two-class classification
135  std::map<UInt_t, std::vector<std::tuple<Float_t, Float_t, Bool_t>>>
136  fMvaTest; // Mvas for two-class and multiclass classification
137  std::vector<TString> fClassNames; //
138 
139  Bool_t IsMethod(TString methodname, TString methodtitle);
140  Bool_t fIsCuts; // if it is a method cuts need special output
142 
143 public:
147 
148  const TString GetMethodName() const { return fMethod.GetValue<TString>("MethodName"); }
149  const TString GetMethodTitle() const { return fMethod.GetValue<TString>("MethodTitle"); }
154 
155  void Show();
156 
159 
161 };
162 
163 class Classification : public Envelope { // class to perform two class classification over the booked ml methods
164  std::vector<ClassificationResult> fResults; //! classification results (presumably the vector returned by GetResults(); confirm in Classification.cxx)
165  std::vector<IMethod *> fIMethods; //! vector of objects with booked methods
169 public:
170  explicit Classification(DataLoader *loader, TFile *file, TString options); // create a two class classifier; file is the TFile used to save the results
171  explicit Classification(DataLoader *loader, TString options); // overload without a TFile argument
172  ~Classification();
173 
174  virtual void Train(); // train all booked ml methods
175  virtual void TrainMethod(TString methodname, TString methodtitle); // train one specific ml method
176  virtual void TrainMethod(Types::EMVA method, TString methodtitle); // overload selecting the method by Types::EMVA
177 
178  virtual void Test(); // perform test evaluation in all booked methods
179  virtual void TestMethod(TString methodname, TString methodtitle); // test one specific ml method
180  virtual void TestMethod(Types::EMVA method, TString methodtitle); // overload selecting the method by Types::EMVA
181 
182  virtual void Evaluate(); // perform Train/Test over all booked ml methods
183 
184  std::vector<ClassificationResult> &GetResults(); // return the vector of ClassificationResult objects
185 
186  MethodBase *GetMethod(TString methodname, TString methodtitle); // return a TMVA::MethodBase object
187 
188 protected:
189  TString GetMethodOptions(TString methodname, TString methodtitle); // return the options for the booked method
190  Bool_t HasMethodObject(TString methodname, TString methodtitle, Int_t &index); // check if the TMVA::MethodBase was created; index receives its position in the vector
194  TMVA::ROCCurve *GetROC(TString methodname, TString methodtitle, UInt_t iClass = 0, // get the TMVA::ROCCurve object; NOTE(review): the rest of this declaration is missing from this listing
196 
197  Double_t GetROCIntegral(TString methodname, TString methodtitle, UInt_t iClass = 0); // get the ROC-Integral value from mvas
198 
199  ClassificationResult &GetResults(TString methodname, TString methodtitle); // presumably the result of a single booked method; confirm in Classification.cxx
200  void CopyFrom(TDirectory *src, TFile *file); // NOTE(review): no description available here; see Classification.cxx
201  void MergeFiles(); // NOTE(review): no description available here; see Classification.cxx
202 
204 };
205 } // namespace Experimental
206 } // namespace TMVA
207 
208 #endif // ROOT_TMVA_Classification
TMVA::Experimental::ClassificationResult::fMvaTest
std::map< UInt_t, std::vector< std::tuple< Float_t, Float_t, Bool_t > > > fMvaTest
Definition: Classification.h:136
TMVA::OptionMap
Class to store options for the different methods
Definition: OptionMap.h:34
TMVA::Experimental::Classification::GetMethodOptions
TString GetMethodOptions(TString methodname, TString methodtitle)
return the options for the booked method.
Definition: Classification.cxx:234
TMVA::OptionMap::GetValue
T GetValue(const TString &key)
Definition: OptionMap.h:133
ResultsClassification.h
TMVA::Experimental::ClassificationResult::GetDataLoaderName
TString GetDataLoaderName()
Definition: Classification.h:152
TMVA::Envelope
Abstract base class for all high level ml algorithms, you can book ml methods like BDT,...
Definition: Envelope.h:44
TMVA::Experimental::ClassificationResult::GetROCIntegral
Double_t GetROCIntegral(UInt_t iClass=0, TMVA::Types::ETreeType type=TMVA::Types::kTesting)
Method to get ROC-Integral value from mvas.
Definition: Classification.cxx:74
TMVA::Experimental::ClassificationResult::GetROCGraph
TGraph * GetROCGraph(UInt_t iClass=0, TMVA::Types::ETreeType type=TMVA::Types::kTesting)
Method to get TGraph object with the ROC curve.
Definition: Classification.cxx:149
TMVA::Experimental::ClassificationResult::fClassNames
std::vector< TString > fClassNames
Definition: Classification.h:137
TMVA::Experimental::Classification
Definition: Classification.h:163
TMVA::Experimental::Classification::fIMethods
std::vector< IMethod * > fIMethods
Definition: Classification.h:165
TMVA::Experimental::Classification::Test
virtual void Test()
Perform test evaluation in all booked methods.
Definition: Classification.cxx:512
r
ROOT::R::TRInterface & r
Definition: Object.C:4
IMethod.h
TMVA::Experimental::ClassificationResult::GetROC
ROCCurve * GetROC(UInt_t iClass=0, TMVA::Types::ETreeType type=TMVA::Types::kTesting)
Method to get TMVA::ROCCurve Object.
Definition: Classification.cxx:93
TMVA::Experimental::Classification::Classification
Classification(DataLoader *loader, TFile *file, TString options)
Constructor to create a two class classifier.
Definition: Classification.cxx:181
TMVA::Experimental::Classification::TestMethod
virtual void TestMethod(TString methodname, TString methodtitle)
Performs the test of a specific ml method.
Definition: Classification.cxx:525
TMVA::Types::kTesting
@ kTesting
Definition: Types.h:146
TMVA::Experimental::Classification::fResults
std::vector< ClassificationResult > fResults
Definition: Classification.h:164
DataLoader.h
TMVA::Experimental::ClassificationResult::operator=
ClassificationResult & operator=(const ClassificationResult &r)
Definition: Classification.cxx:105
TMVA::Experimental::Classification::MergeFiles
void MergeFiles()
Definition: Classification.cxx:1129
TMVA::Experimental::Classification::GetMethod
MethodBase * GetMethod(TString methodname, TString methodtitle)
Return a TMVA::MethodBase object.
Definition: Classification.cxx:385
MethodBase.h
TString
Basic string class.
Definition: TString.h:136
TString.h
TMVA::Experimental::Classification::GetResults
std::vector< ClassificationResult > & GetResults()
Returns the vector of TMVA::Experimental::ClassificationResult objects.
Definition: Classification.cxx:944
bool
Envelope.h
TMultiGraph.h
TMVA::Experimental::ClassificationResult::GetMethodName
const TString GetMethodName() const
Definition: Classification.h:148
TMVA::Experimental::Classification::HasMethodObject
Bool_t HasMethodObject(TString methodname, TString methodtitle, Int_t &index)
Checks whether the TMVA::MethodBase was created and returns its index in the vector.
Definition: Classification.cxx:493
TMVA::Experimental::ClassificationResult::fMethod
OptionMap fMethod
Definition: Classification.h:132
TMVA::Experimental::ClassificationResult::fDataLoaderName
TString fDataLoaderName
Definition: Classification.h:133
TMVA::Experimental::Classification::GetROC
TMVA::ROCCurve * GetROC(TMVA::MethodBase *method, UInt_t iClass=0, TMVA::Types::ETreeType type=TMVA::Types::kTesting)
Method to get TMVA::ROCCurve Object.
Definition: Classification.cxx:992
TMVA::Experimental::ClassificationResult::Show
void Show()
Method to print the results in stdout.
Definition: Classification.cxx:121
TMVA::Experimental::ClassificationResult::fROCIntegral
Double_t fROCIntegral
Definition: Classification.h:141
TMVA::Types::EAnalysisType
EAnalysisType
Definition: Types.h:128
TMVA::Experimental::ClassificationResult::~ClassificationResult
~ClassificationResult()
Definition: Classification.h:146
TMVA::Experimental::Classification::IsCutsMethod
Bool_t IsCutsMethod(TMVA::MethodBase *method)
Checks whether the ml method is a Cuts method.
Definition: Classification.cxx:956
TMVA::Types::ETreeType
ETreeType
Definition: Types.h:144
TMVA::Experimental::ClassificationResult::IsMethod
Bool_t IsMethod(TString methodname, TString methodtitle)
Method to check if method was booked.
Definition: Classification.cxx:166
TMVA::Experimental::Classification::GetROCIntegral
Double_t GetROCIntegral(TString methodname, TString methodtitle, UInt_t iClass=0)
Method to get ROC-Integral value from mvas.
Definition: Classification.cxx:1073
Event.h
TMVA::Experimental::ClassificationResult::fMvaTrain
std::map< UInt_t, std::vector< std::tuple< Float_t, Float_t, Bool_t > > > fMvaTrain
Definition: Classification.h:134
TMVA::Experimental::Classification::fAnalysisType
Types::EAnalysisType fAnalysisType
vector of objects with booked methods
Definition: Classification.h:166
TMVA::MethodBase
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
Types.h
Configurable.h
TFile
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
TMVA::Experimental::Classification::CopyFrom
void CopyFrom(TDirectory *src, TFile *file)
Definition: Classification.cxx:1092
unsigned int
TMVA::Experimental::Classification::Train
virtual void Train()
Method to train all booked ml methods.
Definition: Classification.cxx:323
TMVA::Experimental::Classification::TrainMethod
virtual void TrainMethod(TString methodname, TString methodtitle)
Trains a specific ml method.
Definition: Classification.cxx:336
OptionMap.h
Double_t
double Double_t
Definition: RtypesCore.h:59
TGraph
A TGraph is an object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
TMVA::Experimental::ClassificationResult::IsCutsMethod
Bool_t IsCutsMethod()
Definition: Classification.h:153
file
Definition: file.py:1
TMVA::Types::EMVA
EMVA
Definition: Types.h:78
TMVA::Experimental::ClassificationResult::fIsCuts
Bool_t fIsCuts
Definition: Classification.h:140
TMVA::Experimental::Classification::~Classification
~Classification()
Definition: Classification.cxx:219
TMVA::Experimental::ClassificationResult
Definition: Classification.h:128
TObject
Mother of all ROOT objects.
Definition: TObject.h:37
ClassDef
#define ClassDef(name, id)
Definition: Rtypes.h:325
Factory.h
TDirectory
Describe directory structure in memory.
Definition: TDirectory.h:45
TMVA::Experimental::ClassificationResult::ClassificationResult
ClassificationResult()
Definition: Classification.cxx:52
ResultsMulticlass.h
type
int type
Definition: TGX11.cxx:121
TMVA::Experimental::Classification::Evaluate
virtual void Evaluate()
Method to perform Train/Test over all booked ml methods.
Definition: Classification.cxx:248
TMVA::ROCCurve
Definition: ROCCurve.h:45
TMVA::Experimental::ClassificationResult::GetMethodTitle
const TString GetMethodTitle() const
Definition: Classification.h:149
Results.h
TMVA::Experimental::Classification::fCorrelations
Bool_t fCorrelations
Definition: Classification.h:167
DataSet.h
TMVA::Experimental::Classification::fROC
Bool_t fROC
Definition: Classification.h:168
TMVA
create variable transformations
Definition: GeneticMinimizer.h:22
int
TMVA::DataLoader
Definition: DataLoader.h:50