ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
TMVAMultipleBackgroundExample.C
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 /**********************************************************************************
3  * Project : TMVA - a Root-integrated toolkit for multivariate data analysis *
4  * Package : TMVA *
5  * Exectuable: TMVAGAexample *
6  * *
7  * This exectutable gives an example of a very simple use of the genetic algorithm*
8  * of TMVA *
9  * *
10  **********************************************************************************/
11 
12 #include <iostream> // Stream declarations
13 #include <vector>
14 #include <limits>
15 
16 #include "TChain.h"
17 #include "TCut.h"
18 #include "TDirectory.h"
19 #include "TH1F.h"
20 #include "TH1.h"
21 #include "TMath.h"
22 #include "TFile.h"
23 #include "TStopwatch.h"
24 #include "TROOT.h"
25 #include "TSystem.h"
26 
27 #include "TMVA/GeneticAlgorithm.h"
28 #include "TMVA/GeneticFitter.h"
29 #include "TMVA/IFitterTarget.h"
30 #include "TMVA/Factory.h"
31 #include "TMVA/Reader.h"
32 
33 using namespace std;
34 
35 namespace TMVA {
36 
37 // =========== Description =============
38 // This example shows the training of signal with three different backgrounds
39 // Then in the application a tree is created with all signal and background events where the true class ID and the three classifier outputs are added
40 // finally with the application tree, the significance is maximized with the help of the TMVA genetic algrorithm.
41 //
42 
43 
44 // ------------------------------ Training ----------------------------------------------------------------
45 void Training(){
46  std::string factoryOptions( "!V:!Silent:Transformations=I;D;P;G,D:AnalysisType=Classification" );
47  TString fname = "./tmva_example_multiple_background.root";
48 
49  TFile *input(0);
50  input = TFile::Open( fname );
51 
52  TTree *signal = (TTree*)input->Get("TreeS");
53  TTree *background0 = (TTree*)input->Get("TreeB0");
54  TTree *background1 = (TTree*)input->Get("TreeB1");
55  TTree *background2 = (TTree*)input->Get("TreeB2");
56 
57  /// global event weights per tree (see below for setting event-wise weights)
58  Double_t signalWeight = 1.0;
59  Double_t background0Weight = 1.0;
60  Double_t background1Weight = 1.0;
61  Double_t background2Weight = 1.0;
62 
63  // Create a new root output file.
64  TString outfileName( "TMVASignalBackground0.root" );
65  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
66 
67 
68 
69  // ===================== background 0
70  TMVA::Factory *factory = new TMVA::Factory( "TMVAMultiBkg0", outputFile, factoryOptions );
71  factory->AddVariable( "var1", "Variable 1", "", 'F' );
72  factory->AddVariable( "var2", "Variable 2", "", 'F' );
73  factory->AddVariable( "var3", "Variable 3", "units", 'F' );
74  factory->AddVariable( "var4", "Variable 4", "units", 'F' );
75 
76  factory->AddSignalTree ( signal, signalWeight );
77  factory->AddBackgroundTree( background0, background0Weight );
78 
79 // factory->SetBackgroundWeightExpression("weight");
80  TCut mycuts = ""; // for example: TCut mycuts = "abs(var1)<0.5 && abs(var2-0.5)<1";
81  TCut mycutb = ""; // for example: TCut mycutb = "abs(var1)<0.5";
82 
83  // tell the factory to use all remaining events in the trees after training for testing:
84  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
85  "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
86 
87  // Boosted Decision Trees
88  factory->BookMethod( TMVA::Types::kBDT, "BDTG",
89  "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedBoost:BaggedSampleFraction=0.6:SeparationType=GiniIndex:nCuts=20:MaxDepth=2" );
90  factory->TrainAllMethods();
91  factory->TestAllMethods();
92  factory->EvaluateAllMethods();
93 
94  outputFile->Close();
95 
96  delete factory;
97 
98 
99 
100  // ===================== background 1
101 
102  outfileName = "TMVASignalBackground1.root";
103  outputFile = TFile::Open( outfileName, "RECREATE" );
104 
105  factory = new TMVA::Factory( "TMVAMultiBkg1", outputFile, factoryOptions );
106  factory->AddVariable( "var1", "Variable 1", "", 'F' );
107  factory->AddVariable( "var2", "Variable 2", "", 'F' );
108  factory->AddVariable( "var3", "Variable 3", "units", 'F' );
109  factory->AddVariable( "var4", "Variable 4", "units", 'F' );
110 
111  factory->AddSignalTree ( signal, signalWeight );
112  factory->AddBackgroundTree( background1, background1Weight );
113 
114 // factory->SetBackgroundWeightExpression("weight");
115 
116  // tell the factory to use all remaining events in the trees after training for testing:
117  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
118  "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
119 
120  // Boosted Decision Trees
121  factory->BookMethod( TMVA::Types::kBDT, "BDTG",
122  "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedBoost:BaggedSampleFraction=0.6:SeparationType=GiniIndex:nCuts=20:MaxDepth=2" );
123  factory->TrainAllMethods();
124  factory->TestAllMethods();
125  factory->EvaluateAllMethods();
126 
127  outputFile->Close();
128 
129  delete factory;
130 
131 
132 
133 
134  // ===================== background 2
135  outfileName = "TMVASignalBackground2.root";
136  outputFile = TFile::Open( outfileName, "RECREATE" );
137 
138  factory = new TMVA::Factory( "TMVAMultiBkg2", outputFile, factoryOptions );
139  factory->AddVariable( "var1", "Variable 1", "", 'F' );
140  factory->AddVariable( "var2", "Variable 2", "", 'F' );
141  factory->AddVariable( "var3", "Variable 3", "units", 'F' );
142  factory->AddVariable( "var4", "Variable 4", "units", 'F' );
143 
144  factory->AddSignalTree ( signal, signalWeight );
145  factory->AddBackgroundTree( background2, background2Weight );
146 
147 // factory->SetBackgroundWeightExpression("weight");
148 
149  // tell the factory to use all remaining events in the trees after training for testing:
150  factory->PrepareTrainingAndTestTree( mycuts, mycutb,
151  "nTrain_Signal=0:nTrain_Background=0:SplitMode=Random:NormMode=NumEvents:!V" );
152 
153  // Boosted Decision Trees
154  factory->BookMethod( TMVA::Types::kBDT, "BDTG",
155  "!H:!V:NTrees=1000:BoostType=Grad:Shrinkage=0.30:UseBaggedBoost:BaggedSampleFraction=0.5:SeparationType=GiniIndex:nCuts=20:MaxDepth=2" );
156  factory->TrainAllMethods();
157  factory->TestAllMethods();
158  factory->EvaluateAllMethods();
159 
160  outputFile->Close();
161 
162  delete factory;
163 
164 }
165 
166 
167 
168 
169 
170 // ------------------------------ Application ----------------------------------------------------------------
171 // create a summary tree with all signal and background events and for each event the three classifier values and the true classID
173 
174  // Create a new root output file.
175  TString outfileName( "tmva_example_multiple_backgrounds__applied.root" );
176  TFile* outputFile = TFile::Open( outfileName, "RECREATE" );
177  TTree* outputTree = new TTree("multiBkg","multiple backgrounds tree");
178 
179  Float_t var1, var2;
180  Float_t var3, var4;
181  Int_t classID = 0;
182  Float_t weight = 1.f;
183 
184  Float_t classifier0, classifier1, classifier2;
185 
186  outputTree->Branch("classID", &classID, "classID/I");
187  outputTree->Branch("var1", &var1, "var1/F");
188  outputTree->Branch("var2", &var2, "var2/F");
189  outputTree->Branch("var3", &var3, "var3/F");
190  outputTree->Branch("var4", &var4, "var4/F");
191  outputTree->Branch("weight", &weight, "weight/F");
192  outputTree->Branch("cls0", &classifier0, "cls0/F");
193  outputTree->Branch("cls1", &classifier1, "cls1/F");
194  outputTree->Branch("cls2", &classifier2, "cls2/F");
195 
196 
197  // ===== create three readers for the three different signal/background classifications, .. one for each background
198  TMVA::Reader *reader0 = new TMVA::Reader( "!Color:!Silent" );
199  TMVA::Reader *reader1 = new TMVA::Reader( "!Color:!Silent" );
200  TMVA::Reader *reader2 = new TMVA::Reader( "!Color:!Silent" );
201 
202  reader0->AddVariable( "var1", &var1 );
203  reader0->AddVariable( "var2", &var2 );
204  reader0->AddVariable( "var3", &var3 );
205  reader0->AddVariable( "var4", &var4 );
206 
207  reader1->AddVariable( "var1", &var1 );
208  reader1->AddVariable( "var2", &var2 );
209  reader1->AddVariable( "var3", &var3 );
210  reader1->AddVariable( "var4", &var4 );
211 
212  reader2->AddVariable( "var1", &var1 );
213  reader2->AddVariable( "var2", &var2 );
214  reader2->AddVariable( "var3", &var3 );
215  reader2->AddVariable( "var4", &var4 );
216 
217  // ====== load the weight files for the readers
218  TString method = "BDT method";
219  reader0->BookMVA( "BDT method", "weights/TMVAMultiBkg0_BDTG.weights.xml" );
220  reader1->BookMVA( "BDT method", "weights/TMVAMultiBkg1_BDTG.weights.xml" );
221  reader2->BookMVA( "BDT method", "weights/TMVAMultiBkg2_BDTG.weights.xml" );
222 
223  // ===== load the input file
224  TFile *input(0);
225  TString fname = "./tmva_example_multiple_background.root";
226  input = TFile::Open( fname );
227 
228  TTree* theTree = NULL;
229 
230  // ===== loop through signal and all background trees
231  for( int treeNumber = 0; treeNumber < 4; ++treeNumber ) {
232  if( treeNumber == 0 ){
233  theTree = (TTree*)input->Get("TreeS");
234  std::cout << "--- Select signal sample" << std::endl;
235 // theTree->SetBranchAddress( "weight", &weight );
236  weight = 1;
237  classID = 0;
238  }else if( treeNumber == 1 ){
239  theTree = (TTree*)input->Get("TreeB0");
240  std::cout << "--- Select background 0 sample" << std::endl;
241 // theTree->SetBranchAddress( "weight", &weight );
242  weight = 1;
243  classID = 1;
244  }else if( treeNumber == 2 ){
245  theTree = (TTree*)input->Get("TreeB1");
246  std::cout << "--- Select background 1 sample" << std::endl;
247 // theTree->SetBranchAddress( "weight", &weight );
248  weight = 1;
249  classID = 2;
250  }else if( treeNumber == 3 ){
251  theTree = (TTree*)input->Get("TreeB2");
252  std::cout << "--- Select background 2 sample" << std::endl;
253 // theTree->SetBranchAddress( "weight", &weight );
254  weight = 1;
255  classID = 3;
256  }
257 
258 
259  theTree->SetBranchAddress( "var1", &var1 );
260  theTree->SetBranchAddress( "var2", &var2 );
261  theTree->SetBranchAddress( "var3", &var3 );
262  theTree->SetBranchAddress( "var4", &var4 );
263 
264 
265  std::cout << "--- Processing: " << theTree->GetEntries() << " events" << std::endl;
266  TStopwatch sw;
267  sw.Start();
268  Int_t nEvent = theTree->GetEntries();
269 // Int_t nEvent = 100;
270  for (Long64_t ievt=0; ievt<nEvent; ievt++) {
271 
272  if (ievt%1000 == 0){
273  std::cout << "--- ... Processing event: " << ievt << std::endl;
274  }
275 
276  theTree->GetEntry(ievt);
277 
278  // ==== get the classifiers for each of the signal/background classifications
279  classifier0 = reader0->EvaluateMVA( method );
280  classifier1 = reader1->EvaluateMVA( method );
281  classifier2 = reader2->EvaluateMVA( method );
282 
283  outputTree->Fill();
284  }
285 
286 
287  // get elapsed time
288  sw.Stop();
289  std::cout << "--- End of event loop: "; sw.Print();
290  }
291  input->Close();
292 
293 
294  // write output tree
295 // outputTree->SetDirectory(outputFile);
296 // outputTree->Write();
297  outputFile->Write();
298 
299  outputFile->Close();
300 
301  std::cout << "--- Created root file: \"" << outfileName.Data() << "\" containing the MVA output histograms" << std::endl;
302 
303  delete reader0;
304  delete reader1;
305  delete reader2;
306 
307  std::cout << "==> Application of readers is done! combined tree created" << std::endl << std::endl;
308 
309 }
310 
311 
312 
313 
314 // ------------------------------ Genetic Algorithm Fitness definition ----------------------------------------------------------------
315 class MyFitness : public IFitterTarget {
316 public:
317  // constructor
318  MyFitness( TChain* _chain ) : IFitterTarget() {
319  chain = _chain;
320 
321  hSignal = new TH1F("hsignal","hsignal",100,-1,1);
322  hFP = new TH1F("hfp","hfp",100,-1,1);
323  hTP = new TH1F("htp","htp",100,-1,1);
324 
325  TString cutsAndWeightSignal = "weight*(classID==0)";
326  nSignal = chain->Draw("Entry$/Entries$>>hsignal",cutsAndWeightSignal,"goff");
327  weightsSignal = hSignal->Integral();
328 
329  }
330 
331  // the output of this function will be minimized
332  Double_t EstimatorFunction( std::vector<Double_t> & factors ){
333 
334  TString cutsAndWeightTruePositive = Form("weight*((classID==0) && cls0>%f && cls1>%f && cls2>%f )",factors.at(0), factors.at(1), factors.at(2));
335  TString cutsAndWeightFalsePositive = Form("weight*((classID >0) && cls0>%f && cls1>%f && cls2>%f )",factors.at(0), factors.at(1), factors.at(2));
336 
337  // Entry$/Entries$ just draws something reasonable. Could in principle anything
338  Float_t nTP = chain->Draw("Entry$/Entries$>>htp",cutsAndWeightTruePositive,"goff");
339  Float_t nFP = chain->Draw("Entry$/Entries$>>hfp",cutsAndWeightFalsePositive,"goff");
340 
341  weightsTruePositive = hTP->Integral();
342  weightsFalsePositive = hFP->Integral();
343 
344  efficiency = 0;
345  if( weightsSignal > 0 )
346  efficiency = weightsTruePositive/weightsSignal;
347 
348  purity = 0;
349  if( weightsTruePositive+weightsFalsePositive > 0 )
350  purity = weightsTruePositive/(weightsTruePositive+weightsFalsePositive);
351 
352  Float_t effTimesPur = efficiency*purity;
353 
354  Float_t toMinimize = std::numeric_limits<float>::max(); // set to the highest existing number
355  if( effTimesPur > 0 ) // if larger than 0, take 1/x. This is the value to minimize
356  toMinimize = 1./(effTimesPur); // we want to minimize 1/efficiency*purity
357 
358  // Print();
359 
360  return toMinimize;
361  }
362 
363 
364  void Print(){
365  std::cout << std::endl;
366  std::cout << "======================" << std::endl
367  << "Efficiency : " << efficiency << std::endl
368  << "Purity : " << purity << std::endl << std::endl
369  << "True positive weights : " << weightsTruePositive << std::endl
370  << "False positive weights: " << weightsFalsePositive << std::endl
371  << "Signal weights : " << weightsSignal << std::endl;
372  }
373 
374  Float_t nSignal;
375 
376  Float_t efficiency;
377  Float_t purity;
378  Float_t weightsTruePositive;
379  Float_t weightsFalsePositive;
380  Float_t weightsSignal;
381 
382 
383 private:
384  TChain* chain;
385  TH1F* hSignal;
386  TH1F* hFP;
387  TH1F* hTP;
388 
389 };
390 
391 
392 
393 
394 
395 
396 
397 
398 // ------------------------------ call of Genetic algorithm ----------------------------------------------------------------
400 
401  // define all the parameters by their minimum and maximum value
402  // in this example 3 parameters (=cuts on the classifiers) are defined.
403  vector<Interval*> ranges;
404  ranges.push_back( new Interval(-1,1) ); // for some classifiers (especially LD) the ranges have to be taken larger
405  ranges.push_back( new Interval(-1,1) );
406  ranges.push_back( new Interval(-1,1) );
407 
408  std::cout << "Classifier ranges (defined by the user)" << std::endl;
409  for( std::vector<Interval*>::iterator it = ranges.begin(); it != ranges.end(); it++ ){
410  std::cout << " range: " << (*it)->GetMin() << " " << (*it)->GetMax() << std::endl;
411  }
412 
413  TChain* chain = new TChain("multiBkg");
414  chain->Add("tmva_example_multiple_backgrounds__applied.root");
415 
416  IFitterTarget* myFitness = new MyFitness( chain );
417 
418  // prepare the genetic algorithm with an initial population size of 20
419  // mind: big population sizes will help in searching the domain space of the solution
420  // but you have to weight this out to the number of generations
421  // the extreme case of 1 generation and populationsize n is equal to
422  // a Monte Carlo calculation with n tries
423 
424  const TString name( "multipleBackgroundGA" );
425  const TString opts( "PopSize=100:Steps=30" );
426 
427  GeneticFitter mg( *myFitness, name, ranges, opts);
428  // mg.SetParameters( 4, 30, 200, 10,5, 0.95, 0.001 );
429 
430  std::vector<Double_t> result;
431  Double_t estimator = mg.Run(result);
432 
433  dynamic_cast<MyFitness*>(myFitness)->Print();
434  std::cout << std::endl;
435 
436  int n = 0;
437  for( std::vector<Double_t>::iterator it = result.begin(); it<result.end(); it++ ){
438  std::cout << " cutValue[" << n << "] = " << (*it) << ";"<< std::endl;
439  n++;
440  }
441 
442 
443 }
444 
445 
446 } // namespace TMVA
447 
448 
449 // ------------------------------ Run all ----------------------------------------------------------------
451 {
452  cout << "Start Test TMVAGAexample" << endl
453  << "========================" << endl
454  << endl;
455 
456  TString createDataMacro = TString(gSystem->DirName(__FILE__) ) + "/createData.C";
457  gROOT->ProcessLine(TString::Format(".L %s",createDataMacro.Data()));
458  gROOT->ProcessLine("create_MultipleBackground(2000)");
459 
460 
461  cout << endl;
462  cout << "========================" << endl;
463  cout << "--- Training" << endl;
464  TMVA::Training();
465 
466  cout << endl;
467  cout << "========================" << endl;
468  cout << "--- Application & create combined tree" << endl;
470 
471  cout << endl;
472  cout << "========================" << endl;
473  cout << "--- maximize significance" << endl;
475 }
476 
477 int main( int argc, char** argv ) {
479 }
void Print(Option_t *option="") const
Print the real and cpu time passed between the start and stop events.
Definition: TStopwatch.cxx:217
long long Long64_t
Definition: RtypesCore.h:69
void Start(Bool_t reset=kTRUE)
Start the stopwatch.
Definition: TStopwatch.cxx:56
float Float_t
Definition: RtypesCore.h:53
void AddVariable(const TString &expression, Float_t *)
Add a float variable or expression to the reader.
Definition: Reader.cxx:307
Double_t Run(std::vector< Double_t > &pars)
Execute fitting.
virtual Int_t Fill()
Fill all branches.
Definition: TTree.cxx:4306
#define gROOT
Definition: TROOT.h:344
virtual Int_t GetEntry(Long64_t entry=0, Int_t getall=0)
Read all branches of entry and return total number of bytes read.
Definition: TTree.cxx:5144
Basic string class.
Definition: TString.h:137
int Int_t
Definition: RtypesCore.h:41
virtual const char * DirName(const char *pathname)
Return the directory name in pathname.
Definition: TSystem.cxx:980
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: Factory.cxx:540
void TrainAllMethods()
iterates through all booked methods and calls training
Definition: Factory.cxx:965
void TMVAMultipleBackgroundExample()
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: Factory.cxx:458
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=1, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3851
TChain chain("h42")
const char * Data() const
Definition: TString.h:349
void Stop()
Stop the stopwatch.
Definition: TStopwatch.cxx:75
virtual Long64_t Draw(const char *varexp, const TCut &selection, Option_t *option="", Long64_t nentries=kMaxEntries, Long64_t firstentry=0)
Draw expression varexp for selected entries.
Definition: TChain.cxx:754
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString...
Definition: TString.cxx:2321
virtual Int_t SetBranchAddress(const char *bname, void *add, TBranch **ptr=0)
Change branch address, dealing with clone trees properly.
Definition: TTree.cxx:7510
int main(int argc, char **argv)
IMethod * BookMVA(const TString &methodTag, const TString &weightfile)
read method name from weight file
Definition: Reader.cxx:376
A specialized string object used for TTree selections.
Definition: TCut.h:27
R__EXTERN TSystem * gSystem
Definition: TSystem.h:545
MethodBase * BookMethod(TString theMethodName, TString methodTitle, TString theOption="")
Book a classifier or regression method.
Definition: Factory.cxx:706
void EvaluateAllMethods(void)
iterates over all MVAs that have been booked, and calls their evaluation methods
Definition: Factory.cxx:1185
void TestAllMethods()
Definition: Factory.cxx:1085
char * Form(const char *fmt,...)
void Print(std::ostream &os, const OptionType &opt)
double Double_t
Definition: RtypesCore.h:55
void ApplicationCreateCombinedTree()
Double_t EvaluateMVA(const std::vector< Float_t > &, const TString &methodTag, Double_t aux=0)
Evaluate a std::vector<float> of input data for a given method The parameter aux is obligatory for th...
Definition: Reader.cxx:484
static Vc_ALWAYS_INLINE int_v max(const int_v &x, const int_v &y)
Definition: vector.h:440
#define name(a, b)
Definition: linkTestLib0.cpp:5
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: Factory.cxx:427
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1623
A chain is a collection of files containg TTree objects.
Definition: TChain.h:35
#define NULL
Definition: Rtypes.h:82
virtual Long64_t GetEntries() const
Definition: TTree.h:386
A TTree object has a header with a name and a title.
Definition: TTree.h:98
double result[121]
const Int_t n
Definition: legend1.C:16
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: Factory.cxx:679
virtual Int_t Add(TChain *chain)
Add all files referenced by the passed chain to this chain.
Definition: TChain.cxx:210
Stopwatch class.
Definition: TStopwatch.h:30