Logo ROOT   6.10/09
Reference Guide
efficienciesMulticlass.cxx
Go to the documentation of this file.
1 // @(#)Root/tmva $Id$
2 // Author: Kim Albertsson
3 /**********************************************************************************
4  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
5  * Package: TMVAGUI *
6  * Web : http://tmva.sourceforge.net *
7  * *
8  * Description: *
9  * Implementation (see header for description) *
10  * *
11  * Authors : *
12  * Kim Albertsson <kim.albertsson@cern.ch> - LTU & CERN *
13  * *
14  * Copyright (c) 2005-2017: *
15  * CERN, Switzerland *
16  * LTU, Sweden *
17  * *
18  * Redistribution and use in source and binary forms, with or without *
19  * modification, are permitted according to the terms listed in LICENSE *
20  * (http://tmva.sourceforge.net/LICENSE) *
21  **********************************************************************************/
22 
24 
25 #include "TFile.h"
26 #include "TGraph.h"
27 #include "TH2F.h"
28 #include "TIterator.h"
29 #include "TKey.h"
30 
31 ////////////////////////////////////////////////////////////////////////////////
32 ///
33 /// Note: This file assumes a certain structure on the input file. The structure
34 /// is as follows:
35 ///
36 /// - dataset (TDirectory)
37 /// - ... some variables, plots ...
38 /// - Method_XXX (TDirectory)
39 /// + XXX (TDirectory)
40 /// * ... some plots ...
41 /// * MVA_Method_XXX_Test_#classname#
42 /// * ... some plots ...
43 /// - Method_YYY (TDirectory)
44 /// + YYY (TDirectory)
45 /// * ... some plots ...
46 /// * MVA_Method_YYY_Test_#classname#
47 /// * ... some plots ...
48 /// - TestTree (TTree)
49 /// + ... data...
50 /// - TrainTree (TTree)
51 /// + ... data...
52 ///
53 /// Keeping this in mind makes the main loop in plotEfficienciesMulticlass easier
54 /// to follow :)
55 ///
56 
57 ////////////////////////////////////////////////////////////////////////////////
58 /// Wrapper for a canvas that also keeps track of color assignments for added
59 /// subgraphs.
60 
61 class EfficiencyPlotWrapper {
62 
63 public:
64  TCanvas *fCanvas;
65  TLegend *fLegend;
66 
67  TString fClassname;
68  Int_t fColor;
69 
70  UInt_t fNumMethods;
71 
72  EfficiencyPlotWrapper(TString classname);
73  Int_t addGraph(TGraph *graph);
74 
75  void addLegendEntry(TString methodTitle, TGraph *graph);
76 
77 private:
78  Float_t fx0L;
79  Float_t fdxL;
80  Float_t fy0H;
81  Float_t fdyH;
82 
83  TCanvas *newEfficiencyCanvas(TString className);
84  TLegend *newEfficiencyLegend();
85 };
86 
87 ////////////////////////////////////////////////////////////////////////////////
88 /// Constructs a new canvas + auxiliary data for showing an efficiency plot.
89 ///
90 
91 EfficiencyPlotWrapper::EfficiencyPlotWrapper(TString classname)
92 {
93  // Legend extents (init before calling newEfficiencyLegend...)
94  fx0L = 0.107;
95  fy0H = 0.899;
96  fdxL = 0.457 - fx0L;
97  fdyH = 0.22;
98  fx0L = 0.15;
99  fy0H = 1 - fy0H + fdyH + 0.07;
100 
101  fColor = 1;
102  fNumMethods = 0;
103 
104  fClassname = classname;
105  fCanvas = newEfficiencyCanvas(classname);
106  fLegend = newEfficiencyLegend();
107 }
108 
109 ////////////////////////////////////////////////////////////////////////////////
110 /// Adds a new graph to the plot. The added graph should contain a single ROC
111 /// curve.
112 ///
113 
114 Int_t EfficiencyPlotWrapper::addGraph(TGraph *graph)
115 {
116  graph->SetLineWidth(3);
117  graph->SetLineColor(fColor);
118  fColor++;
119  if (fColor == 5 || fColor == 10 || fColor == 11) {
120  fColor++;
121  }
122 
123  fCanvas->cd();
124  graph->Draw("");
125  fCanvas->Update();
126 
127  ++fNumMethods;
128 
129  return fColor;
130 }
131 
132 ////////////////////////////////////////////////////////////////////////////////
133 /// WARNING: Uses the current color, thus the correct call ordering is:
134 /// plotWrapper->addGraph(...);
135 /// plotWrapper->addLegendEntry(...);
136 ///
137 
138 void EfficiencyPlotWrapper::addLegendEntry(TString methodTitle, TGraph *graph)
139 {
140  fLegend->AddEntry(graph, methodTitle, "l");
141 
142  Float_t dyH_local = fdyH * (Float_t(TMath::Min((UInt_t)10, fNumMethods) - 3.0) / 4.0);
143  fLegend->SetY2(fy0H + dyH_local);
144 
145  fLegend->Paint();
146  fCanvas->Update();
147 }
148 
149 ////////////////////////////////////////////////////////////////////////////////
150 /// Helper to create new Canvas
151 
152 TCanvas *EfficiencyPlotWrapper::newEfficiencyCanvas(TString className)
153 {
154  TString canvas_name = Form("%s_%s", className.Data(), "canvas");
155  TString canvas_title = Form("ROC Curve %s", className.Data());
156  TCanvas *c = new TCanvas(canvas_name, canvas_title, 200, 0, 650, 500);
157  // global style settings
158  c->SetGrid();
159  c->SetTicks();
160 
161  // Frame
162  TString xtit = "Signal Efficiency";
163  TString ytit = "Background Rejection (1 - eff)";
164  TString ftit = Form("Background Rejection vs Signal Efficiency %s", className.Data());
165  Double_t x1 = 0.0;
166  Double_t x2 = 1.0;
167  Double_t y1 = 0.0;
168  Double_t y2 = 1.0;
169 
170  TH2F *frame = new TH2F(Form("%s_%s", className.Data(), "frame"), ftit, 500, x1, x2, 500, y1, y2);
171  frame->GetXaxis()->SetTitle(xtit);
172  frame->GetYaxis()->SetTitle(ytit);
173  TMVA::TMVAGlob::SetFrameStyle(frame, 1.0);
174  frame->DrawClone();
175 
176  return c;
177 }
178 
179 ////////////////////////////////////////////////////////////////////////////////
180 /// Helper to create new legend.
181 
182 TLegend *EfficiencyPlotWrapper::newEfficiencyLegend()
183 {
184  TLegend *legend = new TLegend(fx0L, fy0H - fdyH, fx0L + fdxL, fy0H);
185  // legend->SetTextSize( 0.05 );
186  legend->SetHeader("MVA Method:");
187  legend->SetMargin(0.4);
188  legend->Draw("");
189 
190  return legend;
191 }
192 
193 ////////////////////////////////////////////////////////////////////////////////
194 /// Entry point. Called from the TMVAMulticlassGui Buttons
195 ///
196 /// @param dataset Dataset to operate on. Should be created by the TMVA Multiclass Factory.
197 /// @param filename_input Name of the input file procuded by a TMVA Multiclass Factory.
198 /// @param plotType Specified what kind of ROC curve to draw. Currently only rejB vs. effS is supported.
199 
200 void TMVA::efficienciesMulticlass(TString dataset, TString filename_input, EEfficiencyPlotType plotType,
201  Bool_t useTMVAStyle)
202 {
203  // set style and remove existing canvas'
204  TMVAGlob::Initialize(useTMVAStyle);
205 
206  // checks if filename_input is already open, and if not opens one
207  TFile *file = TMVAGlob::OpenFile(filename_input);
208  if (file == nullptr) {
209  std::cout << "ERROR: filename \"" << filename_input << "\" is not found.";
210  return;
211  }
212 
213  plotEfficienciesMulticlass(plotType, file->GetDirectory(dataset.Data()));
214 
215  return;
216 }
217 
218 ////////////////////////////////////////////////////////////////////////////////
219 /// Work horse function. Will operate on the currently open file (opened by
220 /// efficienciesMulticlass).
221 ///
222 /// @param plotType See effcienciesMulticlass.
223 /// @param binDir Directory in the file on which to operate.
224 
225 void TMVA::plotEfficienciesMulticlass(EEfficiencyPlotType plotType, TDirectory *binDir)
226 {
227  // The current multiclass version implements only type 2 - rejB vs effS
228  if (plotType != EEfficiencyPlotType::kRejBvsEffS) {
229  std::cout << "Error: For multiclass, only rejB vs effS is currently implemented.";
230  }
231 
232  TString methodPrefix = "MVA_";
233  TString graphNameRef = "rejBvsS";
234  std::map<TString, EfficiencyPlotWrapper *> classCanvasMap;
235 
236  TList methods;
237  UInt_t nm = TMVAGlob::GetListOfMethods(methods, binDir);
238  if (nm == 0) {
239  cout << "ups .. no methods found in to plot ROC curve for ... give up" << endl;
240  return;
241  }
242  // TIter next(file->GetListOfKeys());
243  TIter next(&methods);
244 
245  // Loop over all method categories
246  TKey *key;
247  while ((key = (TKey *)next())) {
248  TDirectory *mDir = (TDirectory *)key->ReadObj();
249  TList titles;
250  TMVAGlob::GetListOfTitles(mDir, titles);
251 
252  // Loop over each method within a category
253  TIter nextTitle(&titles);
254  TKey *titkey;
255  TDirectory *titDir;
256  while ((titkey = TMVAGlob::NextKey(nextTitle, "TDirectory"))) {
257  titDir = (TDirectory *)titkey->ReadObj();
258  TString methodTitle;
259  TMVAGlob::GetMethodTitle(methodTitle, titDir);
260 
261  // Loop through all plots for the method
262  TIter nextKey(titDir->GetListOfKeys());
263  TKey *hkey2;
264  while ((hkey2 = TMVAGlob::NextKey(nextKey, "TGraph"))) {
265 
266  TGraph *h = (TGraph *)hkey2->ReadObj();
267  TString hname = h->GetName();
268  if (hname.Contains(graphNameRef) && hname.BeginsWith(methodPrefix) && not hname.Contains("Train")) {
269 
270  // Extract classname from plot name
271  UInt_t index = hname.Last('_');
272  TString classname = hname(index + 1, hname.Length() - (index + 1));
273 
274  EfficiencyPlotWrapper *plotWrapper;
275  // Creating the class map lazily, TMVAGlob::GetClassNames is
276  // bugged and reports more classes than there are. This method
277  // does not.
278  try {
279  plotWrapper = classCanvasMap.at(classname);
280  } catch (...) {
281  plotWrapper = new EfficiencyPlotWrapper(classname);
282  classCanvasMap.emplace(classname.Data(), plotWrapper);
283  }
284 
285  plotWrapper->addGraph(h);
286  plotWrapper->addLegendEntry(methodTitle, h);
287  }
288  }
289  }
290  }
291 }
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
virtual void SetLineWidth(Width_t lwidth)
Set the line width.
Definition: TAttLine.h:43
virtual TList * GetListOfKeys() const
Definition: TDirectory.h:148
This class displays a legend box (TPaveText) containing several legend entries.
Definition: TLegend.h:23
virtual TObject * DrawClone(Option_t *option="") const
Draw a clone of this object in the current selected pad for instance with: gROOT->SetSelectedPad(gPad...
Definition: TObject.cxx:226
float Float_t
Definition: RtypesCore.h:53
TFile * OpenFile(const TString &fin)
Definition: tmvaglob.cxx:192
virtual void Draw(Option_t *option="")
Draw this legend with its current attributes.
Definition: TLegend.cxx:452
TH1 * h
Definition: legend2.C:5
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format...
Definition: TFile.h:46
void SetFrameStyle(TH1 *frame, Float_t scale=1.0)
Definition: tmvaglob.cxx:77
Basic string class.
Definition: TString.h:129
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:168
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
void SetMargin(Float_t margin)
Definition: TLegend.h:69
TKey * NextKey(TIter &keyIter, TString className)
Definition: tmvaglob.cxx:357
virtual void Draw(Option_t *chopt="")
Draw this graph with its current attributes.
Definition: TGraph.cxx:745
void efficienciesMulticlass(TString dataset, TString filename_input="TMVAMulticlass.root", EEfficiencyPlotType plotType=EEfficiencyPlotType::kRejBvsEffS, Bool_t useTMVAStyle=kTRUE)
TLegend * legend
Definition: pirndm.C:35
static const double x2[5]
void GetMethodTitle(TString &name, TKey *ikey)
Definition: tmvaglob.cxx:341
Book space in a file, create I/O buffers, to fill them, (un)compress them.
Definition: TKey.h:24
A doubly linked list.
Definition: TList.h:43
virtual void SetLineColor(Color_t lcolor)
Set the line color.
Definition: TAttLine.h:40
void Initialize(Bool_t useTMVAStyle=kTRUE)
Definition: tmvaglob.cxx:176
2-D histogram with a float per channel (see TH1 documentation)
Definition: TH2.h:249
Bool_t BeginsWith(const char *s, ECaseCompare cmp=kExact) const
Definition: TString.h:563
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
Ssiz_t Length() const
Definition: TString.h:388
TAxis * GetYaxis()
Definition: TH1.h:301
virtual TDirectory * GetDirectory(const char *apath, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory named "apath".
Definition: graph.py:1
The Canvas class.
Definition: TCanvas.h:31
static const double x1[5]
UInt_t GetListOfMethods(TList &methods, TDirectory *dir=0)
Definition: tmvaglob.cxx:582
double Double_t
Definition: RtypesCore.h:55
Ssiz_t Last(char c) const
Find last occurrence of a character c.
Definition: TString.cxx:875
Describe directory structure in memory.
Definition: TDirectory.h:34
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:572
void plotEfficienciesMulticlass(EEfficiencyPlotType plotType=EEfficiencyPlotType::kRejBvsEffS, TDirectory *BinDir=0)
virtual TObject * ReadObj()
To read a TObject* from the file.
Definition: TKey.cxx:730
Definition: file.py:1
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
virtual void SetTitle(const char *title="")
Set the title of the TNamed.
Definition: TNamed.cxx:155
THist< 2, float, THistStatContent, THistStatUncertainty > TH2F
Definition: THist.hxx:317
TAxis * GetXaxis()
Definition: TH1.h:300
const char * Data() const
Definition: TString.h:347
UInt_t GetListOfTitles(TDirectory *rfdir, TList &titles)
Definition: tmvaglob.cxx:635
virtual void SetHeader(const char *header="", Option_t *option="")
Sets the header, which is the "title" that appears at the top of the legend.
Definition: TLegend.cxx:1126