ROOT 6.14/05
Reference Guide
Classification.cxx
1 // @(#)root/tmva $Id$
2 // Author: Omar Zapata, Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan
3 // Therhaag
4 
5 #include <TMVA/Classification.h>
6 
7 #include <TMVA/ClassifierFactory.h>
8 #include <TMVA/Config.h>
9 #include <TMVA/Configurable.h>
10 #include <TMVA/Tools.h>
11 #include <TMVA/Ranking.h>
12 #include <TMVA/DataSet.h>
13 #include <TMVA/IMethod.h>
14 #include <TMVA/MethodBase.h>
15 #include <TMVA/DataInputHandler.h>
16 #include <TMVA/DataSetManager.h>
17 #include <TMVA/DataSetInfo.h>
18 #include <TMVA/DataLoader.h>
19 #include <TMVA/MethodBoost.h>
20 #include <TMVA/MethodCategory.h>
21 #include <TMVA/ROCCalc.h>
22 #include <TMVA/ROCCurve.h>
23 #include <TMVA/MsgLogger.h>
24 
25 #include <TMVA/VariableInfo.h>
26 #include <TMVA/VariableTransform.h>
27 
28 #include <TMVA/Types.h>
29 
30 #include <TROOT.h>
31 #include <TFile.h>
32 #include <TTree.h>
33 #include <TLeaf.h>
34 #include <TEventList.h>
35 #include <TH2.h>
36 #include <TText.h>
37 #include <TLegend.h>
38 #include <TGraph.h>
39 #include <TStyle.h>
40 #include <TMatrixF.h>
41 #include <TMatrixDSym.h>
42 #include <TMultiGraph.h>
43 #include <TPaletteAxis.h>
44 #include <TPrincipal.h>
45 #include <TMath.h>
46 #include <TObjString.h>
47 #include <TSystem.h>
48 #include <TCanvas.h>
49 #include <iostream>
50 #include <memory>
51 #define MinNoTrainingEvents 10
52 
53 //_______________________________________________________________________
54 TMVA::Experimental::ClassificationResult::ClassificationResult() : fROCIntegral(0)
55 {
56 }
57 
58 //_______________________________________________________________________
59 TMVA::Experimental::ClassificationResult::ClassificationResult(const ClassificationResult &cr)
60 {
61  fMethod = cr.fMethod;
62  fDataLoaderName = cr.fDataLoaderName;
63  fMvaTrain = cr.fMvaTrain;
64  fMvaTest = cr.fMvaTest;
65  fIsCuts = cr.fIsCuts;
66  fROCIntegral = cr.fROCIntegral;
67 }
68 
69 //_______________________________________________________________________
70 /**
71  * Method to get the ROC-Integral value from the MVA outputs.
72  * \param iClass class index; default 0 (signal).
73  * \param type train/test tree type; default is the test tree.
74  * \return Double_t with the ROC-Integral value.
75  */
76 Double_t TMVA::Experimental::ClassificationResult::GetROCIntegral(UInt_t iClass, TMVA::Types::ETreeType type)
77 {
78  if (fIsCuts) {
79  return fROCIntegral;
80  } else {
81  auto roc = GetROC(iClass, type);
82  auto inte = roc->GetROCIntegral();
83  delete roc;
84  return inte;
85  }
86 }
87 
88 //_______________________________________________________________________
89 /**
90  * Method to get TMVA::ROCCurve Object.
91  * \param iClass category, default 0 then signal
92  * \param type train/test tree, default test.
93  * \return TMVA::ROCCurve object.
94  */
96 {
97  ROCCurve *fROCCurve = nullptr;
98  if (type == TMVA::Types::kTesting)
99  fROCCurve = new ROCCurve(fMvaTest[iClass]);
100  else
101  fROCCurve = new ROCCurve(fMvaTrain[iClass]);
102  return fROCCurve;
103 }
104 
105 //_______________________________________________________________________
106 TMVA::Experimental::ClassificationResult &
107 TMVA::Experimental::ClassificationResult::operator=(const TMVA::Experimental::ClassificationResult &cr)
108 {
109  fMethod = cr.fMethod;
110  fDataLoaderName = cr.fDataLoaderName;
111  fMvaTrain = cr.fMvaTrain;
112  fMvaTest = cr.fMvaTest;
113  fIsCuts = cr.fIsCuts;
114  fROCIntegral = cr.fROCIntegral;
115  return *this;
116 }
117 
118 //_______________________________________________________________________
119 /**
120  * Method to print the results to stdout:
121  * data loader name, method name/title and ROC-integral.
122  */
123 void TMVA::Experimental::ClassificationResult::Show()
124 {
125  MsgLogger fLogger("Classification");
128  TString hLine = "--------------------------------------------------- :";
129 
130  fLogger << kINFO << hLine << Endl;
131  fLogger << kINFO << "DataSet MVA :" << Endl;
132  fLogger << kINFO << "Name: Method/Title: ROC-integ :" << Endl;
133  fLogger << kINFO << hLine << Endl;
134  fLogger << kINFO << Form("%-20s %-15s %#1.3f :", fDataLoaderName.Data(),
135  Form("%s/%s", fMethod.GetValue<TString>("MethodName").Data(),
136  fMethod.GetValue<TString>("MethodTitle").Data()),
137  GetROCIntegral())
138  << Endl;
139  fLogger << kINFO << hLine << Endl;
140 
142 }
143 
144 //_______________________________________________________________________
145 /**
146  * Method to get TGraph object with the ROC curve.
147  * \param iClass class index; default 0 (signal).
148  * \param type train/test tree type; default is the test tree.
149  * \return TGraph object.
150  */
151 TGraph *TMVA::Experimental::ClassificationResult::GetROCGraph(UInt_t iClass, TMVA::Types::ETreeType type)
152 {
153  TGraph *roc = GetROC(iClass, type)->GetROCCurve();
154  roc->SetName(Form("%s/%s", GetMethodName().Data(), GetMethodTitle().Data()));
155  roc->SetTitle(Form("%s/%s", GetMethodName().Data(), GetMethodTitle().Data()));
156  roc->GetXaxis()->SetTitle(" Signal Efficiency ");
157  roc->GetYaxis()->SetTitle(" Background Rejection ");
158  return roc;
159 }
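
A minimal sketch (not part of this file) of how the ClassificationResult accessors above might be used once a result has been obtained from a Classification run; the function name and drawing call are illustrative:

#include <TMVA/Classification.h>
#include <TGraph.h>
#include <iostream>

// `result` is assumed to come from Classification::GetResults().
void InspectResult(TMVA::Experimental::ClassificationResult &result)
{
   result.Show();                             // print dataset / method / ROC-integral table
   Double_t auc = result.GetROCIntegral();    // ROC integral for class 0 (signal), test tree
   TGraph *rocGraph = result.GetROCGraph();   // signal efficiency vs. background rejection
   rocGraph->Draw("AL");
   std::cout << "ROC integral = " << auc << std::endl;
}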
160 
161 //_______________________________________________________________________
162 /**
163  * Method to check if method was booked.
164  * \param methodname name of the method.
165  * \param methodtitle method title.
166  * \return boolean true if the method was booked, false otherwise.
167  */
168 Bool_t TMVA::Experimental::ClassificationResult::IsMethod(TString methodname, TString methodtitle)
169 {
170  return fMethod.GetValue<TString>("MethodName") == methodname &&
171  fMethod.GetValue<TString>("MethodTitle") == methodtitle
172  ? kTRUE
173  : kFALSE;
174 }
175 
176 //_______________________________________________________________________
177 /**
178  * Constructor to create a two-class classifier.
179  * \param dataloader TMVA::DataLoader object with the data to train/test.
180  * \param file TFile object to save the results
181  * \param options string extra options.
182  */
183 TMVA::Experimental::Classification::Classification(DataLoader *dataloader, TFile *file, TString options)
184  : TMVA::Envelope("Classification", dataloader, file, options), fAnalysisType(Types::kClassification),
185  fCorrelations(kFALSE), fROC(kTRUE)
186 {
187  DeclareOptionRef(fCorrelations, "Correlations", "boolean to show correlation in output");
188  DeclareOptionRef(fROC, "ROC", "boolean to show ROC in output");
189  ParseOptions();
190  CheckForUnusedOptions();
191 
192  if (fModelPersistence)
193  gSystem->MakeDirectory(fDataLoader->GetName()); // creating directory for DataLoader output
194 }
195 
196 //_______________________________________________________________________
197 /**
198  * Constructor to create a two-class classifier without an output file.
199  * \param dataloader TMVA::DataLoader object with the data to train/test.
200  * \param options string extra options.
201  */
202 TMVA::Experimental::Classification::Classification(DataLoader *dataloader, TString options)
203  : TMVA::Envelope("Classification", dataloader, NULL, options), fAnalysisType(Types::kClassification),
204  fCorrelations(kFALSE), fROC(kTRUE)
205 {
206 
207  // init configurable
208  SetConfigDescription("Configuration options for Classification running");
209  SetConfigName(GetName());
210 
211  DeclareOptionRef(fCorrelations, "Correlations", "boolean to show correlation in output");
212  DeclareOptionRef(fROC, "ROC", "boolean to show ROC in output");
213  ParseOptions();
214  CheckForUnusedOptions();
215  if (fModelPersistence)
216  gSystem->MakeDirectory(fDataLoader->GetName()); // creating directory for DataLoader output
218 }
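
A hedged construction sketch: both constructors above take a DataLoader that has already been filled with variables and trees. The loader setup, tree pointers and variable names below are assumptions about a typical workflow, not something defined in this file:

#include <TMVA/Classification.h>
#include <TMVA/DataLoader.h>
#include <TFile.h>
#include <TTree.h>

// `signalTree` and `bkgTree` are hypothetical TTree pointers filled elsewhere.
void MakeClassifier(TTree *signalTree, TTree *bkgTree)
{
   auto loader = new TMVA::DataLoader("dataset");
   loader->AddVariable("var1");
   loader->AddVariable("var2");
   loader->AddSignalTree(signalTree);
   loader->AddBackgroundTree(bkgTree);
   loader->PrepareTrainingAndTestTree("", "SplitMode=Random:NormMode=NumEvents");

   // with an output file: results and weight files are written out
   TFile *outFile = TFile::Open("ClassificationOutput.root", "RECREATE");
   TMVA::Experimental::Classification cl(loader, outFile, "");

   // the second constructor can be used instead when no output file is wanted:
   // TMVA::Experimental::Classification clNoFile(loader, "");
}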
219 
220 //_______________________________________________________________________
221 TMVA::Experimental::Classification::~Classification()
222 {
223  for (auto m : fIMethods) {
224  if (m != NULL)
225  delete m;
226  }
227 }
228 
229 //_______________________________________________________________________
230 /**
231  * Return the options for the booked method.
232  * \param methodname name of the method.
233  * \param methodtitle method title.
234  * \return string with the options for the ml method.
235  */
236 TString TMVA::Experimental::Classification::GetMethodOptions(TString methodname, TString methodtitle)
237 {
238  for (auto &meth : fMethods) {
239  if (meth.GetValue<TString>("MethodName") == methodname && meth.GetValue<TString>("MethodTitle") == methodtitle)
240  return meth.GetValue<TString>("MethodOptions");
241  }
242  return "";
243 }
244 
245 //_______________________________________________________________________
246 /**
247  * Method to perform training and testing over all booked ml methods.
248  * If the option Jobs > 1, the work is done in parallel with MultiProc.
249  */
250 void TMVA::Experimental::Classification::Evaluate()
251 {
252  fTimer.Reset();
253  fTimer.Start();
254 
255  Bool_t roc = fROC;
256  fROC = kFALSE;
257  if (fJobs <= 1) {
258  Train();
259  Test();
260  } else {
261  for (auto &meth : fMethods) {
262  GetMethod(meth.GetValue<TString>("MethodName"), meth.GetValue<TString>("MethodTitle"));
263  }
264  fWorkers.SetNWorkers(fJobs);
265  auto executor = [=](UInt_t workerID) -> ClassificationResult {
266  TMVA::MsgLogger::InhibitOutput();
267  TMVA::gConfig().SetSilent(kTRUE);
268  TMVA::gConfig().SetUseColor(kFALSE);
269  TMVA::gConfig().SetDrawProgressBar(kFALSE);
270  auto methodname = fMethods[workerID].GetValue<TString>("MethodName");
271  auto methodtitle = fMethods[workerID].GetValue<TString>("MethodTitle");
272  auto meth = GetMethod(methodname, methodtitle);
273  if (!IsSilentFile()) {
274  auto fname = Form(".%s%s%s.root", fDataLoader->GetName(), methodname.Data(), methodtitle.Data());
275  auto f = new TFile(fname, "RECREATE");
276  f->mkdir(fDataLoader->GetName());
277  SetFile(f);
278  meth->SetFile(f);
279  }
280  TrainMethod(methodname, methodtitle);
281  TestMethod(methodname, methodtitle);
282  if (!IsSilentFile()) {
283  GetFile()->Close();
284  }
285  return GetResults(methodname, methodtitle);
286  };
287 
288  fResults = fWorkers.Map(executor, ROOT::TSeqI(fMethods.size()));
289  if (!IsSilentFile())
290  MergeFiles();
291  }
292 
293  fROC = roc;
295 
296  TString hLine = "--------------------------------------------------- :";
297  Log() << kINFO << hLine << Endl;
298  Log() << kINFO << "DataSet MVA :" << Endl;
299  Log() << kINFO << "Name: Method/Title: ROC-integ :" << Endl;
300  Log() << kINFO << hLine << Endl;
301  for (auto &r : fResults) {
302 
303  Log() << kINFO << Form("%-20s %-15s %#1.3f :", r.GetDataLoaderName().Data(),
304  Form("%s/%s", r.GetMethodName().Data(), r.GetMethodTitle().Data()), r.GetROCIntegral())
305  << Endl;
306  }
307  Log() << kINFO << hLine << Endl;
308 
309  Log() << kINFO << "-----------------------------------------------------" << Endl;
310  Log() << kHEADER << "Evaluation done." << Endl << Endl;
311  Log() << kINFO << Form("Jobs = %d Real Time = %lf ", fJobs, fTimer.RealTime()) << Endl;
312  Log() << kINFO << "-----------------------------------------------------" << Endl;
313  Log() << kINFO << "Evaluation done." << Endl;
315 }
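
A sketch of the Evaluate() workflow described above, assuming the methods are booked through the Envelope base class (BookMethod); the variable setup, method names and option strings are illustrative, not taken from this file:

#include <TMVA/Classification.h>
#include <TMVA/DataLoader.h>
#include <TMVA/Types.h>
#include <TFile.h>

// `loader` is a prepared TMVA::DataLoader, as in the earlier construction sketch.
void RunEvaluate(TMVA::DataLoader *loader)
{
   TFile *outFile = TFile::Open("ClassificationOutput.root", "RECREATE");
   // Jobs > 1 would run the per-method work in parallel, per the comment above.
   TMVA::Experimental::Classification cl(loader, outFile, "Jobs=1");

   // book methods (names/options illustrative)
   cl.BookMethod(TMVA::Types::kBDT, "BDT", "NTrees=100:MaxDepth=3");
   cl.BookMethod(TMVA::Types::kLD, "LD", "");

   cl.Evaluate(); // train + test every booked method

   for (auto &result : cl.GetResults())
      result.Show();

   outFile->Close();
}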
316 
317 //_______________________________________________________________________
318 /**
319  * Method to train all booked ml methods.
320  */
321 void TMVA::Experimental::Classification::Train()
322 {
323  for (auto &meth : fMethods) {
324  TrainMethod(meth.GetValue<TString>("MethodName"), meth.GetValue<TString>("MethodTitle"));
325  }
326 }
327 
328 //_______________________________________________________________________
329 /**
330  * Train a specific ml method.
331  * \param methodname name of the method.
332  * \param methodtitle method title.
333  */
334 void TMVA::Experimental::Classification::TrainMethod(TString methodname, TString methodtitle)
335 {
336  auto method = GetMethod(methodname, methodtitle);
337  if (!method) {
338  Log() << kFATAL
339  << Form("Trying to train method %s %s that may not be booked.", methodname.Data(), methodtitle.Data())
340  << Endl;
341  }
342  Log() << kHEADER << gTools().Color("bold") << Form("Training method %s %s", methodname.Data(), methodtitle.Data())
343  << gTools().Color("reset") << Endl;
344 
345  Event::SetIsTraining(kTRUE);
346  if ((fAnalysisType == Types::kMulticlass || fAnalysisType == Types::kClassification) &&
347  method->DataInfo().GetNClasses() < 2)
348  Log() << kFATAL << "You want to do classification training, but specified less than two classes." << Endl;
349 
350  // first print some information about the default dataset
351  // if(!IsSilentFile()) WriteDataInformation(method->fDataSetInfo);
352 
353  if (method->Data()->GetNTrainingEvents() < MinNoTrainingEvents) {
354  Log() << kWARNING << "Method " << method->GetMethodName() << " not trained (training tree has less entries ["
355  << method->Data()->GetNTrainingEvents() << "] than required [" << MinNoTrainingEvents << "]" << Endl;
356  return;
357  }
358 
359  Log() << kHEADER << "Train method: " << method->GetMethodName() << " for Classification" << Endl << Endl;
360  method->TrainMethod();
361  Log() << kHEADER << "Training finished" << Endl << Endl;
362 }
363 
364 //_______________________________________________________________________
365 /**
366  * Train a specific ml method given the method type in enum TMVA::Types::EMVA.
367  * \param method TMVA::Types::EMVA type.
368  * \param methodtitle method title.
369  */
370 void TMVA::Experimental::Classification::TrainMethod(Types::EMVA method, TString methodtitle)
371 {
372  TrainMethod(Types::Instance().GetMethodName(method), methodtitle);
373 }
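
The two TrainMethod overloads can also be driven directly instead of going through Evaluate(); a small sketch, assuming the methods were booked beforehand (method names/titles are illustrative):

#include <TMVA/Classification.h>
#include <TMVA/Types.h>

// `cl` is assumed to be a TMVA::Experimental::Classification with booked methods.
void TrainOnly(TMVA::Experimental::Classification &cl)
{
   cl.TrainMethod("BDT", "myBDT");             // by method name and title
   cl.TrainMethod(TMVA::Types::kMLP, "MLP");   // or by TMVA::Types::EMVA enum
   cl.Train();                                 // or simply train everything booked
}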
374 
375 //_______________________________________________________________________
376 /**
377  * Return a TMVA::MethodBase object. If the method is not booked, a null
378  * pointer is returned.
379  * \param methodname name of the method.
380  * \param methodtitle method title.
381  * \return TMVA::MethodBase object
382  */
383 TMVA::MethodBase *TMVA::Experimental::Classification::GetMethod(TString methodname, TString methodtitle)
384 {
385 
386  if (!HasMethod(methodname, methodtitle)) {
387  std::cout << methodname << " " << methodtitle << std::endl;
388  Log() << kERROR << "Trying to get method not booked." << Endl;
389  return 0;
390  }
391  Int_t index = -1;
392  if (HasMethodObject(methodname, methodtitle, index)) {
393  return dynamic_cast<MethodBase *>(fIMethods[index]);
394  }
395  // if it was not created yet, create it.
396  if (GetDataLoaderDataInput().GetEntries() <=
397  1) { // 0 entries --> 0 events, 1 entry --> dynamical dataset (or one entry)
398  Log() << kFATAL << "No input data for the training provided!" << Endl;
399  }
400  Log() << kHEADER << "Loading booked method: " << gTools().Color("bold") << methodname << " " << methodtitle
401  << gTools().Color("reset") << Endl << Endl;
402 
403  TString moptions = GetMethodOptions(methodname, methodtitle);
404 
405  // interpret option string with respect to a request for boosting (i.e., BoostNum > 0)
406  Int_t boostNum = 0;
407  auto conf = new TMVA::Configurable(moptions);
408  conf->DeclareOptionRef(boostNum = 0, "Boost_num", "Number of times the classifier will be boosted");
409  conf->ParseOptions();
410  delete conf;
411 
412  TString fFileDir;
413  if (fModelPersistence) {
414  fFileDir = fDataLoader->GetName();
415  fFileDir += "/" + gConfig().GetIONames().fWeightFileDir;
416  }
417 
418  // initialize methods
419  IMethod *im;
420  TString fJobName = GetName();
421  if (!boostNum) {
422  im = ClassifierFactory::Instance().Create(std::string(methodname.Data()), fJobName, methodtitle,
423  GetDataLoaderDataSetInfo(), moptions);
424  } else {
425  // boosted classifier, requires a specific definition, making it transparent for the user
426  Log() << kDEBUG << "Boost Number is " << boostNum << " > 0: train boosted classifier" << Endl;
427  im = ClassifierFactory::Instance().Create(std::string("Boost"), fJobName, methodtitle, GetDataLoaderDataSetInfo(),
428  moptions);
429  MethodBoost *methBoost = dynamic_cast<MethodBoost *>(im);
430  if (!methBoost)
431  Log() << kFATAL << "Method with type kBoost cannot be cast to MethodBoost. /Classification" << Endl;
432 
433  if (fModelPersistence)
434  methBoost->SetWeightFileDir(fFileDir);
435  methBoost->SetModelPersistence(fModelPersistence);
436  methBoost->SetBoostedMethodName(methodname);
437  methBoost->fDataSetManager = GetDataLoaderDataSetManager();
438  methBoost->SetFile(fFile.get());
439  methBoost->SetSilentFile(IsSilentFile());
440  }
441 
442  MethodBase *method = dynamic_cast<MethodBase *>(im);
443  if (method == 0)
444  return 0; // could not create method
445 
446  // set fDataSetManager if MethodCategory (to enable Category to create datasetinfo objects)
447  if (method->GetMethodType() == Types::kCategory) {
448  MethodCategory *methCat = (dynamic_cast<MethodCategory *>(im));
449  if (!methCat)
450  Log() << kFATAL << "Method with type kCategory cannot be casted to MethodCategory. /Classification" << Endl;
451 
452  if (fModelPersistence)
453  methCat->SetWeightFileDir(fFileDir);
454  methCat->SetModelPersistence(fModelPersistence);
455  methCat->fDataSetManager = GetDataLoaderDataSetManager();
456  methCat->SetFile(fFile.get());
457  methCat->SetSilentFile(IsSilentFile());
458  }
459 
460  if (!method->HasAnalysisType(fAnalysisType, GetDataLoaderDataSetInfo().GetNClasses(),
461  GetDataLoaderDataSetInfo().GetNTargets())) {
462  Log() << kWARNING << "Method " << method->GetMethodTypeName() << " is not capable of handling ";
463  Log() << "classification with " << GetDataLoaderDataSetInfo().GetNClasses() << " classes." << Endl;
464  return 0;
465  }
466 
467  if (fModelPersistence)
468  method->SetWeightFileDir(fFileDir);
469  method->SetModelPersistence(fModelPersistence);
470  method->SetAnalysisType(fAnalysisType);
471  method->SetupMethod();
472  method->ParseOptions();
473  method->ProcessSetup();
474  method->SetFile(fFile.get());
475  method->SetSilentFile(IsSilentFile());
476 
477  // check-for-unused-options is performed; may be overridden by derived classes
478  method->CheckSetup();
479  fIMethods.push_back(method);
480  return method;
481 }
482 
483 //_______________________________________________________________________
484 /**
485  * Check whether the TMVA::MethodBase object was created and return its index in the vector.
486  * \param methodname name of the method.
487  * \param methodtitle method title.
488  * \param index reference to Int_t with the position of the method in the vector fIMethods.
489  * \return boolean true if the method was found.
490  */
491 Bool_t TMVA::Experimental::Classification::HasMethodObject(TString methodname, TString methodtitle, Int_t &index)
492 {
493  if (fIMethods.empty())
494  return kFALSE;
495  for (UInt_t i = 0; i < fIMethods.size(); i++) {
496  // in MethodBase the method title is stored as the method name, and the method name as the type name
497  auto methbase = dynamic_cast<MethodBase *>(fIMethods[i]);
498  if (methbase->GetMethodTypeName() == methodname && methbase->GetMethodName() == methodtitle) {
499  index = i;
500  return kTRUE;
501  }
502  }
503  return kFALSE;
504 }
505 
506 //_______________________________________________________________________
507 /**
508  * Perform the test evaluation on all booked methods.
509  */
510 void TMVA::Experimental::Classification::Test()
511 {
512  for (auto &meth : fMethods) {
513  TestMethod(meth.GetValue<TString>("MethodName"), meth.GetValue<TString>("MethodTitle"));
514  }
515 }
516 
517 //_______________________________________________________________________
518 /**
519  * Test a specific ml method.
520  * \param methodname name of the method.
521  * \param methodtitle method title.
522  */
523 void TMVA::Experimental::Classification::TestMethod(TString methodname, TString methodtitle)
524 {
525  auto method = GetMethod(methodname, methodtitle);
526  if (!method) {
527  Log() << kFATAL
528  << Form("Trying to test method %s %s that may not be booked.", methodname.Data(), methodtitle.Data())
529  << Endl;
530  }
531 
532  Log() << kHEADER << gTools().Color("bold") << "Test all methods" << gTools().Color("reset") << Endl;
533  Event::SetIsTraining(kFALSE);
534 
535  Types::EAnalysisType analysisType = method->GetAnalysisType();
536  Log() << kHEADER << "Test method: " << method->GetMethodName() << " for Classification"
537  << " performance" << Endl << Endl;
538  method->AddOutput(Types::kTesting, analysisType);
539 
540  // -----------------------------------------------------------------------
541  // First part of evaluation process
542  // --> compute efficiencies, and other separation estimators
543  // -----------------------------------------------------------------------
544 
545  // although equal, we now want to separate the output for the variables
546  // and the real methods
547  Int_t isel; // will be 0 for a Method; 1 for a Variable
548  Int_t nmeth_used[2] = {0, 0}; // 0 Method; 1 Variable
549 
550  std::vector<std::vector<TString>> mname(2);
551  std::vector<std::vector<Double_t>> sig(2), sep(2), roc(2);
552  std::vector<std::vector<Double_t>> eff01(2), eff10(2), eff30(2), effArea(2);
553  std::vector<std::vector<Double_t>> eff01err(2), eff10err(2), eff30err(2);
554  std::vector<std::vector<Double_t>> trainEff01(2), trainEff10(2), trainEff30(2);
555 
556  method->SetFile(fFile.get());
557  method->SetSilentFile(IsSilentFile());
558 
559  MethodBase *methodNoCuts = NULL;
560  if (!IsCutsMethod(method))
561  methodNoCuts = method;
562 
563  Log() << kHEADER << "Evaluate classifier: " << method->GetMethodName() << Endl << Endl;
564  isel = (method->GetMethodTypeName().Contains("Variable")) ? 1 : 0;
565 
566  // perform the evaluation
567  method->TestClassification();
568 
569  // evaluate the classifier
570  mname[isel].push_back(method->GetMethodName());
571  sig[isel].push_back(method->GetSignificance());
572  sep[isel].push_back(method->GetSeparation());
573  roc[isel].push_back(method->GetROCIntegral());
574 
575  Double_t err;
576  eff01[isel].push_back(method->GetEfficiency("Efficiency:0.01", Types::kTesting, err));
577  eff01err[isel].push_back(err);
578  eff10[isel].push_back(method->GetEfficiency("Efficiency:0.10", Types::kTesting, err));
579  eff10err[isel].push_back(err);
580  eff30[isel].push_back(method->GetEfficiency("Efficiency:0.30", Types::kTesting, err));
581  eff30err[isel].push_back(err);
582  effArea[isel].push_back(method->GetEfficiency("", Types::kTesting, err)); // computes the area (average)
583 
584  trainEff01[isel].push_back(method->GetTrainingEfficiency("Efficiency:0.01")); // the first pass takes longer
585  trainEff10[isel].push_back(method->GetTrainingEfficiency("Efficiency:0.10"));
586  trainEff30[isel].push_back(method->GetTrainingEfficiency("Efficiency:0.30"));
587 
588  nmeth_used[isel]++;
589 
590  if (!IsSilentFile()) {
591  Log() << kDEBUG << "\tWrite evaluation histograms to file" << Endl;
592  method->WriteEvaluationHistosToFile(Types::kTesting);
593  method->WriteEvaluationHistosToFile(Types::kTraining);
594  }
595 
596  // now sort the variables according to the best 'eff at Beff=0.10'
597  for (Int_t k = 0; k < 2; k++) {
598  std::vector<std::vector<Double_t>> vtemp;
599  vtemp.push_back(effArea[k]); // this is the vector that is ranked
600  vtemp.push_back(eff10[k]);
601  vtemp.push_back(eff01[k]);
602  vtemp.push_back(eff30[k]);
603  vtemp.push_back(eff10err[k]);
604  vtemp.push_back(eff01err[k]);
605  vtemp.push_back(eff30err[k]);
606  vtemp.push_back(trainEff10[k]);
607  vtemp.push_back(trainEff01[k]);
608  vtemp.push_back(trainEff30[k]);
609  vtemp.push_back(sig[k]);
610  vtemp.push_back(sep[k]);
611  vtemp.push_back(roc[k]);
612  std::vector<TString> vtemps = mname[k];
613  gTools().UsefulSortDescending(vtemp, &vtemps);
614  effArea[k] = vtemp[0];
615  eff10[k] = vtemp[1];
616  eff01[k] = vtemp[2];
617  eff30[k] = vtemp[3];
618  eff10err[k] = vtemp[4];
619  eff01err[k] = vtemp[5];
620  eff30err[k] = vtemp[6];
621  trainEff10[k] = vtemp[7];
622  trainEff01[k] = vtemp[8];
623  trainEff30[k] = vtemp[9];
624  sig[k] = vtemp[10];
625  sep[k] = vtemp[11];
626  roc[k] = vtemp[12];
627  mname[k] = vtemps;
628  }
629 
630  // -----------------------------------------------------------------------
631  // Second part of evaluation process
632  // --> compute correlations among MVAs
633  // --> compute correlations between input variables and MVA (determines importance)
634  // --> count overlaps
635  // -----------------------------------------------------------------------
636  if (fCorrelations) {
637  const Int_t nmeth = methodNoCuts == NULL ? 0 : 1;
638  const Int_t nvar = method->fDataSetInfo.GetNVariables();
639  if (nmeth > 0) {
640 
641  // needed for correlations
642  Double_t *dvec = new Double_t[nmeth + nvar];
643  std::vector<Double_t> rvec;
644 
645  // for correlations
646  TPrincipal *tpSig = new TPrincipal(nmeth + nvar, "");
647  TPrincipal *tpBkg = new TPrincipal(nmeth + nvar, "");
648 
649  // set required tree branch references
650  std::vector<TString> *theVars = new std::vector<TString>;
651  std::vector<ResultsClassification *> mvaRes;
652  theVars->push_back(methodNoCuts->GetTestvarName());
653  rvec.push_back(methodNoCuts->GetSignalReferenceCut());
654  theVars->back().ReplaceAll("MVA_", "");
655  mvaRes.push_back(dynamic_cast<ResultsClassification *>(
656  methodNoCuts->Data()->GetResults(methodNoCuts->GetMethodName(), Types::kTesting, Types::kMaxAnalysisType)));
657 
658  // for overlap study
659  TMatrixD *overlapS = new TMatrixD(nmeth, nmeth);
660  TMatrixD *overlapB = new TMatrixD(nmeth, nmeth);
661  (*overlapS) *= 0; // init...
662  (*overlapB) *= 0; // init...
663 
664  // loop over test tree
665  DataSet *defDs = method->fDataSetInfo.GetDataSet();
667  for (Int_t ievt = 0; ievt < defDs->GetNEvents(); ievt++) {
668  const Event *ev = defDs->GetEvent(ievt);
669 
670  // for correlations
671  TMatrixD *theMat = 0;
672  for (Int_t im = 0; im < nmeth; im++) {
673  // check for NaN value
674  Double_t retval = (Double_t)(*mvaRes[im])[ievt][0];
675  if (TMath::IsNaN(retval)) {
676  Log() << kWARNING << "Found NaN return value in event: " << ievt << " for method \""
677  << methodNoCuts->GetName() << "\"" << Endl;
678  dvec[im] = 0;
679  } else
680  dvec[im] = retval;
681  }
682  for (Int_t iv = 0; iv < nvar; iv++)
683  dvec[iv + nmeth] = (Double_t)ev->GetValue(iv);
684  if (method->fDataSetInfo.IsSignal(ev)) {
685  tpSig->AddRow(dvec);
686  theMat = overlapS;
687  } else {
688  tpBkg->AddRow(dvec);
689  theMat = overlapB;
690  }
691 
692  // count overlaps
693  for (Int_t im = 0; im < nmeth; im++) {
694  for (Int_t jm = im; jm < nmeth; jm++) {
695  if ((dvec[im] - rvec[im]) * (dvec[jm] - rvec[jm]) > 0) {
696  (*theMat)(im, jm)++;
697  if (im != jm)
698  (*theMat)(jm, im)++;
699  }
700  }
701  }
702  }
703 
704  // renormalise overlap matrix
705  (*overlapS) *= (1.0 / defDs->GetNEvtSigTest()); // init...
706  (*overlapB) *= (1.0 / defDs->GetNEvtBkgdTest()); // init...
707 
708  tpSig->MakePrincipals();
709  tpBkg->MakePrincipals();
710 
711  const TMatrixD *covMatS = tpSig->GetCovarianceMatrix();
712  const TMatrixD *covMatB = tpBkg->GetCovarianceMatrix();
713 
714  const TMatrixD *corrMatS = gTools().GetCorrelationMatrix(covMatS);
715  const TMatrixD *corrMatB = gTools().GetCorrelationMatrix(covMatB);
716 
717  // print correlation matrices
718  if (corrMatS != 0 && corrMatB != 0) {
719 
720  // extract MVA matrix
721  TMatrixD mvaMatS(nmeth, nmeth);
722  TMatrixD mvaMatB(nmeth, nmeth);
723  for (Int_t im = 0; im < nmeth; im++) {
724  for (Int_t jm = 0; jm < nmeth; jm++) {
725  mvaMatS(im, jm) = (*corrMatS)(im, jm);
726  mvaMatB(im, jm) = (*corrMatB)(im, jm);
727  }
728  }
729 
730  // extract variables - to MVA matrix
731  std::vector<TString> theInputVars;
732  TMatrixD varmvaMatS(nvar, nmeth);
733  TMatrixD varmvaMatB(nvar, nmeth);
734  for (Int_t iv = 0; iv < nvar; iv++) {
735  theInputVars.push_back(method->fDataSetInfo.GetVariableInfo(iv).GetLabel());
736  for (Int_t jm = 0; jm < nmeth; jm++) {
737  varmvaMatS(iv, jm) = (*corrMatS)(nmeth + iv, jm);
738  varmvaMatB(iv, jm) = (*corrMatB)(nmeth + iv, jm);
739  }
740  }
741 
742  if (nmeth > 1) {
743  Log() << kINFO << Endl;
744  Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
745  << "Inter-MVA correlation matrix (signal):" << Endl;
746  gTools().FormattedOutput(mvaMatS, *theVars, Log());
747  Log() << kINFO << Endl;
748 
749  Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
750  << "Inter-MVA correlation matrix (background):" << Endl;
751  gTools().FormattedOutput(mvaMatB, *theVars, Log());
752  Log() << kINFO << Endl;
753  }
754 
755  Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
756  << "Correlations between input variables and MVA response (signal):" << Endl;
757  gTools().FormattedOutput(varmvaMatS, theInputVars, *theVars, Log());
758  Log() << kINFO << Endl;
759 
760  Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
761  << "Correlations between input variables and MVA response (background):" << Endl;
762  gTools().FormattedOutput(varmvaMatB, theInputVars, *theVars, Log());
763  Log() << kINFO << Endl;
764  } else
765  Log() << kWARNING << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
766  << "<TestAllMethods> cannot compute correlation matrices" << Endl;
767 
768  // print overlap matrices
769  Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
770  << "The following \"overlap\" matrices contain the fraction of events for which " << Endl;
771  Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
772  << "the MVAs 'i' and 'j' have returned conform answers about \"signal-likeness\"" << Endl;
773  Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
774  << "An event is signal-like, if its MVA output exceeds the following value:" << Endl;
775  gTools().FormattedOutput(rvec, *theVars, "Method", "Cut value", Log());
776  Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
777  << "which correspond to the working point: eff(signal) = 1 - eff(background)" << Endl;
778 
779  // give notice that cut method has been excluded from this test
780  if (nmeth != 1)
781  Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
782  << "Note: no correlations and overlap with cut method are provided at present" << Endl;
783 
784  if (nmeth > 1) {
785  Log() << kINFO << Endl;
786  Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
787  << "Inter-MVA overlap matrix (signal):" << Endl;
788  gTools().FormattedOutput(*overlapS, *theVars, Log());
789  Log() << kINFO << Endl;
790 
791  Log() << kINFO << Form("Dataset[%s] : ", method->fDataSetInfo.GetName())
792  << "Inter-MVA overlap matrix (background):" << Endl;
793  gTools().FormattedOutput(*overlapB, *theVars, Log());
794  }
795 
796  // cleanup
797  delete tpSig;
798  delete tpBkg;
799  delete corrMatS;
800  delete corrMatB;
801  delete theVars;
802  delete overlapS;
803  delete overlapB;
804  delete[] dvec;
805  }
806  }
807 
808  // -----------------------------------------------------------------------
809  // Third part of evaluation process
810  // --> output
811  // -----------------------------------------------------------------------
812  // putting results in the classification result object
813  auto &fResult = GetResults(methodname, methodtitle);
814 
815  // Binary classification
816  if (fROC) {
817  Log().EnableOutput();
819  Log() << Endl;
820  TString hLine = "------------------------------------------------------------------------------------------"
821  "-------------------------";
822  Log() << kINFO << "Evaluation results ranked by best signal efficiency and purity (area)" << Endl;
823  Log() << kINFO << hLine << Endl;
824  Log() << kINFO << "DataSet MVA " << Endl;
825  Log() << kINFO << "Name: Method: ROC-integ" << Endl;
826 
827  Log() << kDEBUG << hLine << Endl;
828  for (Int_t k = 0; k < 2; k++) {
829  if (k == 1 && nmeth_used[k] > 0) {
830  Log() << kINFO << hLine << Endl;
831  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
832  }
833  for (Int_t i = 0; i < nmeth_used[k]; i++) {
834  TString datasetName = fDataLoader->GetName();
835  TString methodName = mname[k][i];
836 
837  if (k == 1) {
838  methodName.ReplaceAll("Variable_", "");
839  }
840 
841  TMVA::DataSet *dataset = method->Data();
842  TMVA::Results *results = dataset->GetResults(methodName, Types::kTesting, this->fAnalysisType);
843  std::vector<Bool_t> *mvaResType = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
844 
845  Double_t rocIntegral = 0.0;
846  if (mvaResType->size() != 0) {
847  rocIntegral = GetROCIntegral(methodname, methodtitle);
848  }
849 
850  if (sep[k][i] < 0 || sig[k][i] < 0) {
851  // cannot compute separation/significance -> no MVA (usually for Cuts)
852  fResult.fROCIntegral = effArea[k][i];
853  Log() << kINFO
854  << Form("%-13s %-15s: %#1.3f", fDataLoader->GetName(), methodName.Data(), fResult.fROCIntegral)
855  << Endl;
856  } else {
857  fResult.fROCIntegral = rocIntegral;
858  Log() << kINFO << Form("%-13s %-15s: %#1.3f", datasetName.Data(), methodName.Data(), rocIntegral)
859  << Endl;
860  }
861  }
862  }
863  Log() << kINFO << hLine << Endl;
864  Log() << kINFO << Endl;
865  Log() << kINFO << "Testing efficiency compared to training efficiency (overtraining check)" << Endl;
866  Log() << kINFO << hLine << Endl;
867  Log() << kINFO
868  << "DataSet MVA Signal efficiency: from test sample (from training sample) "
869  << Endl;
870  Log() << kINFO << "Name: Method: @B=0.01 @B=0.10 @B=0.30 "
871  << Endl;
872  Log() << kINFO << hLine << Endl;
873  for (Int_t k = 0; k < 2; k++) {
874  if (k == 1 && nmeth_used[k] > 0) {
875  Log() << kINFO << hLine << Endl;
876  Log() << kINFO << "Input Variables: " << Endl << hLine << Endl;
877  }
878  for (Int_t i = 0; i < nmeth_used[k]; i++) {
879  if (k == 1)
880  mname[k][i].ReplaceAll("Variable_", "");
881 
882  Log() << kINFO << Form("%-20s %-15s: %#1.3f (%#1.3f) %#1.3f (%#1.3f) %#1.3f (%#1.3f)",
883  method->fDataSetInfo.GetName(), (const char *)mname[k][i], eff01[k][i],
884  trainEff01[k][i], eff10[k][i], trainEff10[k][i], eff30[k][i], trainEff30[k][i])
885  << Endl;
886  }
887  }
888  Log() << kINFO << hLine << Endl;
889  Log() << kINFO << Endl;
890 
891  if (gTools().CheckForSilentOption(GetOptions()))
892  Log().InhibitOutput();
893  } else if (IsCutsMethod(method)) { // end fROC
894  for (Int_t k = 0; k < 2; k++) {
895  for (Int_t i = 0; i < nmeth_used[k]; i++) {
896 
897  if (sep[k][i] < 0 || sig[k][i] < 0) {
898  // cannot compute separation/significance -> no MVA (usually for Cuts)
899  fResult.fROCIntegral = effArea[k][i];
900  }
901  }
902  }
903  }
904 
905  TMVA::DataSet *dataset = method->Data();
907 
908  if (IsCutsMethod(method)) {
909  fResult.fIsCuts = kTRUE;
910  } else {
911  auto rocCurveTest = GetROC(methodname, methodtitle, 0, Types::kTesting);
912  fResult.fMvaTest[0] = rocCurveTest->GetMvas();
913  fResult.fROCIntegral = GetROCIntegral(methodname, methodtitle);
914  }
915  TString className = method->DataInfo().GetClassInfo(0)->GetName();
916  fResult.fClassNames.push_back(className);
917 
918  if (!IsSilentFile()) {
919  // write test/training trees
920  RootBaseDir()->cd(method->fDataSetInfo.GetName());
921  method->fDataSetInfo.GetDataSet()->GetTree(Types::kTesting)->Write("", TObject::kOverwrite);
922  method->fDataSetInfo.GetDataSet()->GetTree(Types::kTraining)->Write("", TObject::kOverwrite);
923  }
924 }
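
Putting TrainMethod and TestMethod together for a single classifier, instead of calling Evaluate(); a sketch that assumes a method booked under the (illustrative) identifiers "BDT"/"myBDT":

#include <TMVA/Classification.h>
#include <TMVA/ROCCurve.h>
#include <TGraph.h>
#include <iostream>

void TrainAndTestOne(TMVA::Experimental::Classification &cl)
{
   cl.TrainMethod("BDT", "myBDT");
   cl.TestMethod("BDT", "myBDT");

   std::cout << "ROC integral: " << cl.GetROCIntegral("BDT", "myBDT") << std::endl;

   TMVA::ROCCurve *roc = cl.GetROC("BDT", "myBDT"); // class 0 (signal), test tree
   TGraph *curve = roc->GetROCCurve();              // GetROCCurve returns a new TGraph
   curve->Draw("AL");
   delete roc;
}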
925 
926 //_______________________________________________________________________
927 /**
928  * Test a specific ml method given the method type in enum TMVA::Types::EMVA.
929  * \param method TMVA::Types::EMVA type.
930  * \param methodtitle method title.
931  */
932 void TMVA::Experimental::Classification::TestMethod(Types::EMVA method, TString methodtitle)
933 {
934  TestMethod(Types::Instance().GetMethodName(method), methodtitle);
935 }
936 
937 //_______________________________________________________________________
938 /**
939  * Return the vector of TMVA::Experimental::ClassificationResult objects.
940  * \return vector of results.
941  */
942 std::vector<TMVA::Experimental::ClassificationResult> &TMVA::Experimental::Classification::GetResults()
943 {
944  if (fResults.size() == 0)
945  Log() << kFATAL << "No Classification results available" << Endl;
946  return fResults;
947 }
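
After Evaluate(), the results vector can be scanned for a particular booked method; a short sketch assuming the (illustrative) booking identifiers "BDT"/"myBDT":

#include <TMVA/Classification.h>

void PrintBDTResult(TMVA::Experimental::Classification &cl)
{
   for (auto &result : cl.GetResults()) {
      if (result.IsMethod("BDT", "myBDT")) {
         result.Show();
         break;
      }
   }
}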
948 
949 //_______________________________________________________________________
950 /**
951  * Check whether the ml method is a Cuts method.
952  * \return boolean true if the method is a Cuts method.
953  */
954 Bool_t TMVA::Experimental::Classification::IsCutsMethod(TMVA::MethodBase *method)
955 {
956  return method->GetMethodType() == Types::kCuts ? kTRUE : kFALSE;
957 }
958 
959 //_______________________________________________________________________
960 /**
961  * Get the result for a specific ml method.
962  * \param methodname name of the method.
963  * \param methodtitle method title.
964  * \return TMVA::Experimental::ClassificationResult object for the method.
965  */
966 TMVA::Experimental::ClassificationResult &
967 TMVA::Experimental::Classification::GetResults(TString methodname, TString methodtitle)
968 {
969  for (auto &result : fResults) {
970  if (result.IsMethod(methodname, methodtitle))
971  return result;
972  }
973  ClassificationResult result;
974  result.fMethod["MethodName"] = methodname;
975  result.fMethod["MethodTitle"] = methodtitle;
976  result.fDataLoaderName = fDataLoader->GetName();
977  fResults.push_back(result);
978  return fResults.back();
979 }
980 
981 //_______________________________________________________________________
982 /**
983  * Method to get TMVA::ROCCurve Object.
984  * \param method TMVA::MethodBase object
985  * \param iClass class index; default 0 (signal).
986  * \param type train/test tree type; default is the test tree.
987  * \return TMVA::ROCCurve object.
988  */
989 TMVA::ROCCurve *TMVA::Experimental::Classification::GetROC(TMVA::MethodBase *method, UInt_t iClass,
990  TMVA::Types::ETreeType type)
991 {
992  TMVA::DataSet *dataset = method->Data();
993  dataset->SetCurrentType(type);
994  TMVA::Results *results = dataset->GetResults(method->GetName(), type, this->fAnalysisType);
995 
996  UInt_t nClasses = method->DataInfo().GetNClasses();
997  if (this->fAnalysisType == Types::kMulticlass && iClass >= nClasses) {
998  Log() << kERROR << Form("Given class number (iClass = %i) does not exist. There are %i classes in dataset.",
999  iClass, nClasses)
1000  << Endl;
1001  return nullptr;
1002  }
1003 
1004  TMVA::ROCCurve *rocCurve = nullptr;
1005  if (this->fAnalysisType == Types::kClassification) {
1006 
1007  std::vector<Float_t> *mvaRes = dynamic_cast<ResultsClassification *>(results)->GetValueVector();
1008  std::vector<Bool_t> *mvaResTypes = dynamic_cast<ResultsClassification *>(results)->GetValueVectorTypes();
1009  std::vector<Float_t> mvaResWeights;
1010 
1011  auto eventCollection = dataset->GetEventCollection(type);
1012  mvaResWeights.reserve(eventCollection.size());
1013  for (auto ev : eventCollection) {
1014  mvaResWeights.push_back(ev->GetWeight());
1015  }
1016 
1017  rocCurve = new TMVA::ROCCurve(*mvaRes, *mvaResTypes, mvaResWeights);
1018 
1019  } else if (this->fAnalysisType == Types::kMulticlass) {
1020  std::vector<Float_t> mvaRes;
1021  std::vector<Bool_t> mvaResTypes;
1022  std::vector<Float_t> mvaResWeights;
1023 
1024  std::vector<std::vector<Float_t>> *rawMvaRes = dynamic_cast<ResultsMulticlass *>(results)->GetValueVector();
1025 
1026  // Vector transpose due to values being stored as
1027  // [ [0, 1, 2], [0, 1, 2], ... ]
1028  // in ResultsMulticlass::GetValueVector.
1029  mvaRes.reserve(rawMvaRes->size());
1030  for (auto item : *rawMvaRes) {
1031  mvaRes.push_back(item[iClass]);
1032  }
1033 
1034  auto eventCollection = dataset->GetEventCollection(type);
1035  mvaResTypes.reserve(eventCollection.size());
1036  mvaResWeights.reserve(eventCollection.size());
1037  for (auto ev : eventCollection) {
1038  mvaResTypes.push_back(ev->GetClass() == iClass);
1039  mvaResWeights.push_back(ev->GetWeight());
1040  }
1041 
1042  rocCurve = new TMVA::ROCCurve(mvaRes, mvaResTypes, mvaResWeights);
1043  }
1044 
1045  return rocCurve;
1046 }
1047 
1048 //_______________________________________________________________________
1049 /**
1050  * Method to get TMVA::ROCCurve Object.
1051  * \param methodname ml method name.
1052  * \param methodtitle ml method title.
1053  * \param iClass class index; default 0 (signal).
1054  * \param type train/test tree type; default is the test tree.
1055  * \return TMVA::ROCCurve object.
1056  */
1057 TMVA::ROCCurve *TMVA::Experimental::Classification::GetROC(TString methodname, TString methodtitle, UInt_t iClass,
1058  TMVA::Types::ETreeType type)
1059 {
1060  return GetROC(GetMethod(methodname, methodtitle), iClass, type);
1061 }
1062 
1063 //_______________________________________________________________________
1064 /**
1065  * Method to get the ROC-Integral value from the MVA outputs.
1066  * \param methodname ml method name.
1067  * \param methodtitle ml method title.
1068  * \param iClass class index; default 0 (signal).
1069  * \return Double_t with the ROC-Integral value.
1070  */
1071 Double_t TMVA::Experimental::Classification::GetROCIntegral(TString methodname, TString methodtitle, UInt_t iClass)
1072 {
1073  TMVA::ROCCurve *rocCurve = GetROC(methodname, methodtitle, iClass);
1074  if (!rocCurve) {
1075  Log() << kFATAL
1076  << Form("ROCCurve object was not created in MethodName = %s MethodTitle = %s not found with Dataset = %s ",
1077  methodname.Data(), methodtitle.Data(), fDataLoader->GetName())
1078  << Endl;
1079  return 0;
1080  }
1081 
1082  Int_t npoints = TMVA::gConfig().fVariablePlotting.fNbinsXOfROCCurve + 1;
1083  Double_t rocIntegral = rocCurve->GetROCIntegral(npoints);
1084  delete rocCurve;
1085 
1086  return rocIntegral;
1087 }
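
A sketch of pulling per-class ROC information from the Classification object itself, e.g. in a multiclass setup where iClass selects the class treated as signal; the method identifiers and class count are illustrative:

#include <TMVA/Classification.h>
#include <iostream>

// `cl` is assumed to have been evaluated on a multiclass dataset with nClasses classes.
void DumpPerClassROC(TMVA::Experimental::Classification &cl, UInt_t nClasses)
{
   for (UInt_t iClass = 0; iClass < nClasses; ++iClass) {
      Double_t auc = cl.GetROCIntegral("BDT", "myBDT", iClass);
      std::cout << "class " << iClass << " ROC integral = " << auc << std::endl;
   }
}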
1088 
1089 //_______________________________________________________________________
1091 {
1092  TFile *savdir = file;
1093  TDirectory *adir = savdir;
1094  adir->cd();
1095  // loop on all entries of this directory
1096  TKey *key;
1097  TIter nextkey(src->GetListOfKeys());
1098  while ((key = (TKey *)nextkey())) {
1099  const Char_t *classname = key->GetClassName();
1100  TClass *cl = gROOT->GetClass(classname);
1101  if (!cl)
1102  continue;
1103  if (cl->InheritsFrom(TDirectory::Class())) {
1104  src->cd(key->GetName());
1105  TDirectory *subdir = file;
1106  adir->cd();
1107  CopyFrom(subdir, file);
1108  adir->cd();
1109  } else if (cl->InheritsFrom(TTree::Class())) {
1110  TTree *T = (TTree *)src->Get(key->GetName());
1111  adir->cd();
1112  TTree *newT = T->CloneTree(-1, "fast");
1113  newT->Write();
1114  } else {
1115  src->cd();
1116  TObject *obj = key->ReadObj();
1117  adir->cd();
1118  obj->Write();
1119  delete obj;
1120  }
1121  }
1122  adir->SaveSelf(kTRUE);
1123  savdir->cd();
1124 }
1125 
1126 //_______________________________________________________________________
1128 {
1129 
1130  auto dsdir = fFile->mkdir(fDataLoader->GetName()); // dataset dir
1131  TTree *TrainTree = 0;
1132  TTree *TestTree = 0;
1133  TFile *ifile = 0;
1134  TFile *ofile = 0;
1135  for (UInt_t i = 0; i < fMethods.size(); i++) {
1136  auto methodname = fMethods[i].GetValue<TString>("MethodName");
1137  auto methodtitle = fMethods[i].GetValue<TString>("MethodTitle");
1138  auto fname = Form(".%s%s%s.root", fDataLoader->GetName(), methodname.Data(), methodtitle.Data());
1139  TDirectoryFile *ds = 0;
1140  if (i == 0) {
1141  ifile = new TFile(fname);
1142  ds = (TDirectoryFile *)ifile->Get(fDataLoader->GetName());
1143  } else {
1144  ofile = new TFile(fname);
1145  ds = (TDirectoryFile *)ofile->Get(fDataLoader->GetName());
1146  }
1147  auto tmptrain = (TTree *)ds->Get("TrainTree");
1148  auto tmptest = (TTree *)ds->Get("TestTree");
1149  fFile->cd();
1150  fFile->cd(fDataLoader->GetName());
1151 
1152  auto methdirname = Form("Method_%s", methodtitle.Data());
1153  auto methdir = dsdir->mkdir(methdirname, methdirname);
1154  auto methdirbase = methdir->mkdir(methodtitle.Data(), methodtitle.Data());
1155  auto mfdir = (TDirectoryFile *)ds->Get(methdirname);
1156  auto mfdirbase = (TDirectoryFile *)mfdir->Get(methodtitle.Data());
1157 
1158  CopyFrom(mfdirbase, (TFile *)methdirbase);
1159  dsdir->cd();
1160  if (i == 0) {
1161  TrainTree = tmptrain->CopyTree("");
1162  TestTree = tmptest->CopyTree("");
1163  } else {
1164  Float_t mva = 0;
1165  auto trainbranch = TrainTree->Branch(methodtitle.Data(), &mva);
1166  tmptrain->SetBranchAddress(methodtitle.Data(), &mva);
1167  auto entries = tmptrain->GetEntries();
1168  for (UInt_t ev = 0; ev < entries; ev++) {
1169  tmptrain->GetEntry(ev);
1170  trainbranch->Fill();
1171  }
1172  auto testbranch = TestTree->Branch(methodtitle.Data(), &mva);
1173  tmptest->SetBranchAddress(methodtitle.Data(), &mva);
1174  entries = tmptest->GetEntries();
1175  for (UInt_t ev = 0; ev < entries; ev++) {
1176  tmptest->GetEntry(ev);
1177  testbranch->Fill();
1178  }
1179  ofile->Close();
1180  }
1181  }
1182  TrainTree->Write();
1183  TestTree->Write();
1184  ifile->Close();
1185  // cleaning
1186  for (UInt_t i = 0; i < fMethods.size(); i++) {
1187  auto methodname = fMethods[i].GetValue<TString>("MethodName");
1188  auto methodtitle = fMethods[i].GetValue<TString>("MethodTitle");
1189  auto fname = Form(".%s%s%s.root", fDataLoader->GetName(), methodname.Data(), methodtitle.Data());
1190  gSystem->Unlink(fname);
1191  }
1192 }
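
For orientation, a compact end-to-end sketch of how this class is typically driven from a ROOT macro. The input file name, tree names, variables and method options are all assumptions for illustration, not taken from this file:

#include <TMVA/Classification.h>
#include <TMVA/DataLoader.h>
#include <TMVA/Types.h>
#include <TFile.h>
#include <TTree.h>

void classification_example()
{
   TFile *input = TFile::Open("inputdata.root"); // hypothetical input file
   TFile *output = TFile::Open("TMVA_Classification.root", "RECREATE");

   auto loader = new TMVA::DataLoader("dataset");
   loader->AddVariable("var1");
   loader->AddVariable("var2");
   loader->AddSignalTree((TTree *)input->Get("TreeS"));
   loader->AddBackgroundTree((TTree *)input->Get("TreeB"));
   loader->PrepareTrainingAndTestTree("", "SplitMode=Random:NormMode=NumEvents:!V");

   TMVA::Experimental::Classification cl(loader, output, "");
   cl.BookMethod(TMVA::Types::kBDT, "BDT", "NTrees=200:MaxDepth=3:BoostType=AdaBoost");
   cl.Evaluate(); // train + test all booked methods

   for (auto &result : cl.GetResults())
      result.Show();

   output->Close();
}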