Logo ROOT  
Reference Guide
MethodBase.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodBase *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
19 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
20 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
21 * *
22 * Copyright (c) 2005-2011: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * U. of Bonn, Germany *
27 * *
28 * Redistribution and use in source and binary forms, with or without *
29 * modification, are permitted according to the terms listed in LICENSE *
30 * (http://tmva.sourceforge.net/LICENSE) *
31 * *
32 **********************************************************************************/
33
34/*! \class TMVA::MethodBase
35\ingroup TMVA
36
37 Virtual base class for all MVA methods.
38
39 MethodBase hosts several specific evaluation methods.
40
41 The kind of MVA that provides optimal performance in an analysis strongly
42 depends on the particular application. The evaluation factory provides a
43 number of numerical benchmark results to directly assess the performance
44 of the MVA training on the independent test sample. These are:
45
46 - The _signal efficiency_ at three representative background efficiencies
47 (which is 1 - rejection).
48 - The _significance_ of an MVA estimator, defined by the difference
49 between the MVA mean values for signal and background, divided by the
50 quadratic sum of their root mean squares.
51 - The _separation_ of an MVA _x_, defined by the integral
52 \f[
53 \frac{1}{2} \int \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx
54 \f]
55 where
56 \f$ S(x) \f$ and \f$ B(x) \f$ are the signal and background distributions,
57 respectively. The separation is zero for identical signal and background MVA
58 shapes, and it is one for disjunctive shapes.
59 - The average, \f$ \int x \mu (S(x)) dx \f$, of the signal \f$ \mu_{transform} \f$.
60 The \f$ \mu_{transform} \f$ of an MVA denotes the transformation that yields
61 a uniform background distribution. In this way, the signal distributions
62 \f$ S(x) \f$ can be directly compared among the various MVAs. The stronger
63 \f$ S(x) \f$ peaks towards one, the better is the discrimination of the MVA.
64 The \f$ \mu_{transform} \f$ is
65 [documented here](http://tel.ccsd.cnrs.fr/documents/archives0/00/00/29/91/index_fr.html).
66
67 The MVA standard output also prints the linear correlation coefficients between
68 signal and background, which can be useful to eliminate variables that exhibit too
69 strong correlations.
70*/
71
72#include "TMVA/MethodBase.h"
73
74#include "TMVA/Config.h"
75#include "TMVA/Configurable.h"
76#include "TMVA/DataSetInfo.h"
77#include "TMVA/DataSet.h"
78#include "TMVA/Factory.h"
79#include "TMVA/IMethod.h"
80#include "TMVA/MsgLogger.h"
81#include "TMVA/PDF.h"
82#include "TMVA/Ranking.h"
83#include "TMVA/Factory.h"
84#include "TMVA/DataLoader.h"
85#include "TMVA/Tools.h"
86#include "TMVA/Results.h"
90#include "TMVA/RootFinder.h"
91#include "TMVA/Timer.h"
92#include "TMVA/Tools.h"
93#include "TMVA/TSpline1.h"
94#include "TMVA/Types.h"
98#include "TMVA/VariableInfo.h"
102#include "TMVA/Version.h"
103
104#include "TROOT.h"
105#include "TSystem.h"
106#include "TObjString.h"
107#include "TQObject.h"
108#include "TSpline.h"
109#include "TMatrix.h"
110#include "TMath.h"
111#include "TH1F.h"
112#include "TH2F.h"
113#include "TFile.h"
114#include "TKey.h"
115#include "TGraph.h"
116#include "Riostream.h"
117#include "TXMLEngine.h"
118
119#include <iomanip>
120#include <iostream>
121#include <fstream>
122#include <sstream>
123#include <cstdlib>
124#include <algorithm>
125#include <limits>
126
127
129
// File-scope declarations. NOTE(review): each line below carries a fused Doxygen
// source-line number from the extraction; code bytes are left untouched.
130using std::endl;
131using std::atof;
132
133//const Int_t MethodBase_MaxIterations_ = 200;
135
136//const Int_t NBIN_HIST_PLOT = 100;
// High-resolution bin count; assigned to fNbinsH in MethodBase::InitBase().
137const Int_t NBIN_HIST_HIGH = 10000;
138
139#ifdef _WIN32
140/* Disable warning C4355: 'this' : used in base member initializer list */
141#pragma warning ( disable : 4355 )
142#endif
143
144
145#include "TGraph.h"
146#include "TMultiGraph.h"
147
148////////////////////////////////////////////////////////////////////////////////
149/// standard constructor
150
// NOTE(review): the constructor signature line (Doxygen line ~151) was stripped by
// the extract; per the header comment above this is the standard constructor.
152{
// Start with no graphs registered and the next point index at zero.
153 fNumGraphs = 0;
154 fIndex = 0;
155}
156
157////////////////////////////////////////////////////////////////////////////////
158/// standard destructor
// NOTE(review): the destructor signature line was stripped by the extract
// (see the header comment above).
160{
// Release the owned TMultiGraph and null the pointer to avoid double deletion.
161 if (fMultiGraph){
162 delete fMultiGraph;
163 fMultiGraph = nullptr;
164 }
165 return;
166}
167
168////////////////////////////////////////////////////////////////////////////////
169/// This function gets some title and it creates a TGraph for every title.
170/// It also sets up the style for every TGraph. All graphs are added to a single TMultiGraph.
171///
172/// \param[in] graphTitles vector of titles
173
174void TMVA::IPythonInteractive::Init(std::vector<TString>& graphTitles)
175{
176 if (fNumGraphs!=0){
177 std::cerr << kERROR << "IPythonInteractive::Init: already initialized..." << std::endl;
178 return;
179 }
180 Int_t color = 2;
181 for(auto& title : graphTitles){
182 fGraphs.push_back( new TGraph() );
183 fGraphs.back()->SetTitle(title);
184 fGraphs.back()->SetName(title);
185 fGraphs.back()->SetFillColor(color);
186 fGraphs.back()->SetLineColor(color);
187 fGraphs.back()->SetMarkerColor(color);
188 fMultiGraph->Add(fGraphs.back());
189 color += 2;
190 fNumGraphs += 1;
191 }
192 return;
193}
194
195////////////////////////////////////////////////////////////////////////////////
196/// This function sets the point number to 0 for all graphs.
197
// NOTE(review): the function signature line was stripped by the extract; the
// header comment above documents it as the "reset all graphs" helper.
199{
200 for(Int_t i=0; i<fNumGraphs; i++){
// Set(0) drops all points but keeps the graph object and its style settings.
201 fGraphs[i]->Set(0);
202 }
203}
204
205////////////////////////////////////////////////////////////////////////////////
206/// This function is used only in 2 TGraph case, and it will add new data points to graphs.
207///
208/// \param[in] x the x coordinate
209/// \param[in] y1 the y coordinate for the first TGraph
210/// \param[in] y2 the y coordinate for the second TGraph
211
// NOTE(review): the signature line was stripped by the extract; per the doc
// comment above this is the two-graph AddPoint(x, y1, y2) overload.
213{
// Grow both graphs by one slot, then write the new coordinates at index fIndex.
214 fGraphs[0]->Set(fIndex+1);
215 fGraphs[1]->Set(fIndex+1);
216 fGraphs[0]->SetPoint(fIndex, x, y1);
217 fGraphs[1]->SetPoint(fIndex, x, y2);
218 fIndex++;
219 return;
220}
221
222////////////////////////////////////////////////////////////////////////////////
223/// This function can add data points to as many TGraphs as we have.
224///
225/// \param[in] dat vector of data points. The dat[0] contains the x coordinate,
226/// dat[1] contains the y coordinate for first TGraph, dat[2] for second, ...
227
228void TMVA::IPythonInteractive::AddPoint(std::vector<Double_t>& dat)
229{
230 for(Int_t i=0; i<fNumGraphs;i++){
231 fGraphs[i]->Set(fIndex+1);
232 fGraphs[i]->SetPoint(fIndex, dat[0], dat[i+1]);
233 }
234 fIndex++;
235 return;
236}
237
238
239////////////////////////////////////////////////////////////////////////////////
240/// standard constructor
241
// NOTE(review): the first line of this constructor ("MethodBase( const TString&
// jobName," — Doxygen line 242) was stripped by the extract; the remaining
// parameters and the member-initializer list are left byte-identical below.
// All pointer members start at 0 and are created lazily; this ctor is the
// training-path constructor (fConstructedFromWeightFile = kFALSE).
243 Types::EMVA methodType,
244 const TString& methodTitle,
245 DataSetInfo& dsi,
246 const TString& theOption) :
247 IMethod(),
248 Configurable ( theOption ),
249 fTmpEvent ( 0 ),
250 fRanking ( 0 ),
251 fInputVars ( 0 ),
252 fAnalysisType ( Types::kNoAnalysisType ),
253 fRegressionReturnVal ( 0 ),
254 fMulticlassReturnVal ( 0 ),
255 fDataSetInfo ( dsi ),
256 fSignalReferenceCut ( 0.5 ),
257 fSignalReferenceCutOrientation( 1. ),
258 fVariableTransformType ( Types::kSignal ),
259 fJobName ( jobName ),
260 fMethodName ( methodTitle ),
261 fMethodType ( methodType ),
262 fTestvar ( "" ),
263 fTMVATrainingVersion ( TMVA_VERSION_CODE ),
264 fROOTTrainingVersion ( ROOT_VERSION_CODE ),
265 fConstructedFromWeightFile ( kFALSE ),
266 fBaseDir ( 0 ),
267 fMethodBaseDir ( 0 ),
268 fFile ( 0 ),
269 fSilentFile (kFALSE),
270 fModelPersistence (kTRUE),
271 fWeightFile ( "" ),
272 fEffS ( 0 ),
273 fDefaultPDF ( 0 ),
274 fMVAPdfS ( 0 ),
275 fMVAPdfB ( 0 ),
276 fSplS ( 0 ),
277 fSplB ( 0 ),
278 fSpleffBvsS ( 0 ),
279 fSplTrainS ( 0 ),
280 fSplTrainB ( 0 ),
281 fSplTrainEffBvsS ( 0 ),
282 fVarTransformString ( "None" ),
283 fTransformationPointer ( 0 ),
284 fTransformation ( dsi, methodTitle ),
285 fVerbose ( kFALSE ),
286 fVerbosityLevelString ( "Default" ),
287 fHelp ( kFALSE ),
288 fHasMVAPdfs ( kFALSE ),
289 fIgnoreNegWeightsInTraining( kFALSE ),
290 fSignalClass ( 0 ),
291 fBackgroundClass ( 0 ),
292 fSplRefS ( 0 ),
293 fSplRefB ( 0 ),
294 fSplTrainRefS ( 0 ),
295 fSplTrainRefB ( 0 ),
296 fSetupCompleted (kFALSE)
297{
// Body intentionally (nearly) empty: all work is done in the initializer list.
// Doxygen lines 298-299 were stripped by the extract.
300
301// // default extension for weight files
302}
303
304////////////////////////////////////////////////////////////////////////////////
305/// constructor used for Testing + Application of the MVA,
306/// only (no training), using given WeightFiles
307
// NOTE(review): the first line of this constructor ("MethodBase( Types::EMVA
// methodType," — Doxygen line 308) was stripped by the extract; the rest is left
// byte-identical. This is the testing/application-path constructor
// (fConstructedFromWeightFile = kTRUE, state is read back from weightFile).
309 DataSetInfo& dsi,
310 const TString& weightFile ) :
311 IMethod(),
312 Configurable(""),
313 fTmpEvent ( 0 ),
314 fRanking ( 0 ),
315 fInputVars ( 0 ),
316 fAnalysisType ( Types::kNoAnalysisType ),
317 fRegressionReturnVal ( 0 ),
318 fMulticlassReturnVal ( 0 ),
319 fDataSetInfo ( dsi ),
320 fSignalReferenceCut ( 0.5 ),
321 fVariableTransformType ( Types::kSignal ),
322 fJobName ( "" ),
323 fMethodName ( "MethodBase" ),
324 fMethodType ( methodType ),
325 fTestvar ( "" ),
326 fTMVATrainingVersion ( 0 ),
327 fROOTTrainingVersion ( 0 ),
328 fConstructedFromWeightFile ( kTRUE ),
329 fBaseDir ( 0 ),
330 fMethodBaseDir ( 0 ),
331 fFile ( 0 ),
332 fSilentFile (kFALSE),
333 fModelPersistence (kTRUE),
334 fWeightFile ( weightFile ),
335 fEffS ( 0 ),
336 fDefaultPDF ( 0 ),
337 fMVAPdfS ( 0 ),
338 fMVAPdfB ( 0 ),
339 fSplS ( 0 ),
340 fSplB ( 0 ),
341 fSpleffBvsS ( 0 ),
342 fSplTrainS ( 0 ),
343 fSplTrainB ( 0 ),
344 fSplTrainEffBvsS ( 0 ),
345 fVarTransformString ( "None" ),
346 fTransformationPointer ( 0 ),
347 fTransformation ( dsi, "" ),
348 fVerbose ( kFALSE ),
349 fVerbosityLevelString ( "Default" ),
350 fHelp ( kFALSE ),
351 fHasMVAPdfs ( kFALSE ),
352 fIgnoreNegWeightsInTraining( kFALSE ),
353 fSignalClass ( 0 ),
354 fBackgroundClass ( 0 ),
355 fSplRefS ( 0 ),
356 fSplRefB ( 0 ),
357 fSplTrainRefS ( 0 ),
358 fSplTrainRefB ( 0 ),
359 fSetupCompleted (kFALSE)
360{
362// // constructor used for Testing + Application of the MVA,
363// // only (no training), using given WeightFiles
364}
365
366////////////////////////////////////////////////////////////////////////////////
367/// destructor
368
// NOTE(review): the destructor signature line ("MethodBase::~MethodBase()",
// Doxygen line 369) was stripped by the extract; body left byte-identical.
370{
371 // destructor
372 if (!fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling destructor of method which got never setup" << Endl;
373
374 // input variable list and ranking
375 if (fInputVars != 0) { fInputVars->clear(); delete fInputVars; }
376 if (fRanking != 0) delete fRanking;
377
378 // PDFs
379 if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
380 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
381 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
382
383 // Splines
384 if (fSplS) { delete fSplS; fSplS = 0; }
385 if (fSplB) { delete fSplB; fSplB = 0; }
386 if (fSpleffBvsS) { delete fSpleffBvsS; fSpleffBvsS = 0; }
387 if (fSplRefS) { delete fSplRefS; fSplRefS = 0; }
388 if (fSplRefB) { delete fSplRefB; fSplRefB = 0; }
389 if (fSplTrainRefS) { delete fSplTrainRefS; fSplTrainRefS = 0; }
390 if (fSplTrainRefB) { delete fSplTrainRefB; fSplTrainRefB = 0; }
391 if (fSplTrainEffBvsS) { delete fSplTrainEffBvsS; fSplTrainEffBvsS = 0; }
392
// Delete both owned event collections (training/testing) and their events.
393 for (Int_t i = 0; i < 2; i++ ) {
394 if (fEventCollections.at(i)) {
395 for (std::vector<Event*>::const_iterator it = fEventCollections.at(i)->begin();
396 it != fEventCollections.at(i)->end(); ++it) {
397 delete (*it);
398 }
399 delete fEventCollections.at(i);
400 fEventCollections.at(i) = 0;
401 }
402 }
403
404 if (fRegressionReturnVal) delete fRegressionReturnVal;
405 if (fMulticlassReturnVal) delete fMulticlassReturnVal;
406}
407
408////////////////////////////////////////////////////////////////////////////////
409/// setup of methods
410
// NOTE(review): signature line stripped by the extract; this is SetupMethod()
// (see header comment above). It may run exactly once per method instance.
412{
413 // setup of methods
414
415 if (fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling SetupMethod for the second time" << Endl;
// Base initialization/options first, then the derived method's Init/DeclareOptions.
416 InitBase();
417 DeclareBaseOptions();
418 Init();
419 DeclareOptions();
420 fSetupCompleted = kTRUE;
421}
422
423////////////////////////////////////////////////////////////////////////////////
424/// process all options
425/// the "CheckForUnusedOptions" is done in an independent call, since it may be overridden by derived class
426/// (sometimes, eg, fitters are used which can only be implemented during training phase)
427
// NOTE(review): signature line stripped by the extract; this is ProcessSetup()
// (see header comment above): base options first, then the derived method's.
429{
430 ProcessBaseOptions();
431 ProcessOptions();
432}
433
434////////////////////////////////////////////////////////////////////////////////
435/// check may be overridden by derived class
436/// (sometimes, eg, fitters are used which can only be implemented during training phase)
437
// NOTE(review): signature line stripped by the extract; this is CheckSetup()
// (see header comment above) — warns about any option the method never consumed.
439{
440 CheckForUnusedOptions();
441}
442
443////////////////////////////////////////////////////////////////////////////////
444/// default initialization called by all constructors
445
// NOTE(review): signature line stripped by the extract; this is InitBase(),
// the default initialization called by all constructors (see comment above).
447{
448 SetConfigDescription( "Configuration options for classifier architecture and tuning" );
449
451 fNbinsMVAoutput = gConfig().fVariablePlotting.fNbinsMVAoutput;
452 fNbinsH = NBIN_HIST_HIGH;
453
// Sentinel values: splines/statistics not yet computed, timers not yet run.
454 fSplTrainS = 0;
455 fSplTrainB = 0;
456 fSplTrainEffBvsS = 0;
457 fMeanS = -1;
458 fMeanB = -1;
459 fRmsS = -1;
460 fRmsB = -1;
461 fXmin = DBL_MAX;
462 fXmax = -DBL_MAX;
463 fTxtWeightsOnly = kTRUE;
464 fSplRefS = 0;
465 fSplRefB = 0;
466
467 fTrainTime = -1.;
468 fTestTime = -1.;
469
470 fRanking = 0;
471
472 // temporary until the move to DataSet is complete
473 fInputVars = new std::vector<TString>;
474 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
475 fInputVars->push_back(DataInfo().GetVariableInfo(ivar).GetLabel());
476 }
477 fRegressionReturnVal = 0;
478 fMulticlassReturnVal = 0;
479
// Two slots: training and testing event collections, filled lazily.
480 fEventCollections.resize( 2 );
481 fEventCollections.at(0) = 0;
482 fEventCollections.at(1) = 0;
483
484 // retrieve signal and background class index
485 if (DataInfo().GetClassInfo("Signal") != 0) {
486 fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
487 }
488 if (DataInfo().GetClassInfo("Background") != 0) {
489 fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
490 }
491
492 SetConfigDescription( "Configuration options for MVA method" );
493 SetConfigName( TString("Method") + GetMethodTypeName() );
494}
495
496////////////////////////////////////////////////////////////////////////////////
497/// define the options (their key words) that can be set in the option string
498/// here the options valid for ALL MVA methods are declared.
499///
500/// know options:
501///
502/// - VariableTransform=None,Decorrelated,PCA to use transformed variables
503/// instead of the original ones
504/// - VariableTransformType=Signal,Background which decorrelation matrix to use
505/// in the method. Only the Likelihood
506/// Method can make proper use of independent
507/// transformations of signal and background
508/// - fNbinsMVAPdf = 50 Number of bins used to create a PDF of MVA
509/// - fNsmoothMVAPdf = 2 Number of times a histogram is smoothed before creating the PDF
510/// - fHasMVAPdfs create PDFs for the MVA outputs
511/// - V for Verbose output (!V) for non-verbose output
512/// - H for Help message
513
// NOTE(review): signature line stripped by the extract; this is
// DeclareBaseOptions(), declaring the options common to ALL MVA methods
// (documented in the comment block above).
515{
516 DeclareOptionRef( fVerbose, "V", "Verbose output (short form of \"VerbosityLevel\" below - overrides the latter one)" );
517
518 DeclareOptionRef( fVerbosityLevelString="Default", "VerbosityLevel", "Verbosity level" );
519 AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
520 AddPreDefVal( TString("Debug") );
521 AddPreDefVal( TString("Verbose") );
522 AddPreDefVal( TString("Info") );
523 AddPreDefVal( TString("Warning") );
524 AddPreDefVal( TString("Error") );
525 AddPreDefVal( TString("Fatal") );
526
527 // If True (default): write all training results (weights) as text files only;
528 // if False: write also in ROOT format (not available for all methods - will abort if not
529 fTxtWeightsOnly = kTRUE; // OBSOLETE !!!
530 fNormalise = kFALSE; // OBSOLETE !!!
531
532 DeclareOptionRef( fVarTransformString, "VarTransform", "List of variable transformations performed before training, e.g., \"D_Background,P_Signal,G,N_AllClasses\" for: \"Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)\"" );
533
534 DeclareOptionRef( fHelp, "H", "Print method-specific help message" );
535
536 DeclareOptionRef( fHasMVAPdfs, "CreateMVAPdfs", "Create PDFs for classifier outputs (signal and background)" );
537
538 DeclareOptionRef( fIgnoreNegWeightsInTraining, "IgnoreNegWeightsInTraining",
539 "Events with negative weights are ignored in the training (but are included for testing and performance evaluation)" );
540}
541
542////////////////////////////////////////////////////////////////////////////////
543/// the option string is decoded, for available options see "DeclareOptions"
544
// NOTE(review): signature line stripped by the extract; this is
// ProcessBaseOptions(), which decodes the common option string (see comment above).
546{
547 if (HasMVAPdfs()) {
548 // setting the default bin num... maybe should be static ? ==> Please no static (JS)
549 // You can't use the logger in the constructor!!! Log() << kINFO << "Create PDFs" << Endl;
550 // reading every PDF's definition and passing the option string to the next one to be read and marked
551 fDefaultPDF = new PDF( TString(GetName())+"_PDF", GetOptions(), "MVAPdf" );
552 fDefaultPDF->DeclareOptions();
553 fDefaultPDF->ParseOptions();
554 fDefaultPDF->ProcessOptions();
555 fMVAPdfB = new PDF( TString(GetName())+"_PDFBkg", fDefaultPDF->GetOptions(), "MVAPdfBkg", fDefaultPDF );
556 fMVAPdfB->DeclareOptions();
557 fMVAPdfB->ParseOptions();
558 fMVAPdfB->ProcessOptions();
559 fMVAPdfS = new PDF( TString(GetName())+"_PDFSig", fMVAPdfB->GetOptions(), "MVAPdfSig", fDefaultPDF );
560 fMVAPdfS->DeclareOptions();
561 fMVAPdfS->ParseOptions();
562 fMVAPdfS->ProcessOptions();
563
564 // the final marked option string is written back to the original methodbase
565 SetOptions( fMVAPdfS->GetOptions() );
566 }
567
// Build the variable transformation chain requested via "VarTransform".
568 TMVA::CreateVariableTransforms( fVarTransformString,
569 DataInfo(),
570 GetTransformationHandler(),
571 Log() );
572
// If PDFs were not requested, drop any previously created PDF objects.
573 if (!HasMVAPdfs()) {
574 if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
575 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
576 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
577 }
578
579 if (fVerbose) { // overwrites other settings
580 fVerbosityLevelString = TString("Verbose");
581 Log().SetMinType( kVERBOSE );
582 }
583 else if (fVerbosityLevelString == "Debug" ) Log().SetMinType( kDEBUG );
584 else if (fVerbosityLevelString == "Verbose" ) Log().SetMinType( kVERBOSE );
585 else if (fVerbosityLevelString == "Info" ) Log().SetMinType( kINFO );
586 else if (fVerbosityLevelString == "Warning" ) Log().SetMinType( kWARNING );
587 else if (fVerbosityLevelString == "Error" ) Log().SetMinType( kERROR );
588 else if (fVerbosityLevelString == "Fatal" ) Log().SetMinType( kFATAL );
589 else if (fVerbosityLevelString != "Default" ) {
590 Log() << kFATAL << "<ProcessOptions> Verbosity level type '"
591 << fVerbosityLevelString << "' unknown." << Endl;
592 }
593 Event::SetIgnoreNegWeightsInTraining(fIgnoreNegWeightsInTraining);
594}
595
596////////////////////////////////////////////////////////////////////////////////
597/// options that are used ONLY for the READER to ensure backward compatibility
598/// they are hence without any effect (the reader is only reading the training
599/// options that HAD been used at the training of the .xml weight file at hand
600
// NOTE(review): signature line stripped by the extract; this is
// DeclareCompatibilityOptions() — reader-only options kept for backward
// compatibility with old weight files (see comment above); they have no effect.
602{
603 DeclareOptionRef( fNormalise=kFALSE, "Normalise", "Normalise input variables" ); // don't change the default !!!
604 DeclareOptionRef( fUseDecorr=kFALSE, "D", "Use-decorrelated-variables flag" );
605 DeclareOptionRef( fVariableTransformTypeString="Signal", "VarTransformType",
606 "Use signal or background events to derive for variable transformation (the transformation is applied on both types of, course)" );
607 AddPreDefVal( TString("Signal") );
608 AddPreDefVal( TString("Background") );
609 DeclareOptionRef( fTxtWeightsOnly=kTRUE, "TxtWeightFilesOnly", "If True: write all training results (weights) as text files (False: some are written in ROOT format)" );
610 // Why on earth ?? was this here? Was the verbosity level option meant to 'disappear? Not a good idea i think..
611 // DeclareOptionRef( fVerbosityLevelString="Default", "VerboseLevel", "Verbosity level" );
612 // AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
613 // AddPreDefVal( TString("Debug") );
614 // AddPreDefVal( TString("Verbose") );
615 // AddPreDefVal( TString("Info") );
616 // AddPreDefVal( TString("Warning") );
617 // AddPreDefVal( TString("Error") );
618 // AddPreDefVal( TString("Fatal") );
619 DeclareOptionRef( fNbinsMVAPdf = 60, "NbinsMVAPdf", "Number of bins used for the PDFs of classifier outputs" );
620 DeclareOptionRef( fNsmoothMVAPdf = 2, "NsmoothMVAPdf", "Number of smoothing iterations for classifier PDFs" );
621}
622
623
624////////////////////////////////////////////////////////////////////////////////
625/// call the Optimizer with the set of parameters and ranges that
626/// are meant to be tuned.
627
628std::map<TString,Double_t> TMVA::MethodBase::OptimizeTuningParameters(TString /* fomType */ , TString /* fitType */)
629{
630 // this is just a dummy... needs to be implemented for each method
631 // individually (as long as we don't have it automatized via the
632 // configuration string
633
634 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Parameter optimization is not yet implemented for method "
635 << GetName() << Endl;
636 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Currently we need to set hardcoded which parameter is tuned in which ranges"<<Endl;
637
638 std::map<TString,Double_t> tunedParameters;
639 tunedParameters.size(); // just to get rid of "unused" warning
640 return tunedParameters;
641
642}
643
644////////////////////////////////////////////////////////////////////////////////
645/// set the tuning parameters according to the argument
646/// This is just a dummy .. have a look at the MethodBDT how you could
647/// perhaps implement the same thing for the other Classifiers..
648
649void TMVA::MethodBase::SetTuneParameters(std::map<TString,Double_t> /* tuneParameters */)
650{
651}
652
653////////////////////////////////////////////////////////////////////////////////
654
// NOTE(review): signature line stripped by the extract; this is TrainMethod(),
// the framework driver that wraps the derived method's Train() with timing,
// output creation and state persistence.
656{
657 Data()->SetCurrentType(Types::kTraining);
658 Event::SetIsTraining(kTRUE); // used to set negative event weights to zero if chosen to do so
659
660 // train the MVA method
661 if (Help()) PrintHelpMessage();
662
663 // all histograms should be created in the method's subdirectory
664 if(!IsSilentFile()) BaseDir()->cd();
665
666 // once calculate all the transformation (e.g. the sequence of Decorr:Gauss:Decorr)
667 // needed for this classifier
668 GetTransformationHandler().CalcTransformations(Data()->GetEventCollection());
669
670 // call training of derived MVA
671 Log() << kDEBUG //<<Form("\tDataset[%s] : ",DataInfo().GetName())
672 << "Begin training" << Endl;
673 Long64_t nEvents = Data()->GetNEvents();
674 Timer traintimer( nEvents, GetName(), kTRUE );
675 Train();
676 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName()
677 << "\tEnd of training " << Endl;
678 SetTrainTime(traintimer.ElapsedSeconds());
679 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
680 << "Elapsed time for training with " << nEvents << " events: "
681 << traintimer.GetElapsedTime() << " " << Endl;
682
683 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
684 << "\tCreate MVA output for ";
685
686 // create PDFs for the signal and background MVA distributions (if required)
687 if (DoMulticlass()) {
688 Log() <<Form("[%s] : ",DataInfo().GetName())<< "Multiclass classification on training sample" << Endl;
689 AddMulticlassOutput(Types::kTraining);
690 }
691 else if (!DoRegression()) {
692
693 Log() <<Form("[%s] : ",DataInfo().GetName())<< "classification on training sample" << Endl;
694 AddClassifierOutput(Types::kTraining);
695 if (HasMVAPdfs()) {
696 CreateMVAPdfs();
697 AddClassifierOutputProb(Types::kTraining);
698 }
699
700 } else {
701
702 Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "regression on training sample" << Endl;
703 AddRegressionOutput( Types::kTraining );
704
705 if (HasMVAPdfs() ) {
706 Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create PDFs" << Endl;
707 CreateMVAPdfs();
708 }
709 }
710
711 // write the current MVA state into stream
712 // produced are one text file and one ROOT file
713 if (fModelPersistence ) WriteStateToFile();
714
715 // produce standalone make class (presently only supported for classification)
716 if ((!DoRegression()) && (fModelPersistence)) MakeClass();
717
718 // write additional monitoring histograms to main target file (not the weight file)
719 // again, make sure the histograms go into the method's subdirectory
720 if(!IsSilentFile())
721 {
722 BaseDir()->cd();
723 WriteMonitoringHistosToFile();
724 }
725}
726
727////////////////////////////////////////////////////////////////////////////////
728
// NOTE(review): the signature line AND the declaration of `regRes` (Doxygen
// line 733, presumably fetching the ResultsRegression object for `type`) were
// stripped by the extract — confirm against the full source before editing.
730{
731 if (!DoRegression()) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Trying to use GetRegressionDeviation() with a classification job" << Endl;
732 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
// Full-sample RMS of the quadratic deviation...
734 bool truncate = false;
735 TH1F* h1 = regRes->QuadraticDeviation( tgtNum , truncate, 1.);
736 stddev = sqrt(h1->GetMean());
// ...and the RMS truncated at the 90% quantile of the deviation distribution.
737 truncate = true;
738 Double_t yq[1], xq[]={0.9};
739 h1->GetQuantiles(1,yq,xq);
740 TH1F* h2 = regRes->QuadraticDeviation( tgtNum , truncate, yq[0]);
741 stddev90Percent = sqrt(h2->GetMean());
742 delete h1;
743 delete h2;
744}
745
746////////////////////////////////////////////////////////////////////////////////
747/// prepare tree branch with the method's discriminating variable
748
// NOTE(review): signature line stripped by the extract; this is
// AddRegressionOutput(type) — evaluates the regression values for every event
// of the requested sample and stores them in the ResultsRegression object.
750{
751 Data()->SetCurrentType(type);
752
753 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
754
755 ResultsRegression* regRes = (ResultsRegression*)Data()->GetResults(GetMethodName(), type, Types::kRegression);
756
757 Long64_t nEvents = Data()->GetNEvents();
758
759 // use timer
760 Timer timer( nEvents, GetName(), kTRUE );
761 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
762 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
763
764 regRes->Resize( nEvents );
765
766 // Drawing the progress bar every event was causing a huge slowdown in the evaluation time
767 // So we set some parameters to draw the progress bar a total of totalProgressDraws, i.e. only draw every 1 in 100
768
769 Int_t totalProgressDraws = 100; // total number of times to update the progress bar
770 Int_t drawProgressEvery = 1; // draw every nth event such that we have a total of totalProgressDraws
771 if(nEvents >= totalProgressDraws) drawProgressEvery = nEvents/totalProgressDraws;
772
773 for (Int_t ievt=0; ievt<nEvents; ievt++) {
774
775 Data()->SetCurrentEvent(ievt);
776 std::vector< Float_t > vals = GetRegressionValues();
777 regRes->SetValue( vals, ievt );
778
779 // Only draw the progress bar once in a while, doing this every event causes the evaluation to be ridiculously slow
780 if(ievt % drawProgressEvery == 0 || ievt==nEvents-1) timer.DrawProgressBar( ievt );
781 }
782
783 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
784 << "Elapsed time for evaluation of " << nEvents << " events: "
785 << timer.GetElapsedTime() << " " << Endl;
786
787 // store time used for testing
789 SetTestTime(timer.ElapsedSeconds());
790
791 TString histNamePrefix(GetTestvarName());
792 histNamePrefix += (type==Types::kTraining?"train":"test");
793 regRes->CreateDeviationHistograms( histNamePrefix );
794}
795
796////////////////////////////////////////////////////////////////////////////////
797/// prepare tree branch with the method's discriminating variable
798
// NOTE(review): signature line stripped by the extract; this is
// AddMulticlassOutput(type) — evaluates the multiclass response for every event
// of the requested sample and stores it in the ResultsMulticlass object.
800{
801 Data()->SetCurrentType(type);
802
803 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
804
805 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
806 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in AddMulticlassOutput, exiting."<<Endl;
807
808 Long64_t nEvents = Data()->GetNEvents();
809
810 // use timer
811 Timer timer( nEvents, GetName(), kTRUE );
812
813 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Multiclass evaluation of " << GetMethodName() << " on "
814 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
815
816 resMulticlass->Resize( nEvents );
817 for (Int_t ievt=0; ievt<nEvents; ievt++) {
818 Data()->SetCurrentEvent(ievt);
819 std::vector< Float_t > vals = GetMulticlassValues();
820 resMulticlass->SetValue( vals, ievt );
821 timer.DrawProgressBar( ievt );
822 }
823
824 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
825 << "Elapsed time for evaluation of " << nEvents << " events: "
826 << timer.GetElapsedTime() << " " << Endl;
827
828 // store time used for testing
830 SetTestTime(timer.ElapsedSeconds())
831
832 TString histNamePrefix(GetTestvarName());
833 histNamePrefix += (type==Types::kTraining?"_Train":"_Test");
834
835 resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH );
836 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefix);
837}
838
839////////////////////////////////////////////////////////////////////////////////
840
841void TMVA::MethodBase::NoErrorCalc(Double_t* const err, Double_t* const errUpper) {
842 if (err) *err=-1;
843 if (errUpper) *errUpper=-1;
844}
845
846////////////////////////////////////////////////////////////////////////////////
847
848Double_t TMVA::MethodBase::GetMvaValue( const Event* const ev, Double_t* err, Double_t* errUpper ) {
849 fTmpEvent = ev;
850 Double_t val = GetMvaValue(err, errUpper);
851 fTmpEvent = 0;
852 return val;
853}
854
855////////////////////////////////////////////////////////////////////////////////
856/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
857/// for a quick determination if an event would be selected as signal or background
858
// NOTE(review): the signature line (IsSignalLike(), Doxygen line 859) was
// stripped by the extract. The orientation factor lets the same comparison
// work whether signal lies above or below the reference cut.
860 return GetMvaValue()*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
861}
862////////////////////////////////////////////////////////////////////////////////
863/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
864/// for a quick determination if an event with this mva output value would be selected as signal or background
865
// NOTE(review): the signature line (IsSignalLike(Double_t mvaVal), Doxygen
// line 866) was stripped by the extract. Same cut logic as above, applied to a
// caller-supplied MVA output value instead of evaluating the current event.
867 return mvaVal*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
868}
869
870////////////////////////////////////////////////////////////////////////////////
871/// prepare tree branch with the method's discriminating variable
872
874{
875 Data()->SetCurrentType(type);
876
877 ResultsClassification* clRes =
879
880 Long64_t nEvents = Data()->GetNEvents();
881 clRes->Resize( nEvents );
882
883 // use timer
884 Timer timer( nEvents, GetName(), kTRUE );
885 std::vector<Double_t> mvaValues = GetMvaValues(0, nEvents, true);
886
887 // store time used for testing
889 SetTestTime(timer.ElapsedSeconds());
890
891 // load mva values to results object
892 for (Int_t ievt=0; ievt<nEvents; ievt++) {
893 clRes->SetValue( mvaValues[ievt], ievt );
894 }
895}
896
897////////////////////////////////////////////////////////////////////////////////
898/// get all the MVA values for the events of the current Data type
899std::vector<Double_t> TMVA::MethodBase::GetMvaValues(Long64_t firstEvt, Long64_t lastEvt, Bool_t logProgress)
900{
901
902 Long64_t nEvents = Data()->GetNEvents();
903 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
904 if (firstEvt < 0) firstEvt = 0;
905 std::vector<Double_t> values(lastEvt-firstEvt);
906 // log in case of looping on all the events
907 nEvents = values.size();
908
909 // use timer
910 Timer timer( nEvents, GetName(), kTRUE );
911
912 if (logProgress)
913 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
914 << "Evaluation of " << GetMethodName() << " on "
915 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
916 << " sample (" << nEvents << " events)" << Endl;
917
918 for (Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
919 Data()->SetCurrentEvent(ievt);
920 values[ievt] = GetMvaValue();
921
922 // print progress
923 if (logProgress) {
924 Int_t modulo = Int_t(nEvents/100);
925 if (modulo <= 0 ) modulo = 1;
926 if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
927 }
928 }
929 if (logProgress) {
930 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
931 << "Elapsed time for evaluation of " << nEvents << " events: "
932 << timer.GetElapsedTime() << " " << Endl;
933 }
934
935 return values;
936}
937
938////////////////////////////////////////////////////////////////////////////////
939/// prepare tree branch with the method's discriminating variable
940
942{
943 Data()->SetCurrentType(type);
944
945 ResultsClassification* mvaProb =
946 (ResultsClassification*)Data()->GetResults(TString("prob_")+GetMethodName(), type, Types::kClassification );
947
948 Long64_t nEvents = Data()->GetNEvents();
949
950 // use timer
951 Timer timer( nEvents, GetName(), kTRUE );
952
953 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
954 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
955
956 mvaProb->Resize( nEvents );
957 for (Int_t ievt=0; ievt<nEvents; ievt++) {
958
959 Data()->SetCurrentEvent(ievt);
960 Float_t proba = ((Float_t)GetProba( GetMvaValue(), 0.5 ));
961 if (proba < 0) break;
962 mvaProb->SetValue( proba, ievt );
963
964 // print progress
965 Int_t modulo = Int_t(nEvents/100);
966 if (modulo <= 0 ) modulo = 1;
967 if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
968 }
969
970 Log() << kDEBUG <<Form("Dataset[%s] : ",DataInfo().GetName())
971 << "Elapsed time for evaluation of " << nEvents << " events: "
972 << timer.GetElapsedTime() << " " << Endl;
973}
974
975////////////////////////////////////////////////////////////////////////////////
976/// calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
977///
978/// - bias = average deviation
979/// - dev = average absolute deviation
980/// - rms = rms of deviation
981
                                       Double_t& dev, Double_t& devT,
                                       Double_t& rms, Double_t& rmsT,
                                       Double_t& mInf, Double_t& mInfT,
                                       Double_t& corr,
{
   // evaluate on the requested tree type; the caller's selection is
   // restored before returning
   Types::ETreeType savedType = Data()->GetCurrentType();
   Data()->SetCurrentType(type);

   // reset all returned quantities
   bias = 0; biasT = 0; dev = 0; devT = 0; rms = 0; rmsT = 0;
   Double_t sumw = 0;
   Double_t m1 = 0, m2 = 0, s1 = 0, s2 = 0, s12 = 0; // for correlation
   const Int_t nevt = GetNEvents();
   // per-event buffers (regression output, target, weight) reused by the
   // second pass that computes the truncated quantities
   Float_t* rV = new Float_t[nevt];
   Float_t* tV = new Float_t[nevt];
   Float_t* wV = new Float_t[nevt];
   Float_t xmin = 1e30, xmax = -1e30;
   Log() << kINFO << "Calculate regression for all events" << Endl;
   Timer timer( nevt, GetName(), kTRUE );
   for (Long64_t ievt=0; ievt<nevt; ievt++) {

      const Event* ev = Data()->GetEvent(ievt); // NOTE: need untransformed event here !
      Float_t t = ev->GetTarget(0);
      Float_t w = ev->GetWeight();
      Float_t r = GetRegressionValues()[0];
      Float_t d = (r-t);

      // find min/max

      // store for truncated RMS computation
      rV[ievt] = r;
      tV[ievt] = t;
      wV[ievt] = w;

      // compute deviation-squared
      sumw += w;
      bias += w * d;
      dev += w * TMath::Abs(d);
      rms += w * d * d;

      // compute correlation between target and regression estimate
      m1 += t*w; s1 += t*t*w;
      m2 += r*w; s2 += r*r*w;
      // NOTE(review): s12 accumulates t*r WITHOUT the event weight w,
      // unlike s1/s2/m1/m2, yet is divided by sumw below — verify this is
      // intended for weighted samples.
      s12 += t*r;
      // refresh progress bar every 256 events
      if ((ievt & 0xFF) == 0) timer.DrawProgressBar(ievt);
   }
   timer.DrawProgressBar(nevt - 1);
   Log() << kINFO << "Elapsed time for evaluation of " << nevt << " events: "
         << timer.GetElapsedTime() << " " << Endl;

   // standard quantities: normalise weighted sums
   bias /= sumw;
   dev /= sumw;
   rms /= sumw;
   rms = TMath::Sqrt(rms - bias*bias);

   // correlation
   m1 /= sumw;
   m2 /= sumw;
   corr = s12/sumw - m1*m2;
   corr /= TMath::Sqrt( (s1/sumw - m1*m1) * (s2/sumw - m2*m2) );

   // create histogram required for computation of mutual information
   TH2F* hist = new TH2F( "hist", "hist", 150, xmin, xmax, 100, xmin, xmax );
   TH2F* histT = new TH2F( "histT", "histT", 150, xmin, xmax, 100, xmin, xmax );

   // compute truncated RMS and fill histogram
   // (only events whose deviation lies within bias +- 2*rms are kept)
   Double_t devMax = bias + 2*rms;
   Double_t devMin = bias - 2*rms;
   sumw = 0;
   int ic=0;
   for (Long64_t ievt=0; ievt<nevt; ievt++) {
      Float_t d = (rV[ievt] - tV[ievt]);
      hist->Fill( rV[ievt], tV[ievt], wV[ievt] );
      if (d >= devMin && d <= devMax) {
         sumw += wV[ievt];
         biasT += wV[ievt] * d;
         devT += wV[ievt] * TMath::Abs(d);
         rmsT += wV[ievt] * d * d;
         histT->Fill( rV[ievt], tV[ievt], wV[ievt] );
         ic++;
      }
   }
   biasT /= sumw;
   devT /= sumw;
   rmsT /= sumw;
   rmsT = TMath::Sqrt(rmsT - biasT*biasT);
   // mutual information between target and estimate, full and truncated
   mInf = gTools().GetMutualInformation( *hist );
   mInfT = gTools().GetMutualInformation( *histT );

   delete hist;
   delete histT;

   delete [] rV;
   delete [] tV;
   delete [] wV;

   Data()->SetCurrentType(savedType);
}
1084
1085
1086////////////////////////////////////////////////////////////////////////////////
1087/// test multiclass classification
1088
1090{
1091 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
1092 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in TestMulticlass, exiting."<<Endl;
1093
1094 // GA evaluation of best cut for sig eff * sig pur. Slow, disabled for now.
1095 // Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for test
1096 // data..." << Endl; for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
1097 // resMulticlass->GetBestMultiClassCuts(icls);
1098 // }
1099
1100 // Create histograms for use in TMVA GUI
1101 TString histNamePrefix(GetTestvarName());
1102 TString histNamePrefixTest{histNamePrefix + "_Test"};
1103 TString histNamePrefixTrain{histNamePrefix + "_Train"};
1104
1105 resMulticlass->CreateMulticlassHistos(histNamePrefixTest, fNbinsMVAoutput, fNbinsH);
1106 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTest);
1107
1108 resMulticlass->CreateMulticlassHistos(histNamePrefixTrain, fNbinsMVAoutput, fNbinsH);
1109 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTrain);
1110}
1111
1112
1113////////////////////////////////////////////////////////////////////////////////
1114/// initialization
1115
1117{
1118 Data()->SetCurrentType(Types::kTesting);
1119
1120 ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
1121 ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );
1122
1123 // sanity checks: tree must exist, and theVar must be in tree
1124 if (0==mvaRes && !(GetMethodTypeName().Contains("Cuts"))) {
1125 Log()<<Form("Dataset[%s] : ",DataInfo().GetName()) << "mvaRes " << mvaRes << " GetMethodTypeName " << GetMethodTypeName()
1126 << " contains " << !(GetMethodTypeName().Contains("Cuts")) << Endl;
1127 Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<TestInit> Test variable " << GetTestvarName()
1128 << " not found in tree" << Endl;
1129 }
1130
1131 // basic statistics operations are made in base class
1132 gTools().ComputeStat( GetEventCollection(Types::kTesting), mvaRes->GetValueVector(),
1133 fMeanS, fMeanB, fRmsS, fRmsB, fXmin, fXmax, fSignalClass );
1134
1135 // choose reasonable histogram ranges, by removing outliers
1136 Double_t nrms = 10;
1137 fXmin = TMath::Max( TMath::Min( fMeanS - nrms*fRmsS, fMeanB - nrms*fRmsB ), fXmin );
1138 fXmax = TMath::Min( TMath::Max( fMeanS + nrms*fRmsS, fMeanB + nrms*fRmsB ), fXmax );
1139
1140 // determine cut orientation
1141 fCutOrientation = (fMeanS > fMeanB) ? kPositive : kNegative;
1142
1143 // fill 2 types of histograms for the various analyses
1144 // this one is for actual plotting
1145
1146 Double_t sxmax = fXmax+0.00001;
1147
1148 // classifier response distributions for training sample
1149 // MVA plots used for graphics representation (signal)
1150 TString TestvarName;
1151 if(IsSilentFile())
1152 {
1153 TestvarName=Form("[%s]%s",DataInfo().GetName(),GetTestvarName().Data());
1154 }else
1155 {
1156 TestvarName=GetTestvarName();
1157 }
1158 TH1* mva_s = new TH1D( TestvarName + "_S",TestvarName + "_S", fNbinsMVAoutput, fXmin, sxmax );
1159 TH1* mva_b = new TH1D( TestvarName + "_B",TestvarName + "_B", fNbinsMVAoutput, fXmin, sxmax );
1160 mvaRes->Store(mva_s, "MVA_S");
1161 mvaRes->Store(mva_b, "MVA_B");
1162 mva_s->Sumw2();
1163 mva_b->Sumw2();
1164
1165 TH1* proba_s = 0;
1166 TH1* proba_b = 0;
1167 TH1* rarity_s = 0;
1168 TH1* rarity_b = 0;
1169 if (HasMVAPdfs()) {
1170 // P(MVA) plots used for graphics representation
1171 proba_s = new TH1D( TestvarName + "_Proba_S", TestvarName + "_Proba_S", fNbinsMVAoutput, 0.0, 1.0 );
1172 proba_b = new TH1D( TestvarName + "_Proba_B", TestvarName + "_Proba_B", fNbinsMVAoutput, 0.0, 1.0 );
1173 mvaRes->Store(proba_s, "Prob_S");
1174 mvaRes->Store(proba_b, "Prob_B");
1175 proba_s->Sumw2();
1176 proba_b->Sumw2();
1177
1178 // R(MVA) plots used for graphics representation
1179 rarity_s = new TH1D( TestvarName + "_Rarity_S", TestvarName + "_Rarity_S", fNbinsMVAoutput, 0.0, 1.0 );
1180 rarity_b = new TH1D( TestvarName + "_Rarity_B", TestvarName + "_Rarity_B", fNbinsMVAoutput, 0.0, 1.0 );
1181 mvaRes->Store(rarity_s, "Rar_S");
1182 mvaRes->Store(rarity_b, "Rar_B");
1183 rarity_s->Sumw2();
1184 rarity_b->Sumw2();
1185 }
1186
1187 // MVA plots used for efficiency calculations (large number of bins)
1188 TH1* mva_eff_s = new TH1D( TestvarName + "_S_high", TestvarName + "_S_high", fNbinsH, fXmin, sxmax );
1189 TH1* mva_eff_b = new TH1D( TestvarName + "_B_high", TestvarName + "_B_high", fNbinsH, fXmin, sxmax );
1190 mvaRes->Store(mva_eff_s, "MVA_HIGHBIN_S");
1191 mvaRes->Store(mva_eff_b, "MVA_HIGHBIN_B");
1192 mva_eff_s->Sumw2();
1193 mva_eff_b->Sumw2();
1194
1195 // fill the histograms
1196
1197 ResultsClassification* mvaProb = dynamic_cast<ResultsClassification*>
1198 (Data()->GetResults( TString("prob_")+GetMethodName(), Types::kTesting, Types::kMaxAnalysisType ) );
1199
1200 Log() << kHEADER <<Form("[%s] : ",DataInfo().GetName())<< "Loop over test events and fill histograms with classifier response..." << Endl << Endl;
1201 if (mvaProb) Log() << kINFO << "Also filling probability and rarity histograms (on request)..." << Endl;
1202 std::vector<Bool_t>* mvaResTypes = mvaRes->GetValueVectorTypes();
1203
1204 //LM: this is needed to avoid crashes in ROOCCURVE
1205 if ( mvaRes->GetSize() != GetNEvents() ) {
1206 Log() << kFATAL << TString::Format("Inconsistent result size %lld with number of events %u ", mvaRes->GetSize() , GetNEvents() ) << Endl;
1207 assert(mvaRes->GetSize() == GetNEvents());
1208 }
1209
1210 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1211
1212 const Event* ev = GetEvent(ievt);
1213 Float_t v = (*mvaRes)[ievt][0];
1214 Float_t w = ev->GetWeight();
1215
1216 if (DataInfo().IsSignal(ev)) {
1217 mvaResTypes->push_back(kTRUE);
1218 mva_s ->Fill( v, w );
1219 if (mvaProb) {
1220 proba_s->Fill( (*mvaProb)[ievt][0], w );
1221 rarity_s->Fill( GetRarity( v ), w );
1222 }
1223
1224 mva_eff_s ->Fill( v, w );
1225 }
1226 else {
1227 mvaResTypes->push_back(kFALSE);
1228 mva_b ->Fill( v, w );
1229 if (mvaProb) {
1230 proba_b->Fill( (*mvaProb)[ievt][0], w );
1231 rarity_b->Fill( GetRarity( v ), w );
1232 }
1233 mva_eff_b ->Fill( v, w );
1234 }
1235 }
1236
1237 // uncomment those (and several others if you want unnormalized output
1238 gTools().NormHist( mva_s );
1239 gTools().NormHist( mva_b );
1240 gTools().NormHist( proba_s );
1241 gTools().NormHist( proba_b );
1242 gTools().NormHist( rarity_s );
1243 gTools().NormHist( rarity_b );
1244 gTools().NormHist( mva_eff_s );
1245 gTools().NormHist( mva_eff_b );
1246
1247 // create PDFs from histograms, using default splines, and no additional smoothing
1248 if (fSplS) { delete fSplS; fSplS = 0; }
1249 if (fSplB) { delete fSplB; fSplB = 0; }
1250 fSplS = new PDF( TString(GetName()) + " PDF Sig", mva_s, PDF::kSpline2 );
1251 fSplB = new PDF( TString(GetName()) + " PDF Bkg", mva_b, PDF::kSpline2 );
1252}
1253
1254////////////////////////////////////////////////////////////////////////////////
1255/// general method used in writing the header of the weight files where
1256/// the used variables, variable transformation type etc. is specified
1257
1258void TMVA::MethodBase::WriteStateToStream( std::ostream& tf ) const
1259{
1260 TString prefix = "";
1261 UserGroup_t * userInfo = gSystem->GetUserInfo();
1262
1263 tf << prefix << "#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1264 tf << prefix << "Method : " << GetMethodTypeName() << "::" << GetMethodName() << std::endl;
1265 tf.setf(std::ios::left);
1266 tf << prefix << "TMVA Release : " << std::setw(10) << GetTrainingTMVAVersionString() << " ["
1267 << GetTrainingTMVAVersionCode() << "]" << std::endl;
1268 tf << prefix << "ROOT Release : " << std::setw(10) << GetTrainingROOTVersionString() << " ["
1269 << GetTrainingROOTVersionCode() << "]" << std::endl;
1270 tf << prefix << "Creator : " << userInfo->fUser << std::endl;
1271 tf << prefix << "Date : "; TDatime *d = new TDatime; tf << d->AsString() << std::endl; delete d;
1272 tf << prefix << "Host : " << gSystem->GetBuildNode() << std::endl;
1273 tf << prefix << "Dir : " << gSystem->WorkingDirectory() << std::endl;
1274 tf << prefix << "Training events: " << Data()->GetNTrainingEvents() << std::endl;
1275
1276 TString analysisType(((const_cast<TMVA::MethodBase*>(this)->GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification"));
1277
1278 tf << prefix << "Analysis type : " << "[" << ((GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification") << "]" << std::endl;
1279 tf << prefix << std::endl;
1280
1281 delete userInfo;
1282
1283 // First write all options
1284 tf << prefix << std::endl << prefix << "#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1285 WriteOptionsToStream( tf, prefix );
1286 tf << prefix << std::endl;
1287
1288 // Second write variable info
1289 tf << prefix << std::endl << prefix << "#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1290 WriteVarsToStream( tf, prefix );
1291 tf << prefix << std::endl;
1292}
1293
1294////////////////////////////////////////////////////////////////////////////////
1295/// xml writing
1296
1297void TMVA::MethodBase::AddInfoItem( void* gi, const TString& name, const TString& value) const
1298{
1299 void* it = gTools().AddChild(gi,"Info");
1300 gTools().AddAttr(it,"name", name);
1301 gTools().AddAttr(it,"value", value);
1302}
1303
1304////////////////////////////////////////////////////////////////////////////////
1305
1307 if (analysisType == Types::kRegression) {
1308 AddRegressionOutput( type );
1309 } else if (analysisType == Types::kMulticlass) {
1310 AddMulticlassOutput( type );
1311 } else {
1312 AddClassifierOutput( type );
1313 if (HasMVAPdfs())
1314 AddClassifierOutputProb( type );
1315 }
1316}
1317
1318////////////////////////////////////////////////////////////////////////////////
1319/// general method used in writing the header of the weight files where
1320/// the used variables, variable transformation type etc. is specified
1321
void TMVA::MethodBase::WriteStateToXML( void* parent ) const
{
   // Serialize the full method state below the given parent XML node.
   // The emission order (GeneralInfo, options, variables, spectators,
   // classes, targets, transformations, PDFs, weights) is part of the
   // weight-file format and must match the reader.
   if (!parent) return;

   UserGroup_t* userInfo = gSystem->GetUserInfo();

   // general bookkeeping: versions, creator, date, host, training statistics
   void* gi = gTools().AddChild(parent, "GeneralInfo");
   AddInfoItem( gi, "TMVA Release", GetTrainingTMVAVersionString() + " [" + gTools().StringFromInt(GetTrainingTMVAVersionCode()) + "]" );
   AddInfoItem( gi, "ROOT Release", GetTrainingROOTVersionString() + " [" + gTools().StringFromInt(GetTrainingROOTVersionCode()) + "]");
   AddInfoItem( gi, "Creator", userInfo->fUser);
   TDatime dt; AddInfoItem( gi, "Date", dt.AsString());
   AddInfoItem( gi, "Host", gSystem->GetBuildNode() );
   AddInfoItem( gi, "Dir", gSystem->WorkingDirectory());
   AddInfoItem( gi, "Training events", gTools().StringFromInt(Data()->GetNTrainingEvents()));
   AddInfoItem( gi, "TrainingTime", gTools().StringFromDouble(const_cast<TMVA::MethodBase*>(this)->GetTrainTime()));

   Types::EAnalysisType aType = const_cast<TMVA::MethodBase*>(this)->GetAnalysisType();
   TString analysisType((aType==Types::kRegression) ? "Regression" :
                        (aType==Types::kMulticlass ? "Multiclass" : "Classification"));
   AddInfoItem( gi, "AnalysisType", analysisType );
   delete userInfo;

   // write options
   AddOptionsXMLTo( parent );

   // write variable info
   AddVarsXMLTo( parent );

   // write spectator info
   if (fModelPersistence)
      AddSpectatorsXMLTo( parent );

   // write class info if in multiclass mode
   AddClassesXMLTo(parent);

   // write target info if in regression mode
   if (DoRegression()) AddTargetsXMLTo(parent);

   // write transformations
   GetTransformationHandler(false).AddXMLTo( parent );

   // write MVA variable distributions
   void* pdfs = gTools().AddChild(parent, "MVAPdfs");
   if (fMVAPdfS) fMVAPdfS->AddXMLTo(pdfs);
   if (fMVAPdfB) fMVAPdfB->AddXMLTo(pdfs);

   // write weights
   AddWeightsXMLTo( parent );
}
1371
1372////////////////////////////////////////////////////////////////////////////////
1373/// write reference MVA distributions (and other information)
1374/// to a ROOT type weight file
1375
1377{
1378 Bool_t addDirStatus = TH1::AddDirectoryStatus();
1379 TH1::AddDirectory( 0 ); // this avoids the binding of the hists in PDF to the current ROOT file
1380 fMVAPdfS = (TMVA::PDF*)rf.Get( "MVA_PDF_Signal" );
1381 fMVAPdfB = (TMVA::PDF*)rf.Get( "MVA_PDF_Background" );
1382
1383 TH1::AddDirectory( addDirStatus );
1384
1385 ReadWeightsFromStream( rf );
1386
1387 SetTestvarName();
1388}
1389
1390////////////////////////////////////////////////////////////////////////////////
1391/// write options and weights to file
1392/// note that each one text file for the main configuration information
1393/// and one ROOT file for ROOT objects are created
1394
1396{
1397 // ---- create the text file
1398 TString tfname( GetWeightFileName() );
1399
1400 // writing xml file
1401 TString xmlfname( tfname ); xmlfname.ReplaceAll( ".txt", ".xml" );
1402 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1403 << "Creating xml weight file: "
1404 << gTools().Color("lightblue") << xmlfname << gTools().Color("reset") << Endl;
1405 void* doc = gTools().xmlengine().NewDoc();
1406 void* rootnode = gTools().AddChild(0,"MethodSetup", "", true);
1407 gTools().xmlengine().DocSetRootElement(doc,rootnode);
1408 gTools().AddAttr(rootnode,"Method", GetMethodTypeName() + "::" + GetMethodName());
1409 WriteStateToXML(rootnode);
1410 gTools().xmlengine().SaveDoc(doc,xmlfname);
1411 gTools().xmlengine().FreeDoc(doc);
1412}
1413
1414////////////////////////////////////////////////////////////////////////////////
1415/// Function to write options and weights to file
1416
1418{
1419 // get the filename
1420
1421 TString tfname(GetWeightFileName());
1422
1423 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1424 << "Reading weight file: "
1425 << gTools().Color("lightblue") << tfname << gTools().Color("reset") << Endl;
1426
1427 if (tfname.EndsWith(".xml") ) {
1428 void* doc = gTools().xmlengine().ParseFile(tfname,gTools().xmlenginebuffersize()); // the default buffer size in TXMLEngine::ParseFile is 100k. Starting with ROOT 5.29 one can set the buffer size, see: http://savannah.cern.ch/bugs/?78864. This might be necessary for large XML files
1429 if (!doc) {
1430 Log() << kFATAL << "Error parsing XML file " << tfname << Endl;
1431 }
1432 void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1433 ReadStateFromXML(rootnode);
1434 gTools().xmlengine().FreeDoc(doc);
1435 }
1436 else {
1437 std::filebuf fb;
1438 fb.open(tfname.Data(),std::ios::in);
1439 if (!fb.is_open()) { // file not found --> Error
1440 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ReadStateFromFile> "
1441 << "Unable to open input weight file: " << tfname << Endl;
1442 }
1443 std::istream fin(&fb);
1444 ReadStateFromStream(fin);
1445 fb.close();
1446 }
1447 if (!fTxtWeightsOnly) {
1448 // ---- read the ROOT file
1449 TString rfname( tfname ); rfname.ReplaceAll( ".txt", ".root" );
1450 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Reading root weight file: "
1451 << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
1452 TFile* rfile = TFile::Open( rfname, "READ" );
1453 ReadStateFromStream( *rfile );
1454 rfile->Close();
1455 }
1456}
1457////////////////////////////////////////////////////////////////////////////////
1458/// for reading from memory
1459
1461 void* doc = gTools().xmlengine().ParseString(xmlstr);
1462 void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1463 ReadStateFromXML(rootnode);
1464 gTools().xmlengine().FreeDoc(doc);
1465
1466 return;
1467}
1468
1469////////////////////////////////////////////////////////////////////////////////
1470
1472{
1473
1474 TString fullMethodName;
1475 gTools().ReadAttr( methodNode, "Method", fullMethodName );
1476
1477 fMethodName = fullMethodName(fullMethodName.Index("::")+2,fullMethodName.Length());
1478
1479 // update logger
1480 Log().SetSource( GetName() );
1481 Log() << kDEBUG//<<Form("Dataset[%s] : ",DataInfo().GetName())
1482 << "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1483
1484 // after the method name is read, the testvar can be set
1485 SetTestvarName();
1486
1487 TString nodeName("");
1488 void* ch = gTools().GetChild(methodNode);
1489 while (ch!=0) {
1490 nodeName = TString( gTools().GetName(ch) );
1491
1492 if (nodeName=="GeneralInfo") {
1493 // read analysis type
1494
1495 TString name(""),val("");
1496 void* antypeNode = gTools().GetChild(ch);
1497 while (antypeNode) {
1498 gTools().ReadAttr( antypeNode, "name", name );
1499
1500 if (name == "TrainingTime")
1501 gTools().ReadAttr( antypeNode, "value", fTrainTime );
1502
1503 if (name == "AnalysisType") {
1504 gTools().ReadAttr( antypeNode, "value", val );
1505 val.ToLower();
1506 if (val == "regression" ) SetAnalysisType( Types::kRegression );
1507 else if (val == "classification" ) SetAnalysisType( Types::kClassification );
1508 else if (val == "multiclass" ) SetAnalysisType( Types::kMulticlass );
1509 else Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Analysis type " << val << " is not known." << Endl;
1510 }
1511
1512 if (name == "TMVA Release" || name == "TMVA") {
1513 TString s;
1514 gTools().ReadAttr( antypeNode, "value", s);
1515 fTMVATrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1516 Log() << kDEBUG <<Form("[%s] : ",DataInfo().GetName()) << "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
1517 }
1518
1519 if (name == "ROOT Release" || name == "ROOT") {
1520 TString s;
1521 gTools().ReadAttr( antypeNode, "value", s);
1522 fROOTTrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1523 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1524 << "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
1525 }
1526 antypeNode = gTools().GetNextChild(antypeNode);
1527 }
1528 }
1529 else if (nodeName=="Options") {
1530 ReadOptionsFromXML(ch);
1531 ParseOptions();
1532
1533 }
1534 else if (nodeName=="Variables") {
1535 ReadVariablesFromXML(ch);
1536 }
1537 else if (nodeName=="Spectators") {
1538 ReadSpectatorsFromXML(ch);
1539 }
1540 else if (nodeName=="Classes") {
1541 if (DataInfo().GetNClasses()==0) ReadClassesFromXML(ch);
1542 }
1543 else if (nodeName=="Targets") {
1544 if (DataInfo().GetNTargets()==0 && DoRegression()) ReadTargetsFromXML(ch);
1545 }
1546 else if (nodeName=="Transformations") {
1547 GetTransformationHandler().ReadFromXML(ch);
1548 }
1549 else if (nodeName=="MVAPdfs") {
1550 TString pdfname;
1551 if (fMVAPdfS) { delete fMVAPdfS; fMVAPdfS=0; }
1552 if (fMVAPdfB) { delete fMVAPdfB; fMVAPdfB=0; }
1553 void* pdfnode = gTools().GetChild(ch);
1554 if (pdfnode) {
1555 gTools().ReadAttr(pdfnode, "Name", pdfname);
1556 fMVAPdfS = new PDF(pdfname);
1557 fMVAPdfS->ReadXML(pdfnode);
1558 pdfnode = gTools().GetNextChild(pdfnode);
1559 gTools().ReadAttr(pdfnode, "Name", pdfname);
1560 fMVAPdfB = new PDF(pdfname);
1561 fMVAPdfB->ReadXML(pdfnode);
1562 }
1563 }
1564 else if (nodeName=="Weights") {
1565 ReadWeightsFromXML(ch);
1566 }
1567 else {
1568 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Unparsed XML node: '" << nodeName << "'" << Endl;
1569 }
1570 ch = gTools().GetNextChild(ch);
1571
1572 }
1573
1574 // update transformation handler
1575 if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1576}
1577
1578////////////////////////////////////////////////////////////////////////////////
1579/// read the header from the weight files of the different MVA methods
1580
1582{
1583 char buf[512];
1584
1585 // when reading from stream, we assume the files are produced with TMVA<=397
1586 SetAnalysisType(Types::kClassification);
1587
1588
1589 // first read the method name
1590 GetLine(fin,buf);
1591 while (!TString(buf).BeginsWith("Method")) GetLine(fin,buf);
1592 TString namestr(buf);
1593
1594 TString methodType = namestr(0,namestr.Index("::"));
1595 methodType = methodType(methodType.Last(' '),methodType.Length());
1596 methodType = methodType.Strip(TString::kLeading);
1597
1598 TString methodName = namestr(namestr.Index("::")+2,namestr.Length());
1599 methodName = methodName.Strip(TString::kLeading);
1600 if (methodName == "") methodName = methodType;
1601 fMethodName = methodName;
1602
1603 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1604
1605 // update logger
1606 Log().SetSource( GetName() );
1607
1608 // now the question is whether to read the variables first or the options (well, of course the order
1609 // of writing them needs to agree)
1610 //
1611 // the option "Decorrelation" is needed to decide if the variables we
1612 // read are decorrelated or not
1613 //
1614 // the variables are needed by some methods (TMLP) to build the NN
1615 // which is done in ProcessOptions so for the time being we first Read and Parse the options then
1616 // we read the variables, and then we process the options
1617
1618 // now read all options
1619 GetLine(fin,buf);
1620 while (!TString(buf).BeginsWith("#OPT")) GetLine(fin,buf);
1621 ReadOptionsFromStream(fin);
1622 ParseOptions();
1623
1624 // Now read variable info
1625 fin.getline(buf,512);
1626 while (!TString(buf).BeginsWith("#VAR")) fin.getline(buf,512);
1627 ReadVarsFromStream(fin);
1628
1629 // now we process the options (of the derived class)
1630 ProcessOptions();
1631
1632 if (IsNormalised()) {
1634 GetTransformationHandler().AddTransformation( new VariableNormalizeTransform(DataInfo()), -1 );
1635 norm->BuildTransformationFromVarInfo( DataInfo().GetVariableInfos() );
1636 }
1637 VariableTransformBase *varTrafo(0), *varTrafo2(0);
1638 if ( fVarTransformString == "None") {
1639 if (fUseDecorr)
1640 varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1641 } else if ( fVarTransformString == "Decorrelate" ) {
1642 varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1643 } else if ( fVarTransformString == "PCA" ) {
1644 varTrafo = GetTransformationHandler().AddTransformation( new VariablePCATransform(DataInfo()), -1 );
1645 } else if ( fVarTransformString == "Uniform" ) {
1646 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo(),"Uniform"), -1 );
1647 } else if ( fVarTransformString == "Gauss" ) {
1648 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1649 } else if ( fVarTransformString == "GaussDecorr" ) {
1650 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1651 varTrafo2 = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1652 } else {
1653 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ProcessOptions> Variable transform '"
1654 << fVarTransformString << "' unknown." << Endl;
1655 }
1656 // Now read decorrelation matrix if available
1657 if (GetTransformationHandler().GetTransformationList().GetSize() > 0) {
1658 fin.getline(buf,512);
1659 while (!TString(buf).BeginsWith("#MAT")) fin.getline(buf,512);
1660 if (varTrafo) {
1661 TString trafo(fVariableTransformTypeString); trafo.ToLower();
1662 varTrafo->ReadTransformationFromStream(fin, trafo );
1663 }
1664 if (varTrafo2) {
1665 TString trafo(fVariableTransformTypeString); trafo.ToLower();
1666 varTrafo2->ReadTransformationFromStream(fin, trafo );
1667 }
1668 }
1669
1670
1671 if (HasMVAPdfs()) {
1672 // Now read the MVA PDFs
1673 fin.getline(buf,512);
1674 while (!TString(buf).BeginsWith("#MVAPDFS")) fin.getline(buf,512);
1675 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
1676 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
1677 fMVAPdfS = new PDF(TString(GetName()) + " MVA PDF Sig");
1678 fMVAPdfB = new PDF(TString(GetName()) + " MVA PDF Bkg");
1679 fMVAPdfS->SetReadingVersion( GetTrainingTMVAVersionCode() );
1680 fMVAPdfB->SetReadingVersion( GetTrainingTMVAVersionCode() );
1681
1682 fin >> *fMVAPdfS;
1683 fin >> *fMVAPdfB;
1684 }
1685
1686 // Now read weights
1687 fin.getline(buf,512);
1688 while (!TString(buf).BeginsWith("#WGT")) fin.getline(buf,512);
1689 fin.getline(buf,512);
1690 ReadWeightsFromStream( fin );;
1691
1692 // update transformation handler
1693 if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1694
1695}
1696
1697////////////////////////////////////////////////////////////////////////////////
1698/// write the list of variables (name, min, max) for a given data
1699/// transformation method to the stream
1700
1701void TMVA::MethodBase::WriteVarsToStream( std::ostream& o, const TString& prefix ) const
1702{
1703 o << prefix << "NVar " << DataInfo().GetNVariables() << std::endl;
1704 std::vector<VariableInfo>::const_iterator varIt = DataInfo().GetVariableInfos().begin();
1705 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1706 o << prefix << "NSpec " << DataInfo().GetNSpectators() << std::endl;
1707 varIt = DataInfo().GetSpectatorInfos().begin();
1708 for (; varIt!=DataInfo().GetSpectatorInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1709}
1710
1711////////////////////////////////////////////////////////////////////////////////
1712/// Read the variables (name, min, max) for a given data
1713/// transformation method from the stream. In the stream we only
1714/// expect the limits which will be set
1715
1717{
1718 TString dummy;
1719 UInt_t readNVar;
1720 istr >> dummy >> readNVar;
1721
1722 if (readNVar!=DataInfo().GetNVariables()) {
1723 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1724 << " while there are " << readNVar << " variables declared in the file"
1725 << Endl;
1726 }
1727
1728 // we want to make sure all variables are read in the order they are defined
1729 VariableInfo varInfo;
1730 std::vector<VariableInfo>::iterator varIt = DataInfo().GetVariableInfos().begin();
1731 int varIdx = 0;
1732 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt, ++varIdx) {
1733 varInfo.ReadFromStream(istr);
1734 if (varIt->GetExpression() == varInfo.GetExpression()) {
1735 varInfo.SetExternalLink((*varIt).GetExternalLink());
1736 (*varIt) = varInfo;
1737 }
1738 else {
1739 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVarsFromStream>" << Endl;
1740 Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1741 Log() << kINFO << "is not the same as the one declared in the Reader (which is necessary for" << Endl;
1742 Log() << kINFO << "the correct working of the method):" << Endl;
1743 Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << varIt->GetExpression() << Endl;
1744 Log() << kINFO << " var #" << varIdx <<" declared in file : " << varInfo.GetExpression() << Endl;
1745 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1746 }
1747 }
1748}
1749
1750////////////////////////////////////////////////////////////////////////////////
1751/// write variable info to XML
1752
1753void TMVA::MethodBase::AddVarsXMLTo( void* parent ) const
1754{
1755 void* vars = gTools().AddChild(parent, "Variables");
1756 gTools().AddAttr( vars, "NVar", gTools().StringFromInt(DataInfo().GetNVariables()) );
1757
1758 for (UInt_t idx=0; idx<DataInfo().GetVariableInfos().size(); idx++) {
1759 VariableInfo& vi = DataInfo().GetVariableInfos()[idx];
1760 void* var = gTools().AddChild( vars, "Variable" );
1761 gTools().AddAttr( var, "VarIndex", idx );
1762 vi.AddToXML( var );
1763 }
1764}
1765
1766////////////////////////////////////////////////////////////////////////////////
1767/// write spectator info to XML
1768
1770{
1771 void* specs = gTools().AddChild(parent, "Spectators");
1772
1773 UInt_t writeIdx=0;
1774 for (UInt_t idx=0; idx<DataInfo().GetSpectatorInfos().size(); idx++) {
1775
1776 VariableInfo& vi = DataInfo().GetSpectatorInfos()[idx];
1777
1778 // we do not want to write spectators that are category-cuts,
1779 // except if the method is the category method and the spectators belong to it
1780 if (vi.GetVarType()=='C') continue;
1781
1782 void* spec = gTools().AddChild( specs, "Spectator" );
1783 gTools().AddAttr( spec, "SpecIndex", writeIdx++ );
1784 vi.AddToXML( spec );
1785 }
1786 gTools().AddAttr( specs, "NSpec", gTools().StringFromInt(writeIdx) );
1787}
1788
1789////////////////////////////////////////////////////////////////////////////////
1790/// write class info to XML
1791
1792void TMVA::MethodBase::AddClassesXMLTo( void* parent ) const
1793{
1794 UInt_t nClasses=DataInfo().GetNClasses();
1795
1796 void* classes = gTools().AddChild(parent, "Classes");
1797 gTools().AddAttr( classes, "NClass", nClasses );
1798
1799 for (UInt_t iCls=0; iCls<nClasses; ++iCls) {
1800 ClassInfo *classInfo=DataInfo().GetClassInfo (iCls);
1801 TString className =classInfo->GetName();
1802 UInt_t classNumber=classInfo->GetNumber();
1803
1804 void* classNode=gTools().AddChild(classes, "Class");
1805 gTools().AddAttr( classNode, "Name", className );
1806 gTools().AddAttr( classNode, "Index", classNumber );
1807 }
1808}
1809////////////////////////////////////////////////////////////////////////////////
1810/// write target info to XML
1811
1812void TMVA::MethodBase::AddTargetsXMLTo( void* parent ) const
1813{
1814 void* targets = gTools().AddChild(parent, "Targets");
1815 gTools().AddAttr( targets, "NTrgt", gTools().StringFromInt(DataInfo().GetNTargets()) );
1816
1817 for (UInt_t idx=0; idx<DataInfo().GetTargetInfos().size(); idx++) {
1818 VariableInfo& vi = DataInfo().GetTargetInfos()[idx];
1819 void* tar = gTools().AddChild( targets, "Target" );
1820 gTools().AddAttr( tar, "TargetIndex", idx );
1821 vi.AddToXML( tar );
1822 }
1823}
1824
1825////////////////////////////////////////////////////////////////////////////////
1826/// read variable info from XML
1827
1829{
1830 UInt_t readNVar;
1831 gTools().ReadAttr( varnode, "NVar", readNVar);
1832
1833 if (readNVar!=DataInfo().GetNVariables()) {
1834 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1835 << " while there are " << readNVar << " variables declared in the file"
1836 << Endl;
1837 }
1838
1839 // we want to make sure all variables are read in the order they are defined
1840 VariableInfo readVarInfo, existingVarInfo;
1841 int varIdx = 0;
1842 void* ch = gTools().GetChild(varnode);
1843 while (ch) {
1844 gTools().ReadAttr( ch, "VarIndex", varIdx);
1845 existingVarInfo = DataInfo().GetVariableInfos()[varIdx];
1846 readVarInfo.ReadFromXML(ch);
1847
1848 if (existingVarInfo.GetExpression() == readVarInfo.GetExpression()) {
1849 readVarInfo.SetExternalLink(existingVarInfo.GetExternalLink());
1850 existingVarInfo = readVarInfo;
1851 }
1852 else {
1853 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVariablesFromXML>" << Endl;
1854 Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1855 Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1856 Log() << kINFO << "correct working of the method):" << Endl;
1857 Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << existingVarInfo.GetExpression() << Endl;
1858 Log() << kINFO << " var #" << varIdx <<" declared in file : " << readVarInfo.GetExpression() << Endl;
1859 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1860 }
1861 ch = gTools().GetNextChild(ch);
1862 }
1863}
1864
1865////////////////////////////////////////////////////////////////////////////////
1866/// read spectator info from XML
1867
1869{
1870 UInt_t readNSpec;
1871 gTools().ReadAttr( specnode, "NSpec", readNSpec);
1872
1873 if (readNSpec!=DataInfo().GetNSpectators(kFALSE)) {
1874 Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "You declared "<< DataInfo().GetNSpectators(kFALSE) << " spectators in the Reader"
1875 << " while there are " << readNSpec << " spectators declared in the file"
1876 << Endl;
1877 }
1878
1879 // we want to make sure all variables are read in the order they are defined
1880 VariableInfo readSpecInfo, existingSpecInfo;
1881 int specIdx = 0;
1882 void* ch = gTools().GetChild(specnode);
1883 while (ch) {
1884 gTools().ReadAttr( ch, "SpecIndex", specIdx);
1885 existingSpecInfo = DataInfo().GetSpectatorInfos()[specIdx];
1886 readSpecInfo.ReadFromXML(ch);
1887
1888 if (existingSpecInfo.GetExpression() == readSpecInfo.GetExpression()) {
1889 readSpecInfo.SetExternalLink(existingSpecInfo.GetExternalLink());
1890 existingSpecInfo = readSpecInfo;
1891 }
1892 else {
1893 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadSpectatorsFromXML>" << Endl;
1894 Log() << kINFO << "The definition (or the order) of the spectators found in the input file is" << Endl;
1895 Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1896 Log() << kINFO << "correct working of the method):" << Endl;
1897 Log() << kINFO << " spec #" << specIdx <<" declared in Reader: " << existingSpecInfo.GetExpression() << Endl;
1898 Log() << kINFO << " spec #" << specIdx <<" declared in file : " << readSpecInfo.GetExpression() << Endl;
1899 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1900 }
1901 ch = gTools().GetNextChild(ch);
1902 }
1903}
1904
1905////////////////////////////////////////////////////////////////////////////////
1906/// read number of classes from XML
1907
1909{
1910 UInt_t readNCls;
1911 // coverity[tainted_data_argument]
1912 gTools().ReadAttr( clsnode, "NClass", readNCls);
1913
1914 TString className="";
1915 UInt_t classIndex=0;
1916 void* ch = gTools().GetChild(clsnode);
1917 if (!ch) {
1918 for (UInt_t icls = 0; icls<readNCls;++icls) {
1919 TString classname = Form("class%i",icls);
1920 DataInfo().AddClass(classname);
1921
1922 }
1923 }
1924 else{
1925 while (ch) {
1926 gTools().ReadAttr( ch, "Index", classIndex);
1927 gTools().ReadAttr( ch, "Name", className );
1928 DataInfo().AddClass(className);
1929
1930 ch = gTools().GetNextChild(ch);
1931 }
1932 }
1933
1934 // retrieve signal and background class index
1935 if (DataInfo().GetClassInfo("Signal") != 0) {
1936 fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
1937 }
1938 else
1939 fSignalClass=0;
1940 if (DataInfo().GetClassInfo("Background") != 0) {
1941 fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
1942 }
1943 else
1944 fBackgroundClass=1;
1945}
1946
1947////////////////////////////////////////////////////////////////////////////////
1948/// read target info from XML
1949
1951{
1952 UInt_t readNTar;
1953 gTools().ReadAttr( tarnode, "NTrgt", readNTar);
1954
1955 int tarIdx = 0;
1956 TString expression;
1957 void* ch = gTools().GetChild(tarnode);
1958 while (ch) {
1959 gTools().ReadAttr( ch, "TargetIndex", tarIdx);
1960 gTools().ReadAttr( ch, "Expression", expression);
1961 DataInfo().AddTarget(expression,"","",0,0);
1962
1963 ch = gTools().GetNextChild(ch);
1964 }
1965}
1966
1967////////////////////////////////////////////////////////////////////////////////
1968/// returns the ROOT directory where info/histograms etc of the
1969/// corresponding MVA method instance are stored
1970
1972{
1973 if (fBaseDir != 0) return fBaseDir;
1974 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " not set yet --> check if already there.." <<Endl;
1975
1976 if (IsSilentFile()) {
1977 Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
1978 << "MethodBase::BaseDir() - No directory exists when running a Method without output file. Enable the "
1979 "output when creating the factory"
1980 << Endl;
1981 }
1982
1983 TDirectory* methodDir = MethodBaseDir();
1984 if (methodDir==0)
1985 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MethodBase::BaseDir() - MethodBaseDir() return a NULL pointer!" << Endl;
1986
1987 TString defaultDir = GetMethodName();
1988 TDirectory *sdir = methodDir->GetDirectory(defaultDir.Data());
1989 if(!sdir)
1990 {
1991 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " does not exist yet--> created it" <<Endl;
1992 sdir = methodDir->mkdir(defaultDir);
1993 sdir->cd();
1994 // write weight file name into target file
1995 if (fModelPersistence) {
1996 TObjString wfilePath( gSystem->WorkingDirectory() );
1997 TObjString wfileName( GetWeightFileName() );
1998 wfilePath.Write( "TrainingPath" );
1999 wfileName.Write( "WeightFileName" );
2000 }
2001 }
2002
2003 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " existed, return it.." <<Endl;
2004 return sdir;
2005}
2006
2007////////////////////////////////////////////////////////////////////////////////
2008/// returns the ROOT directory where all instances of the
2009/// corresponding MVA method are stored
2010
2012{
2013 if (fMethodBaseDir != 0) {
2014 return fMethodBaseDir;
2015 }
2016
2017 const char *datasetName = DataInfo().GetName();
2018
2019 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodTypeName()
2020 << " not set yet --> check if already there.." << Endl;
2021
2022 TDirectory *factoryBaseDir = GetFile();
2023 if (!factoryBaseDir) return nullptr;
2024 fMethodBaseDir = factoryBaseDir->GetDirectory(datasetName);
2025 if (!fMethodBaseDir) {
2026 fMethodBaseDir = factoryBaseDir->mkdir(datasetName, Form("Base directory for dataset %s", datasetName));
2027 if (!fMethodBaseDir) {
2028 Log() << kFATAL << "Can not create dir " << datasetName;
2029 }
2030 }
2031 TString methodTypeDir = Form("Method_%s", GetMethodTypeName().Data());
2032 fMethodBaseDir = fMethodBaseDir->GetDirectory(methodTypeDir.Data());
2033
2034 if (!fMethodBaseDir) {
2035 TDirectory *datasetDir = factoryBaseDir->GetDirectory(datasetName);
2036 TString methodTypeDirHelpStr = Form("Directory for all %s methods", GetMethodTypeName().Data());
2037 fMethodBaseDir = datasetDir->mkdir(methodTypeDir.Data(), methodTypeDirHelpStr);
2038 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodName()
2039 << " does not exist yet--> created it" << Endl;
2040 }
2041
2042 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName)
2043 << "Return from MethodBaseDir() after creating base directory " << Endl;
2044 return fMethodBaseDir;
2045}
2046
2047////////////////////////////////////////////////////////////////////////////////
2048/// set directory of weight file
2049
2051{
2052 fFileDir = fileDir;
2053 gSystem->mkdir( fFileDir, kTRUE );
2054}
2055
2056////////////////////////////////////////////////////////////////////////////////
2057/// set the weight file name (depreciated)
2058
2060{
2061 fWeightFile = theWeightFile;
2062}
2063
2064////////////////////////////////////////////////////////////////////////////////
2065/// retrieve weight file name
2066
2068{
2069 if (fWeightFile!="") return fWeightFile;
2070
2071 // the default consists of
2072 // directory/jobname_methodname_suffix.extension.{root/txt}
2073 TString suffix = "";
2074 TString wFileDir(GetWeightFileDir());
2075 TString wFileName = GetJobName() + "_" + GetMethodName() +
2076 suffix + "." + gConfig().GetIONames().fWeightFileExtension + ".xml";
2077 if (wFileDir.IsNull() ) return wFileName;
2078 // add weight file directory of it is not null
2079 return ( wFileDir + (wFileDir[wFileDir.Length()-1]=='/' ? "" : "/")
2080 + wFileName );
2081}
2082////////////////////////////////////////////////////////////////////////////////
2083/// writes all MVA evaluation histograms to file
2084
2086{
2087 BaseDir()->cd();
2088
2089
2090 // write MVA PDFs to file - if exist
2091 if (0 != fMVAPdfS) {
2092 fMVAPdfS->GetOriginalHist()->Write();
2093 fMVAPdfS->GetSmoothedHist()->Write();
2094 fMVAPdfS->GetPDFHist()->Write();
2095 }
2096 if (0 != fMVAPdfB) {
2097 fMVAPdfB->GetOriginalHist()->Write();
2098 fMVAPdfB->GetSmoothedHist()->Write();
2099 fMVAPdfB->GetPDFHist()->Write();
2100 }
2101
2102 // write result-histograms
2103 Results* results = Data()->GetResults( GetMethodName(), treetype, Types::kMaxAnalysisType );
2104 if (!results)
2105 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<WriteEvaluationHistosToFile> Unknown result: "
2106 << GetMethodName() << (treetype==Types::kTraining?"/kTraining":"/kTesting")
2107 << "/kMaxAnalysisType" << Endl;
2108 results->GetStorage()->Write();
2109 if (treetype==Types::kTesting) {
2110 // skipping plotting of variables if too many (default is 200)
2111 if ((int) DataInfo().GetNVariables()< gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables)
2112 GetTransformationHandler().PlotVariables (GetEventCollection( Types::kTesting ), BaseDir() );
2113 else
2114 Log() << kINFO << TString::Format("Dataset[%s] : ",DataInfo().GetName())
2115 << " variable plots are not produces ! The number of variables is " << DataInfo().GetNVariables()
2116 << " , it is larger than " << gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables << Endl;
2117 }
2118}
2119
2120////////////////////////////////////////////////////////////////////////////////
2121/// write special monitoring histograms to file
2122/// dummy implementation here -----------------
2123
2125{
2126}
2127
2128////////////////////////////////////////////////////////////////////////////////
2129/// reads one line from the input stream
2130/// checks for certain keywords and interprets
2131/// the line if keywords are found
2132
2133Bool_t TMVA::MethodBase::GetLine(std::istream& fin, char* buf )
2134{
2135 fin.getline(buf,512);
2136 TString line(buf);
2137 if (line.BeginsWith("TMVA Release")) {
2138 Ssiz_t start = line.First('[')+1;
2139 Ssiz_t length = line.Index("]",start)-start;
2140 TString code = line(start,length);
2141 std::stringstream s(code.Data());
2142 s >> fTMVATrainingVersion;
2143 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
2144 }
2145 if (line.BeginsWith("ROOT Release")) {
2146 Ssiz_t start = line.First('[')+1;
2147 Ssiz_t length = line.Index("]",start)-start;
2148 TString code = line(start,length);
2149 std::stringstream s(code.Data());
2150 s >> fROOTTrainingVersion;
2151 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
2152 }
2153 if (line.BeginsWith("Analysis type")) {
2154 Ssiz_t start = line.First('[')+1;
2155 Ssiz_t length = line.Index("]",start)-start;
2156 TString code = line(start,length);
2157 std::stringstream s(code.Data());
2158 std::string analysisType;
2159 s >> analysisType;
2160 if (analysisType == "regression" || analysisType == "Regression") SetAnalysisType( Types::kRegression );
2161 else if (analysisType == "classification" || analysisType == "Classification") SetAnalysisType( Types::kClassification );
2162 else if (analysisType == "multiclass" || analysisType == "Multiclass") SetAnalysisType( Types::kMulticlass );
2163 else Log() << kFATAL << "Analysis type " << analysisType << " from weight-file not known!" << std::endl;
2164
2165 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Method was trained for "
2166 << (GetAnalysisType() == Types::kRegression ? "Regression" :
2167 (GetAnalysisType() == Types::kMulticlass ? "Multiclass" : "Classification")) << Endl;
2168 }
2169
2170 return true;
2171}
2172
2173////////////////////////////////////////////////////////////////////////////////
2174/// Create PDFs of the MVA output variables
2175
2177{
2178 Data()->SetCurrentType(Types::kTraining);
2179
2180 // the PDF's are stored as results ONLY if the corresponding "results" are booked,
2181 // otherwise they will be only used 'online'
2182 ResultsClassification * mvaRes = dynamic_cast<ResultsClassification*>
2183 ( Data()->GetResults(GetMethodName(), Types::kTraining, Types::kClassification) );
2184
2185 if (mvaRes==0 || mvaRes->GetSize()==0) {
2186 Log() << kERROR<<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CreateMVAPdfs> No result of classifier testing available" << Endl;
2187 }
2188
2189 Double_t minVal = *std::min_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2190 Double_t maxVal = *std::max_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2191
2192 // create histograms that serve as basis to create the MVA Pdfs
2193 TH1* histMVAPdfS = new TH1D( GetMethodTypeName() + "_tr_S", GetMethodTypeName() + "_tr_S",
2194 fMVAPdfS->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2195 TH1* histMVAPdfB = new TH1D( GetMethodTypeName() + "_tr_B", GetMethodTypeName() + "_tr_B",
2196 fMVAPdfB->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2197
2198
2199 // compute sum of weights properly
2200 histMVAPdfS->Sumw2();
2201 histMVAPdfB->Sumw2();
2202
2203 // fill histograms
2204 for (UInt_t ievt=0; ievt<mvaRes->GetSize(); ievt++) {
2205 Double_t theVal = mvaRes->GetValueVector()->at(ievt);
2206 Double_t theWeight = Data()->GetEvent(ievt)->GetWeight();
2207
2208 if (DataInfo().IsSignal(Data()->GetEvent(ievt))) histMVAPdfS->Fill( theVal, theWeight );
2209 else histMVAPdfB->Fill( theVal, theWeight );
2210 }
2211
2212 gTools().NormHist( histMVAPdfS );
2213 gTools().NormHist( histMVAPdfB );
2214
2215 // momentary hack for ROOT problem
2216 if(!IsSilentFile())
2217 {
2218 histMVAPdfS->Write();
2219 histMVAPdfB->Write();
2220 }
2221 // create PDFs
2222 fMVAPdfS->BuildPDF ( histMVAPdfS );
2223 fMVAPdfB->BuildPDF ( histMVAPdfB );
2224 fMVAPdfS->ValidatePDF( histMVAPdfS );
2225 fMVAPdfB->ValidatePDF( histMVAPdfB );
2226
2227 if (DataInfo().GetNClasses() == 2) { // TODO: this is an ugly hack.. adapt this to new framework
2228 Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName())
2229 << Form( "<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
2230 GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
2231 << Endl;
2232 }
2233
2234 delete histMVAPdfS;
2235 delete histMVAPdfB;
2236}
2237
   // the simple one, automatically calculates the mvaVal and uses the
   // SAME sig/bkg ratio as given in the training sample (typically 50/50
   // .. (NormMode=EqualNumEvents) but can be different)
   if (!fMVAPdfS || !fMVAPdfB) {
      // build the PDFs lazily if the user did not request them up front
      Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<GetProba> MVA PDFs for Signal and Background don't exist yet, we'll create them on demand" << Endl;
      CreateMVAPdfs();
   }
   // a-priori signal fraction taken from the training sample weights
   Double_t sigFraction = DataInfo().GetTrainingSumSignalWeights() / (DataInfo().GetTrainingSumSignalWeights() + DataInfo().GetTrainingSumBackgrWeights() );
   Double_t mvaVal = GetMvaValue(ev);

   // delegate to the two-argument overload for the Bayes computation
   return GetProba(mvaVal,sigFraction);

}
2252////////////////////////////////////////////////////////////////////////////////
2253/// compute likelihood ratio
2254
2256{
2257 if (!fMVAPdfS || !fMVAPdfB) {
2258 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetProba> MVA PDFs for Signal and Background don't exist" << Endl;
2259 return -1.0;
2260 }
2261 Double_t p_s = fMVAPdfS->GetVal( mvaVal );
2262 Double_t p_b = fMVAPdfB->GetVal( mvaVal );
2263
2264 Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
2265
2266 return (denom > 0) ? (p_s*ap_sig) / denom : -1;
2267}
2268
2269////////////////////////////////////////////////////////////////////////////////
2270/// compute rarity:
2271/// \f[
2272/// R(x) = \int_{[-\infty..x]} { PDF(x') dx' }
2273/// \f]
2274/// where PDF(x) is the PDF of the classifier's signal or background distribution
2275
2277{
2278 if ((reftype == Types::kSignal && !fMVAPdfS) || (reftype == Types::kBackground && !fMVAPdfB)) {
2279 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetRarity> Required MVA PDF for Signal or Background does not exist: "
2280 << "select option \"CreateMVAPdfs\"" << Endl;
2281 return 0.0;
2282 }
2283
2284 PDF* thePdf = ((reftype == Types::kSignal) ? fMVAPdfS : fMVAPdfB);
2285
2286 return thePdf->GetIntegral( thePdf->GetXmin(), mvaVal );
2287}
2288
2289////////////////////////////////////////////////////////////////////////////////
2290/// fill background efficiency (resp. rejection) versus signal efficiency plots
2291/// returns signal efficiency at background efficiency indicated in theString
2292
2294{
2295 Data()->SetCurrentType(type);
2296 Results* results = Data()->GetResults( GetMethodName(), type, Types::kClassification );
2297 std::vector<Float_t>* mvaRes = dynamic_cast<ResultsClassification*>(results)->GetValueVector();
2298
2299 // parse input string for required background efficiency
2300 TList* list = gTools().ParseFormatLine( theString );
2301
2302 // sanity check
2303 Bool_t computeArea = kFALSE;
2304 if (!list || list->GetSize() < 2) computeArea = kTRUE; // the area is computed
2305 else if (list->GetSize() > 2) {
2306 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Wrong number of arguments"
2307 << " in string: " << theString
2308 << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
2309 delete list;
2310 return -1;
2311 }
2312
2313 // sanity check
2314 if ( results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2315 results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2316 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Binning mismatch between signal and background histos" << Endl;
2317 delete list;
2318 return -1.0;
2319 }
2320
2321 // create histograms
2322
2323 // first, get efficiency histograms for signal and background
2324 TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2325 Double_t xmin = effhist->GetXaxis()->GetXmin();
2326 Double_t xmax = effhist->GetXaxis()->GetXmax();
2327
2328 TTHREAD_TLS(Double_t) nevtS;
2329
2330 // first round ? --> create histograms
2331 if (results->DoesExist("MVA_EFF_S")==0) {
2332
2333 // for efficiency plot
2334 TH1* eff_s = new TH1D( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbinsH, xmin, xmax );
2335 TH1* eff_b = new TH1D( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbinsH, xmin, xmax );
2336 results->Store(eff_s, "MVA_EFF_S");
2337 results->Store(eff_b, "MVA_EFF_B");
2338
2339 // sign if cut
2340 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2341
2342 // this method is unbinned
2343 nevtS = 0;
2344 for (UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2345
2346 // read the tree
2347 Bool_t isSignal = DataInfo().IsSignal(GetEvent(ievt));
2348 Float_t theWeight = GetEvent(ievt)->GetWeight();
2349 Float_t theVal = (*mvaRes)[ievt];
2350
2351 // select histogram depending on if sig or bgd
2352 TH1* theHist = isSignal ? eff_s : eff_b;
2353
2354 // count signal and background events in tree
2355 if (isSignal) nevtS+=theWeight;
2356
2357 TAxis* axis = theHist->GetXaxis();
2358 Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2359 if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2360 if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2361 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2362 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2363
2364 if (sign > 0)
2365 for (Int_t ibin=1; ibin<=maxbin; ibin++) theHist->AddBinContent( ibin , theWeight);
2366 else if (sign < 0)
2367 for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->AddBinContent( ibin , theWeight );
2368 else
2369 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Mismatch in sign" << Endl;
2370 }
2371
2372 // renormalise maximum to <=1
2373 // eff_s->Scale( 1.0/TMath::Max(1.,eff_s->GetMaximum()) );
2374 // eff_b->Scale( 1.0/TMath::Max(1.,eff_b->GetMaximum()) );
2375
2378
2379 // background efficiency versus signal efficiency
2380 TH1* eff_BvsS = new TH1D( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2381 results->Store(eff_BvsS, "MVA_EFF_BvsS");
2382 eff_BvsS->SetXTitle( "Signal eff" );
2383 eff_BvsS->SetYTitle( "Backgr eff" );
2384
2385 // background rejection (=1-eff.) versus signal efficiency
2386 TH1* rej_BvsS = new TH1D( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2387 results->Store(rej_BvsS);
2388 rej_BvsS->SetXTitle( "Signal eff" );
2389 rej_BvsS->SetYTitle( "Backgr rejection (1-eff)" );
2390
2391 // inverse background eff (1/eff.) versus signal efficiency
2392 TH1* inveff_BvsS = new TH1D( GetTestvarName() + "_invBeffvsSeff",
2393 GetTestvarName(), fNbins, 0, 1 );
2394 results->Store(inveff_BvsS);
2395 inveff_BvsS->SetXTitle( "Signal eff" );
2396 inveff_BvsS->SetYTitle( "Inverse backgr. eff (1/eff)" );
2397
2398 // use root finder
2399 // spline background efficiency plot
2400 // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2402 fSplRefS = new TSpline1( "spline2_signal", new TGraph( eff_s ) );
2403 fSplRefB = new TSpline1( "spline2_background", new TGraph( eff_b ) );
2404
2405 // verify spline sanity
2406 gTools().CheckSplines( eff_s, fSplRefS );
2407 gTools().CheckSplines( eff_b, fSplRefB );
2408 }
2409
2410 // make the background-vs-signal efficiency plot
2411
2412 // create root finder
2413 RootFinder rootFinder( this, fXmin, fXmax );
2414
2415 Double_t effB = 0;
2416 fEffS = eff_s; // to be set for the root finder
2417 for (Int_t bini=1; bini<=fNbins; bini++) {
2418
2419 // find cut value corresponding to a given signal efficiency
2420 Double_t effS = eff_BvsS->GetBinCenter( bini );
2421 Double_t cut = rootFinder.Root( effS );
2422
2423 // retrieve background efficiency for given cut
2424 if (Use_Splines_for_Eff_) effB = fSplRefB->Eval( cut );
2425 else effB = eff_b->GetBinContent( eff_b->FindBin( cut ) );
2426
2427 // and fill histograms
2428 eff_BvsS->SetBinContent( bini, effB );
2429 rej_BvsS->SetBinContent( bini, 1.0-effB );
2431 inveff_BvsS->SetBinContent( bini, 1.0/effB );
2432 }
2433
2434 // create splines for histogram
2435 fSpleffBvsS = new TSpline1( "effBvsS", new TGraph( eff_BvsS ) );
2436
2437 // search for overlap point where, when cutting on it,
2438 // one would obtain: eff_S = rej_B = 1 - eff_B
2439 Double_t effS = 0., rejB, effS_ = 0., rejB_ = 0.;
2440 Int_t nbins_ = 5000;
2441 for (Int_t bini=1; bini<=nbins_; bini++) {
2442
2443 // get corresponding signal and background efficiencies
2444 effS = (bini - 0.5)/Float_t(nbins_);
2445 rejB = 1.0 - fSpleffBvsS->Eval( effS );
2446
2447 // find signal efficiency that corresponds to required background efficiency
2448 if ((effS - rejB)*(effS_ - rejB_) < 0) break;
2449 effS_ = effS;
2450 rejB_ = rejB;
2451 }
2452
2453 // find cut that corresponds to signal efficiency and update signal-like criterion
2454 Double_t cut = rootFinder.Root( 0.5*(effS + effS_) );
2455 SetSignalReferenceCut( cut );
2456 fEffS = 0;
2457 }
2458
2459 // must exist...
2460 if (0 == fSpleffBvsS) {
2461 delete list;
2462 return 0.0;
2463 }
2464
2465 // now find signal efficiency that corresponds to required background efficiency
2466 Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
2467 Int_t nbins_ = 1000;
2468
2469 if (computeArea) {
2470
2471 // compute area of rej-vs-eff plot
2472 Double_t integral = 0;
2473 for (Int_t bini=1; bini<=nbins_; bini++) {
2474
2475 // get corresponding signal and background efficiencies
2476 effS = (bini - 0.5)/Float_t(nbins_);
2477 effB = fSpleffBvsS->Eval( effS );
2478 integral += (1.0 - effB);
2479 }
2480 integral /= nbins_;
2481
2482 delete list;
2483 return integral;
2484 }
2485 else {
2486
2487 // that will be the value of the efficiency retured (does not affect
2488 // the efficiency-vs-bkg plot which is done anyway.
2489 Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2490
2491 // find precise efficiency value
2492 for (Int_t bini=1; bini<=nbins_; bini++) {
2493
2494 // get corresponding signal and background efficiencies
2495 effS = (bini - 0.5)/Float_t(nbins_);
2496 effB = fSpleffBvsS->Eval( effS );
2497
2498 // find signal efficiency that corresponds to required background efficiency
2499 if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2500 effS_ = effS;
2501 effB_ = effB;
2502 }
2503
2504 // take mean between bin above and bin below
2505 effS = 0.5*(effS + effS_);
2506
2507 effSerr = 0;
2508 if (nevtS > 0) effSerr = TMath::Sqrt( effS*(1.0 - effS)/nevtS );
2509
2510 delete list;
2511 return effS;
2512 }
2513
2514 return -1;
2515}
2516
2517////////////////////////////////////////////////////////////////////////////////
2518
2520{
2521 Data()->SetCurrentType(Types::kTraining);
2522
2523 Results* results = Data()->GetResults(GetMethodName(), Types::kTesting, Types::kNoAnalysisType);
2524
2525 // fill background efficiency (resp. rejection) versus signal efficiency plots
2526 // returns signal efficiency at background efficiency indicated in theString
2527
2528 // parse input string for required background efficiency
2529 TList* list = gTools().ParseFormatLine( theString );
2530 // sanity check
2531
2532 if (list->GetSize() != 2) {
2533 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Wrong number of arguments"
2534 << " in string: " << theString
2535 << " | required format, e.g., Efficiency:0.05" << Endl;
2536 delete list;
2537 return -1;
2538 }
2539 // that will be the value of the efficiency retured (does not affect
2540 // the efficiency-vs-bkg plot which is done anyway.
2541 Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2542
2543 delete list;
2544
2545 // sanity check
2546 if (results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2547 results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2548 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Binning mismatch between signal and background histos"
2549 << Endl;
2550 return -1.0;
2551 }
2552
2553 // create histogram
2554
2555 // first, get efficiency histograms for signal and background
2556 TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2557 Double_t xmin = effhist->GetXaxis()->GetXmin();
2558 Double_t xmax = effhist->GetXaxis()->GetXmax();
2559
2560 // first round ? --> create and fill histograms
2561 if (results->DoesExist("MVA_TRAIN_S")==0) {
2562
2563 // classifier response distributions for test sample
2564 Double_t sxmax = fXmax+0.00001;
2565
2566 // MVA plots on the training sample (check for overtraining)
2567 TH1* mva_s_tr = new TH1D( GetTestvarName() + "_Train_S",GetTestvarName() + "_Train_S", fNbinsMVAoutput, fXmin, sxmax );
2568 TH1* mva_b_tr = new TH1D( GetTestvarName() + "_Train_B",GetTestvarName() + "_Train_B", fNbinsMVAoutput, fXmin, sxmax );
2569 results->Store(mva_s_tr, "MVA_TRAIN_S");
2570 results->Store(mva_b_tr, "MVA_TRAIN_B");
2571 mva_s_tr->Sumw2();
2572 mva_b_tr->Sumw2();
2573
2574 // Training efficiency plots
2575 TH1* mva_eff_tr_s = new TH1D( GetTestvarName() + "_trainingEffS", GetTestvarName() + " (signal)",
2576 fNbinsH, xmin, xmax );
2577 TH1* mva_eff_tr_b = new TH1D( GetTestvarName() + "_trainingEffB", GetTestvarName() + " (background)",
2578 fNbinsH, xmin, xmax );
2579 results->Store(mva_eff_tr_s, "MVA_TRAINEFF_S");
2580 results->Store(mva_eff_tr_b, "MVA_TRAINEFF_B");
2581
2582 // sign if cut
2583 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2584
2585 std::vector<Double_t> mvaValues = GetMvaValues(0,Data()->GetNEvents());
2586 assert( (Long64_t) mvaValues.size() == Data()->GetNEvents());
2587
2588 // this method is unbinned
2589 for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2590
2591 Data()->SetCurrentEvent(ievt);
2592 const Event* ev = GetEvent();
2593
2594 Double_t theVal = mvaValues[ievt];
2595 Double_t theWeight = ev->GetWeight();
2596
2597 TH1* theEffHist = DataInfo().IsSignal(ev) ? mva_eff_tr_s : mva_eff_tr_b;
2598 TH1* theClsHist = DataInfo().IsSignal(ev) ? mva_s_tr : mva_b_tr;
2599
2600 theClsHist->Fill( theVal, theWeight );
2601
2602 TAxis* axis = theEffHist->GetXaxis();
2603 Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2604 if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2605 if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2606 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2607 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2608
2609 if (sign > 0) for (Int_t ibin=1; ibin<=maxbin; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2610 else for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2611 }
2612
2613 // normalise output distributions
2614 // uncomment those (and several others if you want unnormalized output
2615 gTools().NormHist( mva_s_tr );
2616 gTools().NormHist( mva_b_tr );
2617
2618 // renormalise to maximum
2619 mva_eff_tr_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_s->GetMaximum()) );
2620 mva_eff_tr_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_b->GetMaximum()) );
2621
2622 // Training background efficiency versus signal efficiency
2623 TH1* eff_bvss = new TH1D( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2624 // Training background rejection (=1-eff.) versus signal efficiency
2625 TH1* rej_bvss = new TH1D( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2626 results->Store(eff_bvss, "EFF_BVSS_TR");
2627 results->Store(rej_bvss, "REJ_BVSS_TR");
2628
2629 // use root finder
2630 // spline background efficiency plot
2631 // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2633 if (fSplTrainRefS) delete fSplTrainRefS;
2634 if (fSplTrainRefB) delete fSplTrainRefB;
2635 fSplTrainRefS = new TSpline1( "spline2_signal", new TGraph( mva_eff_tr_s ) );
2636 fSplTrainRefB = new TSpline1( "spline2_background", new TGraph( mva_eff_tr_b ) );
2637
2638 // verify spline sanity
2639 gTools().CheckSplines( mva_eff_tr_s, fSplTrainRefS );
2640 gTools().CheckSplines( mva_eff_tr_b, fSplTrainRefB );
2641 }
2642
2643 // make the background-vs-signal efficiency plot
2644
2645 // create root finder
2646 RootFinder rootFinder(this, fXmin, fXmax );
2647
2648 Double_t effB = 0;
2649 fEffS = results->GetHist("MVA_TRAINEFF_S");
2650 for (Int_t bini=1; bini<=fNbins; bini++) {
2651
2652 // find cut value corresponding to a given signal efficiency
2653 Double_t effS = eff_bvss->GetBinCenter( bini );
2654
2655 Double_t cut = rootFinder.Root( effS );
2656
2657 // retrieve background efficiency for given cut
2658 if (Use_Splines_for_Eff_) effB = fSplTrainRefB->Eval( cut );
2659 else effB = mva_eff_tr_b->GetBinContent( mva_eff_tr_b->FindBin( cut ) );
2660
2661 // and fill histograms
2662 eff_bvss->SetBinContent( bini, effB );
2663 rej_bvss->SetBinContent( bini, 1.0-effB );
2664 }
2665 fEffS = 0;
2666
2667 // create splines for histogram
2668 fSplTrainEffBvsS = new TSpline1( "effBvsS", new TGraph( eff_bvss ) );
2669 }
2670
2671 // must exist...
2672 if (0 == fSplTrainEffBvsS) return 0.0;
2673
2674 // now find signal efficiency that corresponds to required background efficiency
2675 Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
2676 Int_t nbins_ = 1000;
2677 for (Int_t bini=1; bini<=nbins_; bini++) {
2678
2679 // get corresponding signal and background efficiencies
2680 effS = (bini - 0.5)/Float_t(nbins_);
2681 effB = fSplTrainEffBvsS->Eval( effS );
2682
2683 // find signal efficiency that corresponds to required background efficiency
2684 if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2685 effS_ = effS;
2686 effB_ = effB;
2687 }
2688
2689 return 0.5*(effS + effS_); // the mean between bin above and bin below
2690}
2691
2692////////////////////////////////////////////////////////////////////////////////
2693
2694std::vector<Float_t> TMVA::MethodBase::GetMulticlassEfficiency(std::vector<std::vector<Float_t> >& purity)
2695{
2696 Data()->SetCurrentType(Types::kTesting);
2697 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
2698 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in GetMulticlassEfficiency, exiting."<<Endl;
2699
2700 purity.push_back(resMulticlass->GetAchievablePur());
2701 return resMulticlass->GetAchievableEff();
2702}
2703
2704////////////////////////////////////////////////////////////////////////////////
2705
2706std::vector<Float_t> TMVA::MethodBase::GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity)
2707{
2708 Data()->SetCurrentType(Types::kTraining);
2709 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTraining, Types::kMulticlass));
2710 if (!resMulticlass) Log() << kFATAL<< "unable to create pointer in GetMulticlassTrainingEfficiency, exiting."<<Endl;
2711
2712 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for training data..." << Endl;
2713 for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
2714 resMulticlass->GetBestMultiClassCuts(icls);
2715 }
2716
2717 purity.push_back(resMulticlass->GetAchievablePur());
2718 return resMulticlass->GetAchievableEff();
2719}
2720
2721////////////////////////////////////////////////////////////////////////////////
2722/// Construct a confusion matrix for a multiclass classifier. The confusion
/// matrix compares, in turn, each class against all other classes in a pair-wise
2724/// fashion. In rows with index \f$ k_r = 0 ... K \f$, \f$ k_r \f$ is
2725/// considered signal for the sake of comparison and for each column
2726/// \f$ k_c = 0 ... K \f$ the corresponding class is considered background.
2727///
2728/// Note that the diagonal elements will be returned as NaN since this will
2729/// compare a class against itself.
2730///
2731/// \see TMVA::ResultsMulticlass::GetConfusionMatrix
2732///
2733/// \param[in] effB The background efficiency for which to evaluate.
2734/// \param[in] type The data set on which to evaluate (training, testing ...).
2735///
2736/// \return A matrix containing signal efficiencies for the given background
2737/// efficiency. The diagonal elements are NaN since this measure is
2738/// meaningless (comparing a class against itself).
2739///
2740
2742{
2743 if (GetAnalysisType() != Types::kMulticlass) {
2744 Log() << kFATAL << "Cannot get confusion matrix for non-multiclass analysis." << std::endl;
2745 return TMatrixD(0, 0);
2746 }
2747
2748 Data()->SetCurrentType(type);
2749 ResultsMulticlass *resMulticlass =
2750 dynamic_cast<ResultsMulticlass *>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
2751
2752 if (resMulticlass == nullptr) {
2753 Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
2754 << "unable to create pointer in GetMulticlassEfficiency, exiting." << Endl;
2755 return TMatrixD(0, 0);
2756 }
2757
2758 return resMulticlass->GetConfusionMatrix(effB);
2759}
2760
2761////////////////////////////////////////////////////////////////////////////////
2762/// compute significance of mean difference
2763/// \f[
/// significance = \frac{|<S> - <B>|}{\sqrt{RMS_S^2 + RMS_B^2}}
2765/// \f]
2766
2768{
2769 Double_t rms = sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
2770
2771 return (rms > 0) ? TMath::Abs(fMeanS - fMeanB)/rms : 0;
2772}
2773
2774////////////////////////////////////////////////////////////////////////////////
2775/// compute "separation" defined as
2776/// \f[
2777/// <s2> = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2778/// \f]
2779
2781{
2782 return gTools().GetSeparation( histoS, histoB );
2783}
2784
2785////////////////////////////////////////////////////////////////////////////////
2786/// compute "separation" defined as
2787/// \f[
2788/// <s2> = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2789/// \f]
2790
2792{
2793 // note, if zero pointers given, use internal pdf
2794 // sanity check first
2795 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2796 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2797 if (!pdfS) pdfS = fSplS;
2798 if (!pdfB) pdfB = fSplB;
2799
2800 if (!fSplS || !fSplB) {
2801 Log()<<kDEBUG<<Form("[%s] : ",DataInfo().GetName())<< "could not calculate the separation, distributions"
2802 << " fSplS or fSplB are not yet filled" << Endl;
2803 return 0;
2804 }else{
2805 return gTools().GetSeparation( *pdfS, *pdfB );
2806 }
2807}
2808
2809////////////////////////////////////////////////////////////////////////////////
2810/// calculate the area (integral) under the ROC curve as a
2811/// overall quality measure of the classification
2812
2814{
2815 // note, if zero pointers given, use internal pdf
2816 // sanity check first
2817 if ((!histS && histB) || (histS && !histB))
2818 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetROCIntegral(TH1D*, TH1D*)> Mismatch in hists" << Endl;
2819
2820 if (histS==0 || histB==0) return 0.;
2821
2822 TMVA::PDF *pdfS = new TMVA::PDF( " PDF Sig", histS, TMVA::PDF::kSpline3 );
2823 TMVA::PDF *pdfB = new TMVA::PDF( " PDF Bkg", histB, TMVA::PDF::kSpline3 );
2824
2825
2826 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2827 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2828
2829 Double_t integral = 0;
2830 UInt_t nsteps = 1000;
2831 Double_t step = (xmax-xmin)/Double_t(nsteps);
2832 Double_t cut = xmin;
2833 for (UInt_t i=0; i<nsteps; i++) {
2834 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2835 cut+=step;
2836 }
2837 delete pdfS;
2838 delete pdfB;
2839 return integral*step;
2840}
2841
2842
2843////////////////////////////////////////////////////////////////////////////////
2844/// calculate the area (integral) under the ROC curve as a
2845/// overall quality measure of the classification
2846
2848{
2849 // note, if zero pointers given, use internal pdf
2850 // sanity check first
2851 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2852 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2853 if (!pdfS) pdfS = fSplS;
2854 if (!pdfB) pdfB = fSplB;
2855
2856 if (pdfS==0 || pdfB==0) return 0.;
2857
2858 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2859 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2860
2861 Double_t integral = 0;
2862 UInt_t nsteps = 1000;
2863 Double_t step = (xmax-xmin)/Double_t(nsteps);
2864 Double_t cut = xmin;
2865 for (UInt_t i=0; i<nsteps; i++) {
2866 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2867 cut+=step;
2868 }
2869 return integral*step;
2870}
2871
2872////////////////////////////////////////////////////////////////////////////////
2873/// plot significance, \f$ \frac{S}{\sqrt{S^2 + B^2}} \f$, curve for given number
2874/// of signal and background events; returns cut for maximum significance
2875/// also returned via reference is the maximum significance
2876
2878 Double_t BackgroundEvents,
2879 Double_t& max_significance_value ) const
2880{
2881 Results* results = Data()->GetResults( GetMethodName(), Types::kTesting, Types::kMaxAnalysisType );
2882
2883 Double_t max_significance(0);
2884 Double_t effS(0),effB(0),significance(0);
2885 TH1D *temp_histogram = new TH1D("temp", "temp", fNbinsH, fXmin, fXmax );
2886
2887 if (SignalEvents <= 0 || BackgroundEvents <= 0) {
2888 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetMaximumSignificance> "
2889 << "Number of signal or background events is <= 0 ==> abort"
2890 << Endl;
2891 }
2892
2893 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Using ratio SignalEvents/BackgroundEvents = "
2894 << SignalEvents/BackgroundEvents << Endl;
2895
2896 TH1* eff_s = results->GetHist("MVA_EFF_S");
2897 TH1* eff_b = results->GetHist("MVA_EFF_B");
2898
2899 if ( (eff_s==0) || (eff_b==0) ) {
2900 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Efficiency histograms empty !" << Endl;
2901 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "no maximum cut found, return 0" << Endl;
2902 return 0;
2903 }
2904
2905 for (Int_t bin=1; bin<=fNbinsH; bin++) {
2906 effS = eff_s->GetBinContent( bin );
2907 effB = eff_b->GetBinContent( bin );
2908
2909 // put significance into a histogram
2910 significance = sqrt(SignalEvents)*( effS )/sqrt( effS + ( BackgroundEvents / SignalEvents) * effB );
2911
2912 temp_histogram->SetBinContent(bin,significance);
2913 }
2914
2915 // find maximum in histogram
2916 max_significance = temp_histogram->GetBinCenter( temp_histogram->GetMaximumBin() );
2917 max_significance_value = temp_histogram->GetBinContent( temp_histogram->GetMaximumBin() );
2918
2919 // delete
2920 delete temp_histogram;
2921
2922 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Optimal cut at : " << max_significance << Endl;
2923 Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Maximum significance: " << max_significance_value << Endl;
2924
2925 return max_significance;
2926}
2927
2928////////////////////////////////////////////////////////////////////////////////
2929/// calculates rms,mean, xmin, xmax of the event variable
2930/// this can be either done for the variables as they are or for
2931/// normalised variables (in the range of 0-1) if "norm" is set to kTRUE
2932
// NOTE(review): the recovered listing dropped two signature lines here (orig.
// 2933 and 2936). From the parameters used in the body the full signature
// presumably reads
//    void TMVA::MethodBase::Statistics( Types::ETreeType treeType, const TString& theVarName,
//                                       Double_t& meanS, Double_t& meanB,
//                                       Double_t& rmsS, Double_t& rmsB,
//                                       Double_t& xmin, Double_t& xmax )
// — confirm against MethodBase.h before compiling.
                                  Double_t& meanS, Double_t& meanB,
                                  Double_t& rmsS, Double_t& rmsB,
{
   // remember the current tree type so it can be restored on exit
   Types::ETreeType previousTreeType = Data()->GetCurrentType();
   Data()->SetCurrentType(treeType);

   Long64_t entries = Data()->GetNEvents();

   // sanity check
   if (entries <=0)
      Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CalculateEstimator> Wrong tree type: " << treeType << Endl;

   // index of the wanted variable
   UInt_t varIndex = DataInfo().FindVarIndex( theVarName );

   // first fill signal and background in arrays before analysis
   xmin = +DBL_MAX;
   xmax = -DBL_MAX;
   // NOTE(review): nEventsS/nEventsB are initialised to -1, incremented once
   // after the loop and never read — dead code, left untouched (doc-only pass).
   Long64_t nEventsS = -1;
   Long64_t nEventsB = -1;

   // take into account event weights
   meanS = 0;
   meanB = 0;
   rmsS = 0;
   rmsB = 0;
   Double_t sumwS = 0, sumwB = 0;

   // loop over all training events
   for (Int_t ievt = 0; ievt < entries; ievt++) {

      const Event* ev = GetEvent(ievt);

      Double_t theVar = ev->GetValue(varIndex);
      Double_t weight = ev->GetWeight();

      if (DataInfo().IsSignal(ev)) {
         // accumulate weighted first and second moments for signal ...
         sumwS += weight;
         meanS += weight*theVar;
         rmsS += weight*theVar*theVar;
      }
      else {
         // ... and for background
         sumwB += weight;
         meanB += weight*theVar;
         rmsB += weight*theVar*theVar;
      }
      xmin = TMath::Min( xmin, theVar );
      xmax = TMath::Max( xmax, theVar );
   }
   ++nEventsS;
   ++nEventsB;

   // turn the weighted sums into mean and RMS
   meanS = meanS/sumwS;
   meanB = meanB/sumwB;
   rmsS = TMath::Sqrt( rmsS/sumwS - meanS*meanS );
   rmsB = TMath::Sqrt( rmsB/sumwB - meanB*meanB );

   Data()->SetCurrentType(previousTreeType);
}
2994
2995////////////////////////////////////////////////////////////////////////////////
2996/// create reader class for method (classification only at present)
2997
2998void TMVA::MethodBase::MakeClass( const TString& theClassFileName ) const
2999{
3000 // the default consists of
3001 TString classFileName = "";
3002 if (theClassFileName == "")
3003 classFileName = GetWeightFileDir() + "/" + GetJobName() + "_" + GetMethodName() + ".class.C";
3004 else
3005 classFileName = theClassFileName;
3006
3007 TString className = TString("Read") + GetMethodName();
3008
3009 TString tfname( classFileName );
3010 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
3011 << "Creating standalone class: "
3012 << gTools().Color("lightblue") << classFileName << gTools().Color("reset") << Endl;
3013
3014 std::ofstream fout( classFileName );
3015 if (!fout.good()) { // file could not be opened --> Error
3016 Log() << kFATAL << "<MakeClass> Unable to open file: " << classFileName << Endl;
3017 }
3018
3019 // now create the class
3020 // preamble
3021 fout << "// Class: " << className << std::endl;
3022 fout << "// Automatically generated by MethodBase::MakeClass" << std::endl << "//" << std::endl;
3023
3024 // print general information and configuration state
3025 fout << std::endl;
3026 fout << "/* configuration options =====================================================" << std::endl << std::endl;
3027 WriteStateToStream( fout );
3028 fout << std::endl;
3029 fout << "============================================================================ */" << std::endl;
3030
3031 // generate the class
3032 fout << "" << std::endl;
3033 fout << "#include <array>" << std::endl;
3034 fout << "#include <vector>" << std::endl;
3035 fout << "#include <cmath>" << std::endl;
3036 fout << "#include <string>" << std::endl;
3037 fout << "#include <iostream>" << std::endl;
3038 fout << "" << std::endl;
3039 // now if the classifier needs to write some additional classes for its response implementation
3040 // this code goes here: (at least the header declarations need to come before the main class
3041 this->MakeClassSpecificHeader( fout, className );
3042
3043 fout << "#ifndef IClassifierReader__def" << std::endl;
3044 fout << "#define IClassifierReader__def" << std::endl;
3045 fout << std::endl;
3046 fout << "class IClassifierReader {" << std::endl;
3047 fout << std::endl;
3048 fout << " public:" << std::endl;
3049 fout << std::endl;
3050 fout << " // constructor" << std::endl;
3051 fout << " IClassifierReader() : fStatusIsClean( true ) {}" << std::endl;
3052 fout << " virtual ~IClassifierReader() {}" << std::endl;
3053 fout << std::endl;
3054 fout << " // return classifier response" << std::endl;
3055 if(GetAnalysisType() == Types::kMulticlass) {
3056 fout << " virtual std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3057 } else {
3058 fout << " virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3059 }
3060 fout << std::endl;
3061 fout << " // returns classifier status" << std::endl;
3062 fout << " bool IsStatusClean() const { return fStatusIsClean; }" << std::endl;
3063 fout << std::endl;
3064 fout << " protected:" << std::endl;
3065 fout << std::endl;
3066 fout << " bool fStatusIsClean;" << std::endl;
3067 fout << "};" << std::endl;
3068 fout << std::endl;
3069 fout << "#endif" << std::endl;
3070 fout << std::endl;
3071 fout << "class " << className << " : public IClassifierReader {" << std::endl;
3072 fout << std::endl;
3073 fout << " public:" << std::endl;
3074 fout << std::endl;
3075 fout << " // constructor" << std::endl;
3076 fout << " " << className << "( std::vector<std::string>& theInputVars )" << std::endl;
3077 fout << " : IClassifierReader()," << std::endl;
3078 fout << " fClassName( \"" << className << "\" )," << std::endl;
3079 fout << " fNvars( " << GetNvar() << " )" << std::endl;
3080 fout << " {" << std::endl;
3081 fout << " // the training input variables" << std::endl;
3082 fout << " const char* inputVars[] = { ";
3083 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3084 fout << "\"" << GetOriginalVarName(ivar) << "\"";
3085 if (ivar<GetNvar()-1) fout << ", ";
3086 }
3087 fout << " };" << std::endl;
3088 fout << std::endl;
3089 fout << " // sanity checks" << std::endl;
3090 fout << " if (theInputVars.size() <= 0) {" << std::endl;
3091 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << std::endl;
3092 fout << " fStatusIsClean = false;" << std::endl;
3093 fout << " }" << std::endl;
3094 fout << std::endl;
3095 fout << " if (theInputVars.size() != fNvars) {" << std::endl;
3096 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << std::endl;
3097 fout << " << theInputVars.size() << \" != \" << fNvars << std::endl;" << std::endl;
3098 fout << " fStatusIsClean = false;" << std::endl;
3099 fout << " }" << std::endl;
3100 fout << std::endl;
3101 fout << " // validate input variables" << std::endl;
3102 fout << " for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << std::endl;
3103 fout << " if (theInputVars[ivar] != inputVars[ivar]) {" << std::endl;
3104 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << std::endl;
3105 fout << " << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << std::endl;
3106 fout << " fStatusIsClean = false;" << std::endl;
3107 fout << " }" << std::endl;
3108 fout << " }" << std::endl;
3109 fout << std::endl;
3110 fout << " // initialize min and max vectors (for normalisation)" << std::endl;
3111 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
3112 fout << " fVmin[" << ivar << "] = " << std::setprecision(15) << GetXmin( ivar ) << ";" << std::endl;
3113 fout << " fVmax[" << ivar << "] = " << std::setprecision(15) << GetXmax( ivar ) << ";" << std::endl;
3114 }
3115 fout << std::endl;
3116 fout << " // initialize input variable types" << std::endl;
3117 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3118 fout << " fType[" << ivar << "] = \'" << DataInfo().GetVariableInfo(ivar).GetVarType() << "\';" << std::endl;
3119 }
3120 fout << std::endl;
3121 fout << " // initialize constants" << std::endl;
3122 fout << " Initialize();" << std::endl;
3123 fout << std::endl;
3124 if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
3125 fout << " // initialize transformation" << std::endl;
3126 fout << " InitTransform();" << std::endl;
3127 }
3128 fout << " }" << std::endl;
3129 fout << std::endl;
3130 fout << " // destructor" << std::endl;
3131 fout << " virtual ~" << className << "() {" << std::endl;
3132 fout << " Clear(); // method-specific" << std::endl;
3133 fout << " }" << std::endl;
3134 fout << std::endl;
3135 fout << " // the classifier response" << std::endl;
3136 fout << " // \"inputValues\" is a vector of input values in the same order as the" << std::endl;
3137 fout << " // variables given to the constructor" << std::endl;
3138 if(GetAnalysisType() == Types::kMulticlass) {
3139 fout << " std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const override;" << std::endl;
3140 } else {
3141 fout << " double GetMvaValue( const std::vector<double>& inputValues ) const override;" << std::endl;
3142 }
3143 fout << std::endl;
3144 fout << " private:" << std::endl;
3145 fout << std::endl;
3146 fout << " // method-specific destructor" << std::endl;
3147 fout << " void Clear();" << std::endl;
3148 fout << std::endl;
3149 if (GetTransformationHandler().GetTransformationList().GetSize()!=0) {
3150 fout << " // input variable transformation" << std::endl;
3151 GetTransformationHandler().MakeFunction(fout, className,1);
3152 fout << " void InitTransform();" << std::endl;
3153 fout << " void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
3154 fout << std::endl;
3155 }
3156 fout << " // common member variables" << std::endl;
3157 fout << " const char* fClassName;" << std::endl;
3158 fout << std::endl;
3159 fout << " const size_t fNvars;" << std::endl;
3160 fout << " size_t GetNvar() const { return fNvars; }" << std::endl;
3161 fout << " char GetType( int ivar ) const { return fType[ivar]; }" << std::endl;
3162 fout << std::endl;
3163 fout << " // normalisation of input variables" << std::endl;
3164 fout << " double fVmin[" << GetNvar() << "];" << std::endl;
3165 fout << " double fVmax[" << GetNvar() << "];" << std::endl;
3166 fout << " double NormVariable( double x, double xmin, double xmax ) const {" << std::endl;
3167 fout << " // normalise to output range: [-1, 1]" << std::endl;
3168 fout << " return 2*(x - xmin)/(xmax - xmin) - 1.0;" << std::endl;
3169 fout << " }" << std::endl;
3170 fout << std::endl;
3171 fout << " // type of input variable: 'F' or 'I'" << std::endl;
3172 fout << " char fType[" << GetNvar() << "];" << std::endl;
3173 fout << std::endl;
3174 fout << " // initialize internal variables" << std::endl;
3175 fout << " void Initialize();" << std::endl;
3176 if(GetAnalysisType() == Types::kMulticlass) {
3177 fout << " std::vector<double> GetMulticlassValues__( const std::vector<double>& inputValues ) const;" << std::endl;
3178 } else {
3179 fout << " double GetMvaValue__( const std::vector<double>& inputValues ) const;" << std::endl;
3180 }
3181 fout << "" << std::endl;
3182 fout << " // private members (method specific)" << std::endl;
3183
3184 // call the classifier specific output (the classifier must close the class !)
3185 MakeClassSpecific( fout, className );
3186
3187 if(GetAnalysisType() == Types::kMulticlass) {
3188 fout << "inline std::vector<double> " << className << "::GetMulticlassValues( const std::vector<double>& inputValues ) const" << std::endl;
3189 } else {
3190 fout << "inline double " << className << "::GetMvaValue( const std::vector<double>& inputValues ) const" << std::endl;
3191 }
3192 fout << "{" << std::endl;
3193 fout << " // classifier response value" << std::endl;
3194 if(GetAnalysisType() == Types::kMulticlass) {
3195 fout << " std::vector<double> retval;" << std::endl;
3196 } else {
3197 fout << " double retval = 0;" << std::endl;
3198 }
3199 fout << std::endl;
3200 fout << " // classifier response, sanity check first" << std::endl;
3201 fout << " if (!IsStatusClean()) {" << std::endl;
3202 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << std::endl;
3203 fout << " << \" because status is dirty\" << std::endl;" << std::endl;
3204 fout << " }" << std::endl;
3205 fout << " else {" << std::endl;
3206 if (IsNormalised()) {
3207 fout << " // normalise variables" << std::endl;
3208 fout << " std::vector<double> iV;" << std::endl;
3209 fout << " iV.reserve(inputValues.size());" << std::endl;
3210 fout << " int ivar = 0;" << std::endl;
3211 fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3212 fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3213 fout << " iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << std::endl;
3214 fout << " }" << std::endl;
3215 if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
3216 GetMethodType() != Types::kHMatrix) {
3217 fout << " Transform( iV, -1 );" << std::endl;
3218 }
3219
3220 if(GetAnalysisType() == Types::kMulticlass) {
3221 fout << " retval = GetMulticlassValues__( iV );" << std::endl;
3222 } else {
3223 fout << " retval = GetMvaValue__( iV );" << std::endl;
3224 }
3225 } else {
3226 if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
3227 GetMethodType() != Types::kHMatrix) {
3228 fout << " std::vector<double> iV(inputValues);" << std::endl;
3229 fout << " Transform( iV, -1 );" << std::endl;
3230 if(GetAnalysisType() == Types::kMulticlass) {
3231 fout << " retval = GetMulticlassValues__( iV );" << std::endl;
3232 } else {
3233 fout << " retval = GetMvaValue__( iV );" << std::endl;
3234 }
3235 } else {
3236 if(GetAnalysisType() == Types::kMulticlass) {
3237 fout << " retval = GetMulticlassValues__( inputValues );" << std::endl;
3238 } else {
3239 fout << " retval = GetMvaValue__( inputValues );" << std::endl;
3240 }
3241 }
3242 }
3243 fout << " }" << std::endl;
3244 fout << std::endl;
3245 fout << " return retval;" << std::endl;
3246 fout << "}" << std::endl;
3247
3248 // create output for transformation - if any
3249 if (GetTransformationHandler().GetTransformationList().GetSize()!=0)
3250 GetTransformationHandler().MakeFunction(fout, className,2);
3251
3252 // close the file
3253 fout.close();
3254}
3255
3256////////////////////////////////////////////////////////////////////////////////
3257/// prints out method-specific help method
3258
3260{
3261 // if options are written to reference file, also append help info
3262 std::streambuf* cout_sbuf = std::cout.rdbuf(); // save original sbuf
3263 std::ofstream* o = 0;
3264 if (gConfig().WriteOptionsReference()) {
3265 Log() << kINFO << "Print Help message for class " << GetName() << " into file: " << GetReferenceFile() << Endl;
3266 o = new std::ofstream( GetReferenceFile(), std::ios::app );
3267 if (!o->good()) { // file could not be opened --> Error
3268 Log() << kFATAL << "<PrintHelpMessage> Unable to append to output file: " << GetReferenceFile() << Endl;
3269 }
3270 std::cout.rdbuf( o->rdbuf() ); // redirect 'std::cout' to file
3271 }
3272
3273 // "|--------------------------------------------------------------|"
3274 if (!o) {
3275 Log() << kINFO << Endl;
3276 Log() << gTools().Color("bold")
3277 << "================================================================"
3278 << gTools().Color( "reset" )
3279 << Endl;
3280 Log() << gTools().Color("bold")
3281 << "H e l p f o r M V A m e t h o d [ " << GetName() << " ] :"
3282 << gTools().Color( "reset" )
3283 << Endl;
3284 }
3285 else {
3286 Log() << "Help for MVA method [ " << GetName() << " ] :" << Endl;
3287 }
3288
3289 // print method-specific help message
3290 GetHelpMessage();
3291
3292 if (!o) {
3293 Log() << Endl;
3294 Log() << "<Suppress this message by specifying \"!H\" in the booking option>" << Endl;
3295 Log() << gTools().Color("bold")
3296 << "================================================================"
3297 << gTools().Color( "reset" )
3298 << Endl;
3299 Log() << Endl;
3300 }
3301 else {
3302 // indicate END
3303 Log() << "# End of Message___" << Endl;
3304 }
3305
3306 std::cout.rdbuf( cout_sbuf ); // restore the original stream buffer
3307 if (o) o->close();
3308}
3309
3310// ----------------------- r o o t f i n d i n g ----------------------------
3311
3312////////////////////////////////////////////////////////////////////////////////
3313/// returns efficiency as function of cut
3314
3316{
3317 Double_t retval=0;
3318
3319 // retrieve the class object
3321 retval = fSplRefS->Eval( theCut );
3322 }
3323 else retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) );
3324
3325 // caution: here we take some "forbidden" action to hide a problem:
3326 // in some cases, in particular for likelihood, the binned efficiency distributions
3327 // do not equal 1, at xmin, and 0 at xmax; of course, in principle we have the
3328 // unbinned information available in the trees, but the unbinned minimization is
3329 // too slow, and we don't need to do a precision measurement here. Hence, we force
3330 // this property.
3331 Double_t eps = 1.0e-5;
3332 if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
3333 else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
3334
3335 return retval;
3336}
3337
3338////////////////////////////////////////////////////////////////////////////////
3339/// returns the event collection (i.e. the dataset) TRANSFORMED using the
3340/// classifiers specific Variable Transformation (e.g. Decorr or Decorr:Gauss:Decorr)
3341
3343{
3344 // if there's no variable transformation for this classifier, just hand back the
3345 // event collection of the data set
3346 if (GetTransformationHandler().GetTransformationList().GetEntries() <= 0) {
3347 return (Data()->GetEventCollection(type));
3348 }
3349
3350 // otherwise, transform ALL the events and hand back the vector of the pointers to the
3351 // transformed events. If the pointer is already != 0, i.e. the whole thing has been
3352 // done before, I don't need to do it again, but just "hand over" the pointer to those events.
3353 Int_t idx = Data()->TreeIndex(type); //index indicating Training,Testing,... events/datasets
3354 if (fEventCollections.at(idx) == 0) {
3355 fEventCollections.at(idx) = &(Data()->GetEventCollection(type));
3356 fEventCollections.at(idx) = GetTransformationHandler().CalcTransformations(*(fEventCollections.at(idx)),kTRUE);
3357 }
3358 return *(fEventCollections.at(idx));
3359}
3360
3361////////////////////////////////////////////////////////////////////////////////
3362/// calculates the TMVA version string from the training version code on the fly
3363
3365{
3366 UInt_t a = GetTrainingTMVAVersionCode() & 0xff0000; a>>=16;
3367 UInt_t b = GetTrainingTMVAVersionCode() & 0x00ff00; b>>=8;
3368 UInt_t c = GetTrainingTMVAVersionCode() & 0x0000ff;
3369
3370 return TString(Form("%i.%i.%i",a,b,c));
3371}
3372
3373////////////////////////////////////////////////////////////////////////////////
3374/// calculates the ROOT version string from the training version code on the fly
3375
3377{
3378 UInt_t a = GetTrainingROOTVersionCode() & 0xff0000; a>>=16;
3379 UInt_t b = GetTrainingROOTVersionCode() & 0x00ff00; b>>=8;
3380 UInt_t c = GetTrainingROOTVersionCode() & 0x0000ff;
3381
3382 return TString(Form("%i.%02i/%02i",a,b,c));
3383}
3384
3385////////////////////////////////////////////////////////////////////////////////
3386
3388 ResultsClassification* mvaRes = dynamic_cast<ResultsClassification*>
3389 ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );
3390
3391 if (mvaRes != NULL) {
3392 TH1D *mva_s = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_S"));
3393 TH1D *mva_b = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_B"));
3394 TH1D *mva_s_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_S"));
3395 TH1D *mva_b_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_B"));
3396
3397 if ( !mva_s || !mva_b || !mva_s_tr || !mva_b_tr) return -1;
3398
3399 if (SorB == 's' || SorB == 'S')
3400 return mva_s->KolmogorovTest( mva_s_tr, opt.Data() );
3401 else
3402 return mva_b->KolmogorovTest( mva_b_tr, opt.Data() );
3403 }
3404 return -1;
3405}
const Bool_t Use_Splines_for_Eff_
Definition: MethodBase.cxx:134
const Int_t NBIN_HIST_HIGH
Definition: MethodBase.cxx:137
ROOT::R::TRInterface & r
Definition: Object.C:4
#define d(i)
Definition: RSha256.hxx:102
#define b(i)
Definition: RSha256.hxx:100
#define c(i)
Definition: RSha256.hxx:101
#define s1(x)
Definition: RSha256.hxx:91
#define ROOT_VERSION_CODE
Definition: RVersion.h:21
static RooMathCoreReg dummy
int Int_t
Definition: RtypesCore.h:41
int Ssiz_t
Definition: RtypesCore.h:63
char Char_t
Definition: RtypesCore.h:29
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
long long Long64_t
Definition: RtypesCore.h:69
float Float_t
Definition: RtypesCore.h:53
const Bool_t kTRUE
Definition: RtypesCore.h:87
#define ClassImp(name)
Definition: Rtypes.h:365
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
float xmin
Definition: THbookFile.cxx:93
float xmax
Definition: THbookFile.cxx:93
double sqrt(double)
TMatrixT< Double_t > TMatrixD
Definition: TMatrixDfwd.h:22
char * Form(const char *fmt,...)
R__EXTERN TSystem * gSystem
Definition: TSystem.h:560
#define TMVA_VERSION_CODE
Definition: Version.h:47
Class to manage histogram axis.
Definition: TAxis.h:30
Double_t GetXmax() const
Definition: TAxis.h:134
Double_t GetXmin() const
Definition: TAxis.h:133
virtual Int_t GetSize() const
Return the capacity of the collection, i.e.
Definition: TCollection.h:182
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write all objects in this collection.
This class stores the date and time with a precision of one second in an unsigned 32 bit word (950130...
Definition: TDatime.h:37
const char * AsString() const
Return the date & time as a string (ctime() format).
Definition: TDatime.cxx:101
TObject * Get(const char *namecycle) override
Return pointer to object identified by namecycle.
Describe directory structure in memory.
Definition: TDirectory.h:34
virtual TDirectory * GetDirectory(const char *namecycle, Bool_t printError=false, const char *funcname="GetDirectory")
Find a directory using apath.
Definition: TDirectory.cxx:400
virtual TDirectory * mkdir(const char *name, const char *title="", Bool_t returnExistingDirectory=kFALSE)
Create a sub-directory "a" or a hierarchy of sub-directories "a/b/c/...".
virtual Bool_t cd(const char *path=nullptr)
Change current directory to "this" directory.
Definition: TDirectory.cxx:497
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:48
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition: TFile.cxx:3923
void Close(Option_t *option="") override
Close a file.
Definition: TFile.cxx:856
A Graph is a graphics object made of two arrays X and Y with npoints each.
Definition: TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)}
Definition: TH1.h:614
1-D histogram with a float per channel (see TH1 documentation)}
Definition: TH1.h:571
The TH1 histogram class.
Definition: TH1.h:56
virtual Double_t GetBinCenter(Int_t bin) const
Return bin center for 1D histogram.
Definition: TH1.cxx:8585
virtual Int_t GetQuantiles(Int_t nprobSum, Double_t *q, const Double_t *probSum=0)
Compute Quantiles for this histogram Quantile x_q of a probability distribution Function F is defined...
Definition: TH1.cxx:4451
virtual void AddBinContent(Int_t bin)
Increment bin content by 1.
Definition: TH1.cxx:1201
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition: TH1.cxx:7074
virtual void SetXTitle(const char *title)
Definition: TH1.h:409
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition: TH1.cxx:1226
TAxis * GetXaxis()
Get the behaviour adopted by the object about the statoverflows. See EStatOverflows for more informat...
Definition: TH1.h:316
virtual Double_t GetMaximum(Double_t maxval=FLT_MAX) const
Return maximum value smaller than maxval of bins in the range, unless the value has been overridden b...
Definition: TH1.cxx:7994
virtual Int_t GetNbinsX() const
Definition: TH1.h:292
virtual Int_t Fill(Double_t x)
Increment bin with abscissa X by 1.
Definition: TH1.cxx:3275
virtual void SetBinContent(Int_t bin, Double_t content)
Set bin content see convention for numbering bins in TH1::GetBin In case the bin number is greater th...
Definition: TH1.cxx:8666
virtual Int_t GetMaximumBin() const
Return location of bin with maximum value in the range.
Definition: TH1.cxx:8024
virtual Double_t GetBinContent(Int_t bin) const
Return content of bin number bin.
Definition: TH1.cxx:4899
virtual void SetYTitle(const char *title)
Definition: TH1.h:410
virtual void Scale(Double_t c1=1, Option_t *option="")
Multiply this histogram by a constant c1.
Definition: TH1.cxx:6234
virtual Int_t FindBin(Double_t x, Double_t y=0, Double_t z=0)
Return Global bin number corresponding to x,y,z.
Definition: TH1.cxx:3596
virtual Double_t KolmogorovTest(const TH1 *h2, Option_t *option="") const
Statistical test of compatibility in shape between this histogram and h2, using Kolmogorov test.
Definition: TH1.cxx:7672
virtual void Sumw2(Bool_t flag=kTRUE)
Create structure to store sum of squares of weights.
Definition: TH1.cxx:8464
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition: TH1.cxx:706
2-D histogram with a float per channel (see TH1 documentation)}
Definition: TH2.h:251
Int_t Fill(Double_t)
Invalid Fill method.
Definition: TH2.cxx:292
A doubly linked list.
Definition: TList.h:44
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:354
Class that contains all the information of a class.
Definition: ClassInfo.h:49
UInt_t GetNumber() const
Definition: ClassInfo.h:65
TString fWeightFileExtension
Definition: Config.h:125
VariablePlotting & GetVariablePlotting()
Definition: Config.h:99
class TMVA::Config::VariablePlotting fVariablePlotting
IONames & GetIONames()
Definition: Config.h:100
MsgLogger * fLogger
Definition: Configurable.h:128
Class that contains all the data information.
Definition: DataSetInfo.h:60
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:237
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not.
Definition: Event.cxx:382
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:392
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:102
static void SetIgnoreNegWeightsInTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition: Event.cxx:401
Interface for all concrete MVA method implementations.
Definition: IMethod.h:54
void Init(std::vector< TString > &graphTitles)
This function gets some title and it creates a TGraph for every title.
Definition: MethodBase.cxx:174
IPythonInteractive()
standard constructor
Definition: MethodBase.cxx:151
~IPythonInteractive()
standard destructor
Definition: MethodBase.cxx:159
void ClearGraphs()
This function sets the point number to 0 for all graphs.
Definition: MethodBase.cxx:198
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs.
Definition: MethodBase.cxx:212
Virtual base Class for all MVA method.
Definition: MethodBase.h:111
TDirectory * MethodBaseDir() const
returns the ROOT directory where all instances of the corresponding MVA method are stored
virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X")
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
Definition: MethodBase.cxx:242
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
void ReadClassesFromXML(void *clsnode)
read number of classes from XML
void SetWeightFileDir(TString fileDir)
set directory of weight file
void WriteStateToXML(void *parent) const
general method used in writing the header of the weight files where the used variables,...
void DeclareBaseOptions()
define the options (their key words) that can be set in the option string here the options valid for ...
Definition: MethodBase.cxx:514
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
Definition: MethodBase.cxx:982
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
Definition: MethodBase.cxx:601
virtual Double_t GetSignificance() const
compute significance of mean difference
virtual Double_t GetProba(const Event *ev)
const char * GetName() const
Definition: MethodBase.h:333
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
void PrintHelpMessage() const
prints out method-specific help method
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual void TestMulticlass()
test multiclass classification
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
void SetupMethod()
setup of methods
Definition: MethodBase.cxx:411
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
void AddInfoItem(void *gi, const TString &name, const TString &value) const
xml writing
virtual void AddClassifierOutputProb(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:941
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
TString GetTrainingTMVAVersionString() const
calculates the TMVA version string from the training version code on the fly
void Statistics(Types::ETreeType treeType, const TString &theVarName, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &)
calculates rms,mean, xmin, xmax of the event variable this can be either done for the variables as th...
Bool_t GetLine(std::istream &fin, char *buf)
reads one line from the input stream checks for certain keywords and interprets the line if keywords ...
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
Definition: MethodBase.cxx:428
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
Definition: MethodBase.cxx:899
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
Definition: MethodBase.cxx:859
virtual ~MethodBase()
destructor
Definition: MethodBase.cxx:369
virtual Double_t GetMaximumSignificance(Double_t SignalEvents, Double_t BackgroundEvents, Double_t &optimal_significance_value) const
plot significance, , curve for given number of signal and background events; returns cut for maximum ...
virtual Double_t GetTrainingEfficiency(const TString &)
void SetWeightFileName(TString)
set the weight file name (depreciated)
virtual void MakeClass(const TString &classFileName=TString("")) const
create reader class for method (classification only at present)
TString GetWeightFileName() const
retrieve weight file name
virtual void TestClassification()
initialization
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
virtual void WriteMonitoringHistosToFile() const
write special monitoring histograms to file dummy implementation here --------------—
virtual void AddRegressionOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:749
void InitBase()
default initialization called by all constructors
Definition: MethodBase.cxx:446
virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t &stddev, Double_t &stddev90Percent) const
Definition: MethodBase.cxx:729
void ReadStateFromXMLString(const char *xmlstr)
for reading from memory
void CreateMVAPdfs()
Create PDFs of the MVA output variables.
TString GetTrainingROOTVersionString() const
calculates the ROOT version string from the training version code on the fly
virtual Double_t GetValueForRoot(Double_t)
returns efficiency as function of cut
void ReadStateFromFile()
Function to write options and weights to file.
void WriteVarsToStream(std::ostream &tf, const TString &prefix="") const
write the list of variables (name, min, max) for a given data transformation method to the stream
void ReadVarsFromStream(std::istream &istr)
Read the variables (name, min, max) for a given data transformation method from the stream.
void ReadSpectatorsFromXML(void *specnode)
read spectator info from XML
virtual Double_t GetMvaValue(Double_t *errLower=0, Double_t *errUpper=0)=0
void SetTestvarName(const TString &v="")
Definition: MethodBase.h:340
void ReadVariablesFromXML(void *varnode)
read variable info from XML
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
Definition: MethodBase.cxx:628
virtual std::vector< Float_t > GetMulticlassTrainingEfficiency(std::vector< std::vector< Float_t > > &purity)
void WriteStateToStream(std::ostream &tf) const
general method used in writing the header of the weight files where the used variables,...
virtual Double_t GetRarity(Double_t mvaVal, Types::ESBType reftype=Types::kBackground) const
compute rarity:
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters according to the argument This is just a dummy .
Definition: MethodBase.cxx:649
void ReadStateFromStream(std::istream &tf)
read the header from the weight files of the different MVA methods
void AddVarsXMLTo(void *parent) const
write variable info to XML
void AddTargetsXMLTo(void *parent) const
write target info to XML
void ReadTargetsFromXML(void *tarnode)
read target info from XML
void ProcessBaseOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodBase.cxx:545
void ReadStateFromXML(void *parent)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:841
void WriteStateToFile() const
write options and weights to file note that each one text file for the main configuration information...
void AddClassesXMLTo(void *parent) const
write class info to XML
virtual void AddClassifierOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:873
void AddSpectatorsXMLTo(void *parent) const
write spectator info to XML
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification
virtual void AddMulticlassOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
Definition: MethodBase.cxx:799
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodBase.cxx:438
void SetSource(const std::string &source)
Definition: MsgLogger.h:70
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition: PDF.h:63
Double_t GetXmin() const
Definition: PDF.h:104
Double_t GetXmax() const
Definition: PDF.h:105
Double_t GetVal(Double_t x) const
returns value PDF(x)
Definition: PDF.cxx:700
@ kSpline3
Definition: PDF.h:70
@ kSpline2
Definition: PDF.h:70
Double_t GetIntegral(Double_t xmin, Double_t xmax)
computes PDF integral within given ranges
Definition: PDF.cxx:653
Class that is the base-class for a vector of result.
std::vector< Bool_t > * GetValueVectorTypes()
void SetValue(Float_t value, Int_t ievt)
set MVA response
std::vector< Float_t > * GetValueVector()
Class which takes the results of a multiclass classification.
TMatrixD GetConfusionMatrix(Double_t effB)
Returns a confusion matrix where each class is pitted against each other.
Float_t GetAchievablePur(UInt_t cls)
std::vector< Double_t > GetBestMultiClassCuts(UInt_t targetClass)
calculate the best working point (optimal cut values) for the multiclass classifier
void CreateMulticlassHistos(TString prefix, Int_t nbins, Int_t nbins_high)
this function fills the mva response histos for multiclass classification
Float_t GetAchievableEff(UInt_t cls)
void CreateMulticlassPerformanceHistos(TString prefix)
Create performance graphs for this classifier a multiclass setting.
Class that is the base-class for a vector of result.
Class that is the base-class for a vector of result.
Definition: Results.h:57
Bool_t DoesExist(const TString &alias) const
Returns true if there is an object stored in the result for a given alias, false otherwise.
Definition: Results.cxx:127
TH1 * GetHist(const TString &alias) const
Definition: Results.cxx:136
TList * GetStorage() const
Definition: Results.h:73
void Store(TObject *obj, const char *alias=0)
Definition: Results.cxx:86
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Definition: RootFinder.h:48
Double_t Root(Double_t refValue)
Root finding using Brents algorithm; taken from CERNLIB function RZERO.
Definition: RootFinder.cxx:72
Linear interpolation of TGraph.
Definition: TSpline1.h:43
Timing information for training and evaluation of MVA methods.
Definition: Timer.h:58
Double_t ElapsedSeconds(void)
computes elapsed tim in seconds
Definition: Timer.cxx:131
TString GetElapsedTime(Bool_t Scientific=kTRUE)
returns pretty string with elapsed time
Definition: Timer.cxx:140
void DrawProgressBar(Int_t, const TString &comment="")
draws progress bar in color or B&W caution:
Definition: Timer.cxx:196
void ComputeStat(const std::vector< TMVA::Event * > &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition: Tools.cxx:214
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:413
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1174
Double_t GetSeparation(TH1 *S, TH1 *B) const
compute "separation" defined as
Definition: Tools.cxx:133
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1136
Double_t GetMutualInformation(const TH2F &)
Mutual Information method for non-linear correlations estimates in 2D histogram Author: Moritz Backes...
Definition: Tools.cxx:601
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:840
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1162
TXMLEngine & xmlengine()
Definition: Tools.h:270
Bool_t CheckSplines(const TH1 *, const TSpline *)
check quality of splining by comparing splines and histograms in each bin
Definition: Tools.cxx:491
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition: Tools.h:337
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition: Tools.h:355
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition: Tools.cxx:395
Singleton class for Global types used by TMVA.
Definition: Types.h:73
@ kSignal
Definition: Types.h:136
@ kBackground
Definition: Types.h:137
@ kLikelihood
Definition: Types.h:81
@ kHMatrix
Definition: Types.h:83
EAnalysisType
Definition: Types.h:127
@ kMulticlass
Definition: Types.h:130
@ kNoAnalysisType
Definition: Types.h:131
@ kClassification
Definition: Types.h:128
@ kMaxAnalysisType
Definition: Types.h:132
@ kRegression
Definition: Types.h:129
@ kTraining
Definition: Types.h:144
@ kTesting
Definition: Types.h:145
Linear interpolation class.
Gaussian Transformation of input variables.
Class for type info of MVA input variable.
Definition: VariableInfo.h:47
void ReadFromXML(void *varnode)
read VariableInfo from stream
const TString & GetExpression() const
Definition: VariableInfo.h:57
char GetVarType() const
Definition: VariableInfo.h:61
void ReadFromStream(std::istream &istr)
read VariableInfo from stream
void AddToXML(void *varnode)
write class to XML
void SetExternalLink(void *p)
Definition: VariableInfo.h:75
void * GetExternalLink() const
Definition: VariableInfo.h:83
void BuildTransformationFromVarInfo(const std::vector< TMVA::VariableInfo > &var)
this method is only used when building a normalization transformation from old text files in this cas...
Linear interpolation class.
Linear interpolation class.
virtual void ReadTransformationFromStream(std::istream &istr, const TString &classname="")=0
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition: TMultiGraph.h:35
virtual const char * GetName() const
Returns name of object.
Definition: TNamed.h:47
Collectable string class.
Definition: TObjString.h:28
virtual Int_t Write(const char *name=0, Int_t option=0, Int_t bufsize=0)
Write this object to the current directory.
Definition: TObject.cxx:785
Basic string class.
Definition: TString.h:131
Ssiz_t Length() const
Definition: TString.h:405
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1125
Int_t Atoi() const
Return integer value of string.
Definition: TString.cxx:1921
Bool_t EndsWith(const char *pat, ECaseCompare cmp=kExact) const
Return true if string ends with the specified string.
Definition: TString.cxx:2177
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition: TString.cxx:1106
const char * Data() const
Definition: TString.h:364
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:687
@ kLeading
Definition: TString.h:262
Ssiz_t Last(char c) const
Find last occurrence of a character c.
Definition: TString.cxx:892
Bool_t IsNull() const
Definition: TString.h:402
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition: TString.cxx:2311
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:634
virtual const char * GetBuildNode() const
Return the build node name.
Definition: TSystem.cxx:3889
virtual int mkdir(const char *name, Bool_t recursive=kFALSE)
Make a file system directory.
Definition: TSystem.cxx:914
virtual const char * WorkingDirectory()
Return working directory.
Definition: TSystem.cxx:879
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
Definition: TSystem.cxx:1589
void SaveDoc(XMLDocPointer_t xmldoc, const char *filename, Int_t layout=1)
store document content to file if layout<=0, no any spaces or newlines will be placed between xmlnode...
void FreeDoc(XMLDocPointer_t xmldoc)
frees allocated document data and deletes document itself
XMLNodePointer_t DocGetRootElement(XMLDocPointer_t xmldoc)
returns root node of document
XMLDocPointer_t NewDoc(const char *version="1.0")
creates new xml document with provided version
XMLDocPointer_t ParseFile(const char *filename, Int_t maxbuf=100000)
Parses content of file and tries to produce xml structures.
XMLDocPointer_t ParseString(const char *xmlstring)
parses content of string and tries to produce xml structures
void DocSetRootElement(XMLDocPointer_t xmldoc, XMLNodePointer_t xmlnode)
set main (root) node for document
TLine * line
Double_t x[n]
Definition: legend1.C:17
TH1F * h1
Definition: legend1.C:5
std::string GetMethodName(TCppMethod_t)
Definition: Cppyy.cxx:757
std::string GetName(const std::string &scope_name)
Definition: Cppyy.cxx:150
void Init(TClassEdit::TInterpreterLookupHelper *helper)
Definition: TClassEdit.cxx:155
static constexpr double s
static constexpr double m2
Config & gConfig()
Tools & gTools()
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Short_t Max(Short_t a, Short_t b)
Definition: TMathBase.h:212
Double_t Log(Double_t x)
Definition: TMath.h:750
Double_t Sqrt(Double_t x)
Definition: TMath.h:681
Short_t Min(Short_t a, Short_t b)
Definition: TMathBase.h:180
Short_t Abs(Short_t d)
Definition: TMathBase.h:120
TString fUser
Definition: TSystem.h:142
auto * a
Definition: textangle.C:12
REAL epsilon
Definition: triangle.c:617