Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
MethodBase.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodBase *
8 * *
9 * *
10 * Description: *
11 * Implementation (see header for description) *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * Kai Voss <Kai.Voss@cern.ch> - U. of Victoria, Canada *
19 * Jan Therhaag <Jan.Therhaag@cern.ch> - U of Bonn, Germany *
20 * Eckhard v. Toerne <evt@uni-bonn.de> - U of Bonn, Germany *
21 * *
22 * Copyright (c) 2005-2011: *
23 * CERN, Switzerland *
24 * U. of Victoria, Canada *
25 * MPI-K Heidelberg, Germany *
26 * U. of Bonn, Germany *
27 * *
28 * Redistribution and use in source and binary forms, with or without *
29 * modification, are permitted according to the terms listed in LICENSE *
30 * (see tmva/doc/LICENSE) *
31 * *
32 **********************************************************************************/
33
34/*! \class TMVA::MethodBase
35\ingroup TMVA
36
 37 Virtual base class for all MVA methods
38
39 MethodBase hosts several specific evaluation methods.
40
41 The kind of MVA that provides optimal performance in an analysis strongly
42 depends on the particular application. The evaluation factory provides a
43 number of numerical benchmark results to directly assess the performance
44 of the MVA training on the independent test sample. These are:
45
46 - The _signal efficiency_ at three representative background efficiencies
47 (which is 1 &minus; rejection).
48 - The _significance_ of an MVA estimator, defined by the difference
49 between the MVA mean values for signal and background, divided by the
50 quadratic sum of their root mean squares.
51 - The _separation_ of an MVA _x_, defined by the integral
52 \f[
53 \frac{1}{2} \int \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx
54 \f]
55 where
56 \f$ S(x) \f$ and \f$ B(x) \f$ are the signal and background distributions,
57 respectively. The separation is zero for identical signal and background MVA
58 shapes, and it is one for disjunctive shapes.
59 - The average, \f$ \int x \mu (S(x)) dx \f$, of the signal \f$ \mu_{transform} \f$.
60 The \f$ \mu_{transform} \f$ of an MVA denotes the transformation that yields
61 a uniform background distribution. In this way, the signal distributions
62 \f$ S(x) \f$ can be directly compared among the various MVAs. The stronger
63 \f$ S(x) \f$ peaks towards one, the better is the discrimination of the MVA.
64 The \f$ \mu_{transform} \f$ is
65 [documented here](http://tel.ccsd.cnrs.fr/documents/archives0/00/00/29/91/index_fr.html).
66
67 The MVA standard output also prints the linear correlation coefficients between
68 signal and background, which can be useful to eliminate variables that exhibit too
69 strong correlations.
70*/
71
72#include "TMVA/MethodBase.h"
73
74#include "TMVA/Config.h"
75#include "TMVA/Configurable.h"
76#include "TMVA/DataSetInfo.h"
77#include "TMVA/DataSet.h"
78#include "TMVA/Factory.h"
79#include "TMVA/IMethod.h"
80#include "TMVA/MsgLogger.h"
81#include "TMVA/PDF.h"
82#include "TMVA/Ranking.h"
83#include "TMVA/DataLoader.h"
84#include "TMVA/Tools.h"
85#include "TMVA/Results.h"
89#include "TMVA/RootFinder.h"
90#include "TMVA/Timer.h"
91#include "TMVA/TSpline1.h"
92#include "TMVA/Types.h"
96#include "TMVA/VariableInfo.h"
100#include "TMVA/Version.h"
101
102#include "TROOT.h"
103#include "TSystem.h"
104#include "TObjString.h"
105#include "TQObject.h"
106#include "TSpline.h"
107#include "TMatrix.h"
108#include "TMath.h"
109#include "TH1F.h"
110#include "TH2F.h"
111#include "TFile.h"
112#include "TGraph.h"
113#include "TXMLEngine.h"
114
115#include <iomanip>
116#include <iostream>
117#include <fstream>
118#include <sstream>
119#include <cstdlib>
120#include <algorithm>
121#include <limits>
122
123
124
125using std::endl;
126using std::atof;
127
128//const Int_t MethodBase_MaxIterations_ = 200;
130
131//const Int_t NBIN_HIST_PLOT = 100;
132const Int_t NBIN_HIST_HIGH = 10000;
133
134#ifdef _WIN32
135/* Disable warning C4355: 'this' : used in base member initializer list */
136#pragma warning ( disable : 4355 )
137#endif
138
139
140#include "TMultiGraph.h"
141
142////////////////////////////////////////////////////////////////////////////////
143/// standard constructor
144
146{
147 fNumGraphs = 0;
148 fIndex = 0;
149}
150
151////////////////////////////////////////////////////////////////////////////////
152/// standard destructor
154{
155 if (fMultiGraph){
156 delete fMultiGraph;
157 fMultiGraph = nullptr;
158 }
159 return;
160}
161
162////////////////////////////////////////////////////////////////////////////////
163/// This function gets some title and it creates a TGraph for every title.
164/// It also sets up the style for every TGraph. All graphs are added to a single TMultiGraph.
165///
166/// \param[in] graphTitles vector of titles
167
169{
170 if (fNumGraphs!=0){
171 std::cerr << kERROR << "IPythonInteractive::Init: already initialized..." << std::endl;
172 return;
173 }
174 Int_t color = 2;
175 for(auto& title : graphTitles){
176 fGraphs.push_back( new TGraph() );
177 fGraphs.back()->SetTitle(title);
178 fGraphs.back()->SetName(title);
179 fGraphs.back()->SetFillColor(color);
180 fGraphs.back()->SetLineColor(color);
181 fGraphs.back()->SetMarkerColor(color);
182 fMultiGraph->Add(fGraphs.back());
183 color += 2;
184 fNumGraphs += 1;
185 }
186 return;
187}
188
189////////////////////////////////////////////////////////////////////////////////
190/// This function sets the point number to 0 for all graphs.
191
193{
194 for(Int_t i=0; i<fNumGraphs; i++){
195 fGraphs[i]->Set(0);
196 }
197}
198
199////////////////////////////////////////////////////////////////////////////////
200/// This function is used only in 2 TGraph case, and it will add new data points to graphs.
201///
202/// \param[in] x the x coordinate
203/// \param[in] y1 the y coordinate for the first TGraph
204/// \param[in] y2 the y coordinate for the second TGraph
205
207{
208 fGraphs[0]->Set(fIndex+1);
209 fGraphs[1]->Set(fIndex+1);
210 fGraphs[0]->SetPoint(fIndex, x, y1);
211 fGraphs[1]->SetPoint(fIndex, x, y2);
212 fIndex++;
213 return;
214}
215
216////////////////////////////////////////////////////////////////////////////////
217/// This function can add data points to as many TGraphs as we have.
218///
219/// \param[in] dat vector of data points. The dat[0] contains the x coordinate,
220/// dat[1] contains the y coordinate for first TGraph, dat[2] for second, ...
221
222void TMVA::IPythonInteractive::AddPoint(std::vector<Double_t>& dat)
223{
224 for(Int_t i=0; i<fNumGraphs;i++){
225 fGraphs[i]->Set(fIndex+1);
226 fGraphs[i]->SetPoint(fIndex, dat[0], dat[i+1]);
227 }
228 fIndex++;
229 return;
230}
231
232
233////////////////////////////////////////////////////////////////////////////////
234/// standard constructor
235
238 const TString& methodTitle,
240 const TString& theOption) :
241 IMethod(),
243 fTmpEvent ( 0 ),
244 fRanking ( 0 ),
245 fInputVars ( 0 ),
246 fAnalysisType ( Types::kNoAnalysisType ),
247 fRegressionReturnVal ( 0 ),
248 fMulticlassReturnVal ( 0 ),
249 fDataSetInfo ( dsi ),
250 fSignalReferenceCut ( 0.5 ),
251 fSignalReferenceCutOrientation( 1. ),
252 fVariableTransformType ( Types::kSignal ),
253 fJobName ( jobName ),
254 fMethodName ( methodTitle ),
255 fMethodType ( methodType ),
256 fTestvar ( "" ),
257 fTMVATrainingVersion ( TMVA_VERSION_CODE ),
258 fROOTTrainingVersion ( ROOT_VERSION_CODE ),
259 fConstructedFromWeightFile ( kFALSE ),
260 fBaseDir ( 0 ),
261 fMethodBaseDir ( 0 ),
262 fFile ( 0 ),
263 fSilentFile (kFALSE),
264 fModelPersistence (kTRUE),
265 fWeightFile ( "" ),
266 fEffS ( 0 ),
267 fDefaultPDF ( 0 ),
268 fMVAPdfS ( 0 ),
269 fMVAPdfB ( 0 ),
270 fSplS ( 0 ),
271 fSplB ( 0 ),
272 fSpleffBvsS ( 0 ),
273 fSplTrainS ( 0 ),
274 fSplTrainB ( 0 ),
275 fSplTrainEffBvsS ( 0 ),
276 fVarTransformString ( "None" ),
277 fTransformationPointer ( 0 ),
278 fTransformation ( dsi, methodTitle ),
279 fVerbose ( kFALSE ),
280 fVerbosityLevelString ( "Default" ),
281 fHelp ( kFALSE ),
282 fHasMVAPdfs ( kFALSE ),
283 fIgnoreNegWeightsInTraining( kFALSE ),
284 fSignalClass ( 0 ),
285 fBackgroundClass ( 0 ),
286 fSplRefS ( 0 ),
287 fSplRefB ( 0 ),
288 fSplTrainRefS ( 0 ),
289 fSplTrainRefB ( 0 ),
290 fSetupCompleted (kFALSE)
291{
294
295// // default extension for weight files
296}
297
298////////////////////////////////////////////////////////////////////////////////
299/// constructor used for Testing + Application of the MVA,
300/// only (no training), using given WeightFiles
301
304 const TString& weightFile ) :
305 IMethod(),
306 Configurable(""),
307 fTmpEvent ( 0 ),
308 fRanking ( 0 ),
309 fInputVars ( 0 ),
310 fAnalysisType ( Types::kNoAnalysisType ),
311 fRegressionReturnVal ( 0 ),
312 fMulticlassReturnVal ( 0 ),
313 fDataSetInfo ( dsi ),
314 fSignalReferenceCut ( 0.5 ),
315 fVariableTransformType ( Types::kSignal ),
316 fJobName ( "" ),
317 fMethodName ( "MethodBase" ),
318 fMethodType ( methodType ),
319 fTestvar ( "" ),
320 fTMVATrainingVersion ( 0 ),
321 fROOTTrainingVersion ( 0 ),
322 fConstructedFromWeightFile ( kTRUE ),
323 fBaseDir ( 0 ),
324 fMethodBaseDir ( 0 ),
325 fFile ( 0 ),
326 fSilentFile (kFALSE),
327 fModelPersistence (kTRUE),
328 fWeightFile ( weightFile ),
329 fEffS ( 0 ),
330 fDefaultPDF ( 0 ),
331 fMVAPdfS ( 0 ),
332 fMVAPdfB ( 0 ),
333 fSplS ( 0 ),
334 fSplB ( 0 ),
335 fSpleffBvsS ( 0 ),
336 fSplTrainS ( 0 ),
337 fSplTrainB ( 0 ),
338 fSplTrainEffBvsS ( 0 ),
339 fVarTransformString ( "None" ),
340 fTransformationPointer ( 0 ),
341 fTransformation ( dsi, "" ),
342 fVerbose ( kFALSE ),
343 fVerbosityLevelString ( "Default" ),
344 fHelp ( kFALSE ),
345 fHasMVAPdfs ( kFALSE ),
346 fIgnoreNegWeightsInTraining( kFALSE ),
347 fSignalClass ( 0 ),
348 fBackgroundClass ( 0 ),
349 fSplRefS ( 0 ),
350 fSplRefB ( 0 ),
351 fSplTrainRefS ( 0 ),
352 fSplTrainRefB ( 0 ),
353 fSetupCompleted (kFALSE)
354{
356// // constructor used for Testing + Application of the MVA,
357// // only (no training), using given WeightFiles
358}
359
360////////////////////////////////////////////////////////////////////////////////
361/// destructor
362
364{
365 // destructor
366 if (!fSetupCompleted) Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling destructor of method which got never setup" << Endl;
367
368 // destructor
369 if (fInputVars != 0) { fInputVars->clear(); delete fInputVars; }
370 if (fRanking != 0) delete fRanking;
371
372 // PDFs
373 if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
374 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
375 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
376
377 // Splines
378 if (fSplS) { delete fSplS; fSplS = 0; }
379 if (fSplB) { delete fSplB; fSplB = 0; }
380 if (fSpleffBvsS) { delete fSpleffBvsS; fSpleffBvsS = 0; }
381 if (fSplRefS) { delete fSplRefS; fSplRefS = 0; }
382 if (fSplRefB) { delete fSplRefB; fSplRefB = 0; }
383 if (fSplTrainRefS) { delete fSplTrainRefS; fSplTrainRefS = 0; }
384 if (fSplTrainRefB) { delete fSplTrainRefB; fSplTrainRefB = 0; }
385 if (fSplTrainEffBvsS) { delete fSplTrainEffBvsS; fSplTrainEffBvsS = 0; }
386
387 for (size_t i = 0; i < fEventCollections.size(); i++ ) {
388 if (fEventCollections.at(i)) {
389 for (std::vector<Event*>::const_iterator it = fEventCollections.at(i)->begin();
390 it != fEventCollections.at(i)->end(); ++it) {
391 delete (*it);
392 }
393 delete fEventCollections.at(i);
394 fEventCollections.at(i) = nullptr;
395 }
396 }
397
398 if (fRegressionReturnVal) delete fRegressionReturnVal;
399 if (fMulticlassReturnVal) delete fMulticlassReturnVal;
400}
401
402////////////////////////////////////////////////////////////////////////////////
403/// setup of methods
404
406{
407 // setup of methods
408
409 if (fSetupCompleted) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Calling SetupMethod for the second time" << Endl;
410 InitBase();
411 DeclareBaseOptions();
412 Init();
413 DeclareOptions();
414 fSetupCompleted = kTRUE;
415}
416
417////////////////////////////////////////////////////////////////////////////////
418/// process all options
419/// the "CheckForUnusedOptions" is done in an independent call, since it may be overridden by derived class
420/// (sometimes, eg, fitters are used which can only be implemented during training phase)
421
423{
424 ProcessBaseOptions();
425 ProcessOptions();
426}
427
428////////////////////////////////////////////////////////////////////////////////
429/// check may be overridden by derived class
430/// (sometimes, eg, fitters are used which can only be implemented during training phase)
431
433{
434 CheckForUnusedOptions();
435}
436
437////////////////////////////////////////////////////////////////////////////////
438/// default initialization called by all constructors
439
441{
442 SetConfigDescription( "Configuration options for classifier architecture and tuning" );
443
444 fNbins = gConfig().fVariablePlotting.fNbinsXOfROCCurve;
445 fNbinsMVAoutput = gConfig().fVariablePlotting.fNbinsMVAoutput;
446 fNbinsH = NBIN_HIST_HIGH;
447
448 fSplTrainS = 0;
449 fSplTrainB = 0;
450 fSplTrainEffBvsS = 0;
451 fMeanS = -1;
452 fMeanB = -1;
453 fRmsS = -1;
454 fRmsB = -1;
455 fXmin = DBL_MAX;
456 fXmax = -DBL_MAX;
457 fTxtWeightsOnly = kTRUE;
458 fSplRefS = 0;
459 fSplRefB = 0;
460
461 fTrainTime = -1.;
462 fTestTime = -1.;
463
464 fRanking = 0;
465
466 // temporary until the move to DataSet is complete
467 fInputVars = new std::vector<TString>;
468 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
469 fInputVars->push_back(DataInfo().GetVariableInfo(ivar).GetLabel());
470 }
471 fRegressionReturnVal = 0;
472 fMulticlassReturnVal = 0;
473
474 fEventCollections.resize( 2 );
475 fEventCollections.at(0) = 0;
476 fEventCollections.at(1) = 0;
477
478 // retrieve signal and background class index
479 if (DataInfo().GetClassInfo("Signal") != 0) {
480 fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
481 }
482 if (DataInfo().GetClassInfo("Background") != 0) {
483 fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
484 }
485
486 SetConfigDescription( "Configuration options for MVA method" );
487 SetConfigName( TString("Method") + GetMethodTypeName() );
488}
489
490////////////////////////////////////////////////////////////////////////////////
491/// define the options (their key words) that can be set in the option string
492/// here the options valid for ALL MVA methods are declared.
493///
494/// know options:
495///
496/// - VariableTransform=None,Decorrelated,PCA to use transformed variables
497/// instead of the original ones
498/// - VariableTransformType=Signal,Background which decorrelation matrix to use
499/// in the method. Only the Likelihood
500/// Method can make proper use of independent
501/// transformations of signal and background
502/// - fNbinsMVAPdf = 50 Number of bins used to create a PDF of MVA
503/// - fNsmoothMVAPdf = 2 Number of times a histogram is smoothed before creating the PDF
504/// - fHasMVAPdfs create PDFs for the MVA outputs
505/// - V for Verbose output (!V) for non-verbose
506/// - H for Help message
507
509{
510 DeclareOptionRef( fVerbose, "V", "Verbose output (short form of \"VerbosityLevel\" below - overrides the latter one)" );
511
512 DeclareOptionRef( fVerbosityLevelString="Default", "VerbosityLevel", "Verbosity level" );
513 AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
514 AddPreDefVal( TString("Debug") );
515 AddPreDefVal( TString("Verbose") );
516 AddPreDefVal( TString("Info") );
517 AddPreDefVal( TString("Warning") );
518 AddPreDefVal( TString("Error") );
519 AddPreDefVal( TString("Fatal") );
520
521 // If True (default): write all training results (weights) as text files only;
522 // if False: write also in ROOT format (not available for all methods - will abort if not
523 fTxtWeightsOnly = kTRUE; // OBSOLETE !!!
524 fNormalise = kFALSE; // OBSOLETE !!!
525
526 DeclareOptionRef( fVarTransformString, "VarTransform", "List of variable transformations performed before training, e.g., \"D_Background,P_Signal,G,N_AllClasses\" for: \"Decorrelation, PCA-transformation, Gaussianisation, Normalisation, each for the given class of events ('AllClasses' denotes all events of all classes, if no class indication is given, 'All' is assumed)\"" );
527
528 DeclareOptionRef( fHelp, "H", "Print method-specific help message" );
529
530 DeclareOptionRef( fHasMVAPdfs, "CreateMVAPdfs", "Create PDFs for classifier outputs (signal and background)" );
531
532 DeclareOptionRef( fIgnoreNegWeightsInTraining, "IgnoreNegWeightsInTraining",
533 "Events with negative weights are ignored in the training (but are included for testing and performance evaluation)" );
534}
535
536////////////////////////////////////////////////////////////////////////////////
537/// the option string is decoded, for available options see "DeclareOptions"
538
540{
541 if (HasMVAPdfs()) {
542 // setting the default bin num... maybe should be static ? ==> Please no static (JS)
543 // You can't use the logger in the constructor!!! Log() << kINFO << "Create PDFs" << Endl;
544 // reading every PDF's definition and passing the option string to the next one to be read and marked
545 fDefaultPDF = new PDF( TString(GetName())+"_PDF", GetOptions(), "MVAPdf" );
546 fDefaultPDF->DeclareOptions();
547 fDefaultPDF->ParseOptions();
548 fDefaultPDF->ProcessOptions();
549 fMVAPdfB = new PDF( TString(GetName())+"_PDFBkg", fDefaultPDF->GetOptions(), "MVAPdfBkg", fDefaultPDF );
550 fMVAPdfB->DeclareOptions();
551 fMVAPdfB->ParseOptions();
552 fMVAPdfB->ProcessOptions();
553 fMVAPdfS = new PDF( TString(GetName())+"_PDFSig", fMVAPdfB->GetOptions(), "MVAPdfSig", fDefaultPDF );
554 fMVAPdfS->DeclareOptions();
555 fMVAPdfS->ParseOptions();
556 fMVAPdfS->ProcessOptions();
557
558 // the final marked option string is written back to the original methodbase
559 SetOptions( fMVAPdfS->GetOptions() );
560 }
561
562 TMVA::CreateVariableTransforms( fVarTransformString,
563 DataInfo(),
564 GetTransformationHandler(),
565 Log() );
566
567 if (!HasMVAPdfs()) {
568 if (fDefaultPDF!= 0) { delete fDefaultPDF; fDefaultPDF = 0; }
569 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
570 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
571 }
572
573 if (fVerbose) { // overwrites other settings
574 fVerbosityLevelString = TString("Verbose");
575 Log().SetMinType( kVERBOSE );
576 }
577 else if (fVerbosityLevelString == "Debug" ) Log().SetMinType( kDEBUG );
578 else if (fVerbosityLevelString == "Verbose" ) Log().SetMinType( kVERBOSE );
579 else if (fVerbosityLevelString == "Info" ) Log().SetMinType( kINFO );
580 else if (fVerbosityLevelString == "Warning" ) Log().SetMinType( kWARNING );
581 else if (fVerbosityLevelString == "Error" ) Log().SetMinType( kERROR );
582 else if (fVerbosityLevelString == "Fatal" ) Log().SetMinType( kFATAL );
583 else if (fVerbosityLevelString != "Default" ) {
584 Log() << kFATAL << "<ProcessOptions> Verbosity level type '"
585 << fVerbosityLevelString << "' unknown." << Endl;
586 }
587 Event::SetIgnoreNegWeightsInTraining(fIgnoreNegWeightsInTraining);
588}
589
590////////////////////////////////////////////////////////////////////////////////
591/// options that are used ONLY for the READER to ensure backward compatibility
592/// they are hence without any effect (the reader is only reading the training
593/// options that HAD been used at the training of the .xml weight file at hand
594
596{
597 DeclareOptionRef( fNormalise=kFALSE, "Normalise", "Normalise input variables" ); // don't change the default !!!
598 DeclareOptionRef( fUseDecorr=kFALSE, "D", "Use-decorrelated-variables flag" );
599 DeclareOptionRef( fVariableTransformTypeString="Signal", "VarTransformType",
600 "Use signal or background events to derive for variable transformation (the transformation is applied on both types of, course)" );
601 AddPreDefVal( TString("Signal") );
602 AddPreDefVal( TString("Background") );
603 DeclareOptionRef( fTxtWeightsOnly=kTRUE, "TxtWeightFilesOnly", "If True: write all training results (weights) as text files (False: some are written in ROOT format)" );
604 // Why on earth ?? was this here? Was the verbosity level option meant to 'disappear? Not a good idea i think..
605 // DeclareOptionRef( fVerbosityLevelString="Default", "VerboseLevel", "Verbosity level" );
606 // AddPreDefVal( TString("Default") ); // uses default defined in MsgLogger header
607 // AddPreDefVal( TString("Debug") );
608 // AddPreDefVal( TString("Verbose") );
609 // AddPreDefVal( TString("Info") );
610 // AddPreDefVal( TString("Warning") );
611 // AddPreDefVal( TString("Error") );
612 // AddPreDefVal( TString("Fatal") );
613 DeclareOptionRef( fNbinsMVAPdf = 60, "NbinsMVAPdf", "Number of bins used for the PDFs of classifier outputs" );
614 DeclareOptionRef( fNsmoothMVAPdf = 2, "NsmoothMVAPdf", "Number of smoothing iterations for classifier PDFs" );
615}
616
617
618////////////////////////////////////////////////////////////////////////////////
619/// call the Optimizer with the set of parameters and ranges that
620/// are meant to be tuned.
621
622std::map<TString,Double_t> TMVA::MethodBase::OptimizeTuningParameters(TString /* fomType */ , TString /* fitType */)
623{
624 // this is just a dummy... needs to be implemented for each method
625 // individually (as long as we don't have it automatized via the
626 // configuration string
627
628 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Parameter optimization is not yet implemented for method "
629 << GetName() << Endl;
630 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Currently we need to set hardcoded which parameter is tuned in which ranges"<<Endl;
631
632 std::map<TString,Double_t> tunedParameters;
633 tunedParameters.size(); // just to get rid of "unused" warning
634 return tunedParameters;
635
636}
637
638////////////////////////////////////////////////////////////////////////////////
639/// set the tuning parameters according to the argument
640/// This is just a dummy .. have a look at the MethodBDT how you could
641/// perhaps implement the same thing for the other Classifiers..
642
643void TMVA::MethodBase::SetTuneParameters(std::map<TString,Double_t> /* tuneParameters */)
644{
   // Intentionally empty: the base class has no tunable parameters.
   // Derived classifiers (e.g. MethodBDT) override this to apply the
   // parameter set returned by OptimizeTuningParameters().
645}
646
647////////////////////////////////////////////////////////////////////////////////
648
650{
651 Data()->SetCurrentType(Types::kTraining);
652 Event::SetIsTraining(kTRUE); // used to set negative event weights to zero if chosen to do so
653
654 // train the MVA method
655 if (Help()) PrintHelpMessage();
656
657 // all histograms should be created in the method's subdirectory
658 if(!IsSilentFile()) BaseDir()->cd();
659
660 // once calculate all the transformation (e.g. the sequence of Decorr:Gauss:Decorr)
661 // needed for this classifier
662 GetTransformationHandler().CalcTransformations(Data()->GetEventCollection());
663
664 // call training of derived MVA
665 Log() << kDEBUG //<<Form("\tDataset[%s] : ",DataInfo().GetName())
666 << "Begin training" << Endl;
667 Long64_t nEvents = Data()->GetNEvents();
668 Timer traintimer( nEvents, GetName(), kTRUE );
669 Train();
670 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName()
671 << "\tEnd of training " << Endl;
672 SetTrainTime(traintimer.ElapsedSeconds());
673 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
674 << "Elapsed time for training with " << nEvents << " events: "
675 << traintimer.GetElapsedTime() << " " << Endl;
676
677 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
678 << "\tCreate MVA output for ";
679
680 // create PDFs for the signal and background MVA distributions (if required)
681 if (DoMulticlass()) {
682 Log() <<Form("[%s] : ",DataInfo().GetName())<< "Multiclass classification on training sample" << Endl;
683 AddMulticlassOutput(Types::kTraining);
684 }
685 else if (!DoRegression()) {
686
687 Log() <<Form("[%s] : ",DataInfo().GetName())<< "classification on training sample" << Endl;
688 AddClassifierOutput(Types::kTraining);
689 if (HasMVAPdfs()) {
690 CreateMVAPdfs();
691 AddClassifierOutputProb(Types::kTraining);
692 }
693
694 } else {
695
696 Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "regression on training sample" << Endl;
697 AddRegressionOutput( Types::kTraining );
698
699 if (HasMVAPdfs() ) {
700 Log() <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create PDFs" << Endl;
701 CreateMVAPdfs();
702 }
703 }
704
705 // write the current MVA state into stream
706 // produced are one text file and one ROOT file
707 if (fModelPersistence ) WriteStateToFile();
708
709 // produce standalone make class (presently only supported for classification)
710 if ((!DoRegression()) && (fModelPersistence)) MakeClass();
711
712 // write additional monitoring histograms to main target file (not the weight file)
713 // again, make sure the histograms go into the method's subdirectory
714 if(!IsSilentFile())
715 {
716 BaseDir()->cd();
717 WriteMonitoringHistosToFile();
718 }
719}
720
721////////////////////////////////////////////////////////////////////////////////
722
724{
725 if (!DoRegression()) Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Trying to use GetRegressionDeviation() with a classification job" << Endl;
726 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
727 ResultsRegression* regRes = (ResultsRegression*)Data()->GetResults(GetMethodName(), Types::kTesting, Types::kRegression);
728 bool truncate = false;
729 TH1F* h1 = regRes->QuadraticDeviation( tgtNum , truncate, 1.);
730 stddev = sqrt(h1->GetMean());
731 truncate = true;
732 Double_t yq[1], xq[]={0.9};
733 h1->GetQuantiles(1,yq,xq);
734 TH1F* h2 = regRes->QuadraticDeviation( tgtNum , truncate, yq[0]);
735 stddev90Percent = sqrt(h2->GetMean());
736 delete h1;
737 delete h2;
738}
739
740////////////////////////////////////////////////////////////////////////////////
741/// Get all regression values in one call
743{
744 Long64_t nEvents = Data()->GetNEvents();
745 // use timer
746 Timer timer( nEvents, GetName(), kTRUE );
747
748 // Drawing the progress bar every event was causing a huge slowdown in the evaluation time
749 // So we set some parameters to draw the progress bar a total of totalProgressDraws, i.e. only draw every 1 in 100
750
751 Int_t totalProgressDraws = 100; // total number of times to update the progress bar
752 Int_t drawProgressEvery = 1; // draw every nth event such that we have a total of totalProgressDraws
754
755 size_t ntargets = Data()->GetEvent(0)->GetNTargets();
756 std::vector<float> output(nEvents*ntargets);
757 auto itr = output.begin();
758 for (Int_t ievt=0; ievt<nEvents; ievt++) {
759
760 Data()->SetCurrentEvent(ievt);
761 std::vector< Float_t > vals = GetRegressionValues();
762 if (vals.size() != ntargets)
763 Log() << kFATAL << "Output regression vector with size " << vals.size() << " is not consistent with target size of "
764 << ntargets << std::endl;
765
766 std::copy(vals.begin(), vals.end(), itr);
767 itr += vals.size();
768
769 // Only draw the progress bar once in a while, doing this every event causes the evaluation to be ridiculously slow
770 if(ievt % drawProgressEvery == 0 || ievt==nEvents-1) timer.DrawProgressBar( ievt );
771 }
772
773 return output;
774}
775
776////////////////////////////////////////////////////////////////////////////////
777/// prepare tree branch with the method's discriminating variable
778
780{
781 Data()->SetCurrentType(type);
782
783 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
784
785 ResultsRegression* regRes = (ResultsRegression*)Data()->GetResults(GetMethodName(), type, Types::kRegression);
786
787 Long64_t nEvents = Data()->GetNEvents();
788
789 // use timer
790 Timer timer( nEvents, GetName(), kTRUE );
791
792 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
793 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
794
795 regRes->Resize( nEvents );
796
797 std::vector<float> output = GetAllRegressionValues();
798 // assume we have all number of targets for all events
799 Data()->SetCurrentEvent(0);
800 size_t nTargets = GetEvent()->GetNTargets();
801 auto regValuesBegin = output.begin();
803
804 if (output.size() != nTargets * size_t(nEvents))
805 Log() << kFATAL << "Output regression vector with size " << output.size() << " is not consistent with target size of "
806 << nTargets << " and number of events " << nEvents << std::endl;
807
808
809 for (Int_t ievt=0; ievt<nEvents; ievt++) {
810
811 std::vector< Float_t > vals(regValuesBegin, regValuesEnd);
812 regRes->SetValue( vals, ievt );
813
816 }
817
818 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
819 << "Elapsed time for evaluation of " << nEvents << " events: "
820 << timer.GetElapsedTime() << " " << Endl;
821
822 // store time used for testing
824 SetTestTime(timer.ElapsedSeconds());
825
826 TString histNamePrefix(GetTestvarName());
827 histNamePrefix += (type==Types::kTraining?"train":"test");
828 regRes->CreateDeviationHistograms( histNamePrefix );
829}
830////////////////////////////////////////////////////////////////////////////////
831/// Get all multi-class values
833{
834 // use timer for progress bar
835
836 Long64_t nEvents = Data()->GetNEvents();
837 Timer timer( nEvents, GetName(), kTRUE );
838
839 Int_t modulo = Int_t(nEvents/100) + 1;
840 // call first time to get number of classes
841 Data()->SetCurrentEvent(0);
842 std::vector< Float_t > vals = GetMulticlassValues();
843 std::vector<float> output(nEvents * vals.size());
844 auto itr = output.begin();
845 std::copy(vals.begin(), vals.end(), itr);
846 for (Int_t ievt=1; ievt<nEvents; ievt++) {
847 itr += vals.size();
848 Data()->SetCurrentEvent(ievt);
849 vals = GetMulticlassValues();
850
851 std::copy(vals.begin(), vals.end(), itr);
852
853 if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
854 }
855 return output;
856}
857////////////////////////////////////////////////////////////////////////////////
858/// prepare tree branch with the method's discriminating variable
859
861{
862 Data()->SetCurrentType(type);
863
864 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Create results for " << (type==Types::kTraining?"training":"testing") << Endl;
865
866 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
867 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in AddMulticlassOutput, exiting."<<Endl;
868
869 Long64_t nEvents = Data()->GetNEvents();
870
871 // use timer
872 Timer timer( nEvents, GetName(), kTRUE );
873
874 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Multiclass evaluation of " << GetMethodName() << " on "
875 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
876
877 resMulticlass->Resize( nEvents );
878 std::vector<Float_t> output = GetAllMulticlassValues();
879 size_t nClasses = output.size()/nEvents;
880 for (Int_t ievt=0; ievt<nEvents; ievt++) {
881 std::vector< Float_t > vals(output.begin()+ievt*nClasses, output.begin()+(ievt+1)*nClasses);
882 resMulticlass->SetValue( vals, ievt );
883 }
884
885
886 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())
887 << "Elapsed time for evaluation of " << nEvents << " events: "
888 << timer.GetElapsedTime() << " " << Endl;
889
890 // store time used for testing
892 SetTestTime(timer.ElapsedSeconds());
893
894 TString histNamePrefix(GetTestvarName());
895 histNamePrefix += (type==Types::kTraining?"_Train":"_Test");
896
897 resMulticlass->CreateMulticlassHistos( histNamePrefix, fNbinsMVAoutput, fNbinsH );
898 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefix);
899}
900
901////////////////////////////////////////////////////////////////////////////////
902
904 if (err) *err=-1;
905 if (errUpper) *errUpper=-1;
906}
907
908////////////////////////////////////////////////////////////////////////////////
909
911 fTmpEvent = ev;
912 Double_t val = GetMvaValue(err, errUpper);
913 fTmpEvent = 0;
914 return val;
915}
916
917////////////////////////////////////////////////////////////////////////////////
918/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
919/// for a quick determination if an event would be selected as signal or background
920
922 return GetMvaValue()*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
923}
924////////////////////////////////////////////////////////////////////////////////
925/// uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation)
926/// for a quick determination if an event with this mva output value would be selected as signal or background
927
929 return mvaVal*GetSignalReferenceCutOrientation() > GetSignalReferenceCut()*GetSignalReferenceCutOrientation() ? kTRUE : kFALSE;
930}
931
932////////////////////////////////////////////////////////////////////////////////
933/// prepare tree branch with the method's discriminating variable
934
936{
937 Data()->SetCurrentType(type);
938
940 (ResultsClassification*)Data()->GetResults(GetMethodName(), type, Types::kClassification );
941
942 Long64_t nEvents = Data()->GetNEvents();
943 clRes->Resize( nEvents );
944
945 // use timer
946 Timer timer( nEvents, GetName(), kTRUE );
947
948 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
949 << "Evaluation of " << GetMethodName() << " on "
950 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
951 << " sample (" << nEvents << " events)" << Endl;
952
953 std::vector<Double_t> mvaValues = GetMvaValues(0, nEvents, true);
954
955 Log() << kINFO
956 << "Elapsed time for evaluation of " << nEvents << " events: "
957 << timer.GetElapsedTime() << " " << Endl;
958
959 // store time used for testing
961 SetTestTime(timer.ElapsedSeconds());
962
963 // load mva values and type to results object
964 for (Int_t ievt = 0; ievt < nEvents; ievt++) {
965 // note we do not need the trasformed event to get the signal/background information
966 // by calling Data()->GetEvent instead of this->GetEvent we access the untransformed one
967 auto ev = Data()->GetEvent(ievt);
968 clRes->SetValue(mvaValues[ievt], ievt, DataInfo().IsSignal(ev));
969 }
970}
971
972////////////////////////////////////////////////////////////////////////////////
973/// get all the MVA values for the events of the current Data type
975{
976
977 Long64_t nEvents = Data()->GetNEvents();
978 if (firstEvt > lastEvt || lastEvt > nEvents) lastEvt = nEvents;
979 if (firstEvt < 0) firstEvt = 0;
980 std::vector<Double_t> values(lastEvt-firstEvt);
981 // log in case of looping on all the events
982 nEvents = values.size();
983
984 // use timer
985 Timer timer( nEvents, GetName(), kTRUE );
986
987 if (logProgress)
988 Log() << kHEADER << Form("[%s] : ",DataInfo().GetName())
989 << "Evaluation of " << GetMethodName() << " on "
990 << (Data()->GetCurrentType() == Types::kTraining ? "training" : "testing")
991 << " sample (" << nEvents << " events)" << Endl;
992
993 for (Int_t ievt=firstEvt; ievt<lastEvt; ievt++) {
994 Data()->SetCurrentEvent(ievt);
995 values[ievt] = GetMvaValue();
996
997 // print progress
998 if (logProgress) {
999 Int_t modulo = Int_t(nEvents/100);
1000 if (modulo <= 0 ) modulo = 1;
1001 if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
1002 }
1003 }
1004 if (logProgress) {
1005 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1006 << "Elapsed time for evaluation of " << nEvents << " events: "
1007 << timer.GetElapsedTime() << " " << Endl;
1008 }
1009
1010 return values;
1011}
1012
1013////////////////////////////////////////////////////////////////////////////////
1014/// get all the MVA values for the events of the given Data type
1015// (this is used by Method Category and it does not need to be re-implemented by derived classes )
1017{
1018 fTmpData = data;
1019 auto result = GetMvaValues(firstEvt, lastEvt, logProgress);
1020 fTmpData = nullptr;
1021 return result;
1022}
1023
1024////////////////////////////////////////////////////////////////////////////////
1025/// prepare tree branch with the method's discriminating variable
1026
1028{
1029 Data()->SetCurrentType(type);
1030
1032 (ResultsClassification*)Data()->GetResults(TString("prob_")+GetMethodName(), type, Types::kClassification );
1033
1034 Long64_t nEvents = Data()->GetNEvents();
1035
1036 // use timer
1037 Timer timer( nEvents, GetName(), kTRUE );
1038
1039 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName()) << "Evaluation of " << GetMethodName() << " on "
1040 << (type==Types::kTraining?"training":"testing") << " sample" << Endl;
1041
1042 mvaProb->Resize( nEvents );
1043 Int_t modulo = Int_t(nEvents/100);
1044 if (modulo <= 0 ) modulo = 1;
1045 for (Int_t ievt=0; ievt<nEvents; ievt++) {
1046
1047 Data()->SetCurrentEvent(ievt);
1048 Float_t proba = ((Float_t)GetProba( GetMvaValue(), 0.5 ));
1049 if (proba < 0) break;
1050 mvaProb->SetValue( proba, ievt, DataInfo().IsSignal( Data()->GetEvent()) );
1051
1052 // print progress
1053 if (ievt%modulo == 0) timer.DrawProgressBar( ievt );
1054 }
1055
1056 Log() << kDEBUG <<Form("Dataset[%s] : ",DataInfo().GetName())
1057 << "Elapsed time for evaluation of " << nEvents << " events: "
1058 << timer.GetElapsedTime() << " " << Endl;
1059}
1060
1061////////////////////////////////////////////////////////////////////////////////
1062/// calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
1063///
1064/// - bias = average deviation
1065/// - dev = average absolute deviation
1066/// - rms = rms of deviation
1067
1072 Double_t& corr,
1074{
1075 Types::ETreeType savedType = Data()->GetCurrentType();
1076 Data()->SetCurrentType(type);
1077
1078 bias = 0; biasT = 0; dev = 0; devT = 0; rms = 0; rmsT = 0;
1079 Double_t sumw = 0;
1080 Double_t m1 = 0, m2 = 0, s1 = 0, s2 = 0, s12 = 0; // for correlation
1081 const Int_t nevt = GetNEvents();
1082 Float_t* rV = new Float_t[nevt];
1083 Float_t* tV = new Float_t[nevt];
1084 Float_t* wV = new Float_t[nevt];
1085 Float_t xmin = 1e30, xmax = -1e30;
1086 Log() << kINFO << "Calculate regression for all events" << Endl;
1087 Timer timer( nevt, GetName(), kTRUE );
1088 Long64_t modulo = Long64_t(nevt / 100) + 1;
1089 auto output = GetAllRegressionValues();
1090 int ntargets = Data()->GetEvent(0)->GetNTargets();
1091 for (Long64_t ievt=0; ievt<nevt; ievt++) {
1092 const Event* ev = Data()->GetEvent(ievt); // NOTE: need untransformed event here !
1093 Float_t t = ev->GetTarget(0);
1094 Float_t w = ev->GetWeight();
1096 Float_t d = (r-t);
1097
1098 // find min/max
1101
1102 // store for truncated RMS computation
1103 rV[ievt] = r;
1104 tV[ievt] = t;
1105 wV[ievt] = w;
1106
1107 // compute deviation-squared
1108 sumw += w;
1109 bias += w * d;
1110 dev += w * TMath::Abs(d);
1111 rms += w * d * d;
1112
1113 // compute correlation between target and regression estimate
1114 m1 += t*w; s1 += t*t*w;
1115 m2 += r*w; s2 += r*r*w;
1116 s12 += t*r;
1117 // print progress
1118 if (ievt % modulo == 0)
1119 timer.DrawProgressBar(ievt);
1120 }
1121 timer.DrawProgressBar(nevt - 1);
1122 Log() << kINFO << "Elapsed time for evaluation of " << nevt << " events: "
1123 << timer.GetElapsedTime() << " " << Endl;
1124
1125 // standard quantities
1126 bias /= sumw;
1127 dev /= sumw;
1128 rms /= sumw;
1130
1131 // correlation
1132 m1 /= sumw;
1133 m2 /= sumw;
1134 corr = s12/sumw - m1*m2;
1135 corr /= TMath::Sqrt( (s1/sumw - m1*m1) * (s2/sumw - m2*m2) );
1136
1137 // create histogram required for computation of mutual information
1138 TH2F* hist = new TH2F( "hist", "hist", 150, xmin, xmax, 100, xmin, xmax );
1139 TH2F* histT = new TH2F( "histT", "histT", 150, xmin, xmax, 100, xmin, xmax );
1140
1141 // compute truncated RMS and fill histogram
1142 Double_t devMax = bias + 2*rms;
1143 Double_t devMin = bias - 2*rms;
1144 sumw = 0;
1145 for (Long64_t ievt=0; ievt<nevt; ievt++) {
1146 Float_t d = (rV[ievt] - tV[ievt]);
1147 hist->Fill( rV[ievt], tV[ievt], wV[ievt] );
1148 if (d >= devMin && d <= devMax) {
1149 sumw += wV[ievt];
1150 biasT += wV[ievt] * d;
1151 devT += wV[ievt] * TMath::Abs(d);
1152 rmsT += wV[ievt] * d * d;
1153 histT->Fill( rV[ievt], tV[ievt], wV[ievt] );
1154 }
1155 }
1156 biasT /= sumw;
1157 devT /= sumw;
1158 rmsT /= sumw;
1160 mInf = gTools().GetMutualInformation( *hist );
1162
1163 delete hist;
1164 delete histT;
1165
1166 delete [] rV;
1167 delete [] tV;
1168 delete [] wV;
1169
1170 Data()->SetCurrentType(savedType);
1171}
1172
1173
1174////////////////////////////////////////////////////////////////////////////////
1175/// test multiclass classification
1176
1178{
1179 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
1180 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in TestMulticlass, exiting."<<Endl;
1181
1182 // GA evaluation of best cut for sig eff * sig pur. Slow, disabled for now.
1183 // Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for test
1184 // data..." << Endl; for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
1185 // resMulticlass->GetBestMultiClassCuts(icls);
1186 // }
1187
1188 // Create histograms for use in TMVA GUI
1189 TString histNamePrefix(GetTestvarName());
1192
1193 resMulticlass->CreateMulticlassHistos(histNamePrefixTest, fNbinsMVAoutput, fNbinsH);
1194 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTest);
1195
1196 resMulticlass->CreateMulticlassHistos(histNamePrefixTrain, fNbinsMVAoutput, fNbinsH);
1197 resMulticlass->CreateMulticlassPerformanceHistos(histNamePrefixTrain);
1198}
1199
1200
1201////////////////////////////////////////////////////////////////////////////////
1202/// initialization
1203
1205{
1206 Data()->SetCurrentType(Types::kTesting);
1207
1209 ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );
1210
1211 // sanity checks: tree must exist, and theVar must be in tree
1212 if (0==mvaRes && !(GetMethodTypeName().Contains("Cuts"))) {
1213 Log()<<Form("Dataset[%s] : ",DataInfo().GetName()) << "mvaRes " << mvaRes << " GetMethodTypeName " << GetMethodTypeName()
1214 << " contains " << !(GetMethodTypeName().Contains("Cuts")) << Endl;
1215 Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<TestInit> Test variable " << GetTestvarName()
1216 << " not found in tree" << Endl;
1217 }
1218
1219 // basic statistics operations are made in base class
1220 gTools().ComputeStat( GetEventCollection(Types::kTesting), mvaRes->GetValueVector(),
1221 fMeanS, fMeanB, fRmsS, fRmsB, fXmin, fXmax, fSignalClass );
1222
1223 // choose reasonable histogram ranges, by removing outliers
1224 Double_t nrms = 10;
1225 fXmin = TMath::Max( TMath::Min( fMeanS - nrms*fRmsS, fMeanB - nrms*fRmsB ), fXmin );
1226 fXmax = TMath::Min( TMath::Max( fMeanS + nrms*fRmsS, fMeanB + nrms*fRmsB ), fXmax );
1227
1228 // determine cut orientation
1229 fCutOrientation = (fMeanS > fMeanB) ? kPositive : kNegative;
1230
1231 // fill 2 types of histograms for the various analyses
1232 // this one is for actual plotting
1233
1234 Double_t sxmax = fXmax+0.00001;
1235
1236 // classifier response distributions for training sample
1237 // MVA plots used for graphics representation (signal)
1239 if(IsSilentFile()) {
1240 TestvarName = TString::Format("[%s]%s",DataInfo().GetName(),GetTestvarName().Data());
1241 } else {
1242 TestvarName=GetTestvarName();
1243 }
1244 TH1* mva_s = new TH1D( TestvarName + "_S",TestvarName + "_S", fNbinsMVAoutput, fXmin, sxmax );
1245 TH1* mva_b = new TH1D( TestvarName + "_B",TestvarName + "_B", fNbinsMVAoutput, fXmin, sxmax );
1246 mvaRes->Store(mva_s, "MVA_S");
1247 mvaRes->Store(mva_b, "MVA_B");
1248 mva_s->Sumw2();
1249 mva_b->Sumw2();
1250
1251 TH1* proba_s = 0;
1252 TH1* proba_b = 0;
1253 TH1* rarity_s = 0;
1254 TH1* rarity_b = 0;
1255 if (HasMVAPdfs()) {
1256 // P(MVA) plots used for graphics representation
1257 proba_s = new TH1D( TestvarName + "_Proba_S", TestvarName + "_Proba_S", fNbinsMVAoutput, 0.0, 1.0 );
1258 proba_b = new TH1D( TestvarName + "_Proba_B", TestvarName + "_Proba_B", fNbinsMVAoutput, 0.0, 1.0 );
1259 mvaRes->Store(proba_s, "Prob_S");
1260 mvaRes->Store(proba_b, "Prob_B");
1261 proba_s->Sumw2();
1262 proba_b->Sumw2();
1263
1264 // R(MVA) plots used for graphics representation
1265 rarity_s = new TH1D( TestvarName + "_Rarity_S", TestvarName + "_Rarity_S", fNbinsMVAoutput, 0.0, 1.0 );
1266 rarity_b = new TH1D( TestvarName + "_Rarity_B", TestvarName + "_Rarity_B", fNbinsMVAoutput, 0.0, 1.0 );
1267 mvaRes->Store(rarity_s, "Rar_S");
1268 mvaRes->Store(rarity_b, "Rar_B");
1269 rarity_s->Sumw2();
1270 rarity_b->Sumw2();
1271 }
1272
1273 // MVA plots used for efficiency calculations (large number of bins)
1274 TH1* mva_eff_s = new TH1D( TestvarName + "_S_high", TestvarName + "_S_high", fNbinsH, fXmin, sxmax );
1275 TH1* mva_eff_b = new TH1D( TestvarName + "_B_high", TestvarName + "_B_high", fNbinsH, fXmin, sxmax );
1276 mvaRes->Store(mva_eff_s, "MVA_HIGHBIN_S");
1277 mvaRes->Store(mva_eff_b, "MVA_HIGHBIN_B");
1278 mva_eff_s->Sumw2();
1279 mva_eff_b->Sumw2();
1280
1281 // fill the histograms
1282
1284 (Data()->GetResults( TString("prob_")+GetMethodName(), Types::kTesting, Types::kMaxAnalysisType ) );
1285
1286 Log() << kHEADER <<Form("[%s] : ",DataInfo().GetName())<< "Loop over test events and fill histograms with classifier response..." << Endl << Endl;
1287 if (mvaProb) Log() << kINFO << "Also filling probability and rarity histograms (on request)..." << Endl;
1288 //std::vector<Bool_t>* mvaResTypes = mvaRes->GetValueVectorTypes();
1289
1290 //LM: this is needed to avoid crashes in ROOCCURVE
1291 if ( mvaRes->GetSize() != GetNEvents() ) {
1292 Log() << kFATAL << TString::Format("Inconsistent result size %lld with number of events %u ", mvaRes->GetSize() , GetNEvents() ) << Endl;
1293 assert(mvaRes->GetSize() == GetNEvents());
1294 }
1295
1296 for (Long64_t ievt=0; ievt<GetNEvents(); ievt++) {
1297
1298 const Event* ev = GetEvent(ievt);
1299 Float_t v = (*mvaRes)[ievt][0];
1300 Float_t w = ev->GetWeight();
1301
1302 if (DataInfo().IsSignal(ev)) {
1303 //mvaResTypes->push_back(kTRUE);
1304 mva_s ->Fill( v, w );
1305 if (mvaProb) {
1306 proba_s->Fill( (*mvaProb)[ievt][0], w );
1307 rarity_s->Fill( GetRarity( v ), w );
1308 }
1309
1310 mva_eff_s ->Fill( v, w );
1311 }
1312 else {
1313 //mvaResTypes->push_back(kFALSE);
1314 mva_b ->Fill( v, w );
1315 if (mvaProb) {
1316 proba_b->Fill( (*mvaProb)[ievt][0], w );
1317 rarity_b->Fill( GetRarity( v ), w );
1318 }
1319 mva_eff_b ->Fill( v, w );
1320 }
1321 }
1322
1323 // uncomment those (and several others if you want unnormalized output
1324 gTools().NormHist( mva_s );
1325 gTools().NormHist( mva_b );
1326 gTools().NormHist( proba_s );
1327 gTools().NormHist( proba_b );
1332
1333 // create PDFs from histograms, using default splines, and no additional smoothing
1334 if (fSplS) { delete fSplS; fSplS = 0; }
1335 if (fSplB) { delete fSplB; fSplB = 0; }
1336 fSplS = new PDF( TString(GetName()) + " PDF Sig", mva_s, PDF::kSpline2 );
1337 fSplB = new PDF( TString(GetName()) + " PDF Bkg", mva_b, PDF::kSpline2 );
1338}
1339
1340////////////////////////////////////////////////////////////////////////////////
1341/// general method used in writing the header of the weight files where
1342/// the used variables, variable transformation type etc. is specified
1343
1344void TMVA::MethodBase::WriteStateToStream( std::ostream& tf ) const
1345{
1346 TString prefix = "";
1348
1349 tf << prefix << "#GEN -*-*-*-*-*-*-*-*-*-*-*- general info -*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1350 tf << prefix << "Method : " << GetMethodTypeName() << "::" << GetMethodName() << std::endl;
1351 tf.setf(std::ios::left);
1352 tf << prefix << "TMVA Release : " << std::setw(10) << GetTrainingTMVAVersionString() << " ["
1353 << GetTrainingTMVAVersionCode() << "]" << std::endl;
1354 tf << prefix << "ROOT Release : " << std::setw(10) << GetTrainingROOTVersionString() << " ["
1355 << GetTrainingROOTVersionCode() << "]" << std::endl;
1356 tf << prefix << "Creator : " << userInfo->fUser << std::endl;
1357 tf << prefix << "Date : "; TDatime *d = new TDatime; tf << d->AsString() << std::endl; delete d;
1358 tf << prefix << "Host : " << gSystem->GetBuildNode() << std::endl;
1359 tf << prefix << "Dir : " << gSystem->WorkingDirectory() << std::endl;
1360 tf << prefix << "Training events: " << Data()->GetNTrainingEvents() << std::endl;
1361
1362 TString analysisType(((const_cast<TMVA::MethodBase*>(this)->GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification"));
1363
1364 tf << prefix << "Analysis type : " << "[" << ((GetAnalysisType()==Types::kRegression) ? "Regression" : "Classification") << "]" << std::endl;
1365 tf << prefix << std::endl;
1366
1367 delete userInfo;
1368
1369 // First write all options
1370 tf << prefix << std::endl << prefix << "#OPT -*-*-*-*-*-*-*-*-*-*-*-*- options -*-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1371 WriteOptionsToStream( tf, prefix );
1372 tf << prefix << std::endl;
1373
1374 // Second write variable info
1375 tf << prefix << std::endl << prefix << "#VAR -*-*-*-*-*-*-*-*-*-*-*-* variables *-*-*-*-*-*-*-*-*-*-*-*-" << std::endl << prefix << std::endl;
1376 WriteVarsToStream( tf, prefix );
1377 tf << prefix << std::endl;
1378}
1379
1380////////////////////////////////////////////////////////////////////////////////
1381/// xml writing
1382
1383void TMVA::MethodBase::AddInfoItem( void* gi, const TString& name, const TString& value) const
1384{
1385 void* it = gTools().AddChild(gi,"Info");
1386 gTools().AddAttr(it,"name", name);
1387 gTools().AddAttr(it,"value", value);
1388}
1389
1390////////////////////////////////////////////////////////////////////////////////
1391
1393 if (analysisType == Types::kRegression) {
1394 AddRegressionOutput( type );
1395 } else if (analysisType == Types::kMulticlass) {
1396 AddMulticlassOutput( type );
1397 } else {
1398 AddClassifierOutput( type );
1399 if (HasMVAPdfs())
1400 AddClassifierOutputProb( type );
1401 }
1402}
1403
1404////////////////////////////////////////////////////////////////////////////////
1405/// general method used in writing the header of the weight files where
1406/// the used variables, variable transformation type etc. is specified
1407
1408void TMVA::MethodBase::WriteStateToXML( void* parent ) const
1409{
1410 if (!parent) return;
1411
1413
1414 void* gi = gTools().AddChild(parent, "GeneralInfo");
1415 AddInfoItem( gi, "TMVA Release", GetTrainingTMVAVersionString() + " [" + gTools().StringFromInt(GetTrainingTMVAVersionCode()) + "]" );
1416 AddInfoItem( gi, "ROOT Release", GetTrainingROOTVersionString() + " [" + gTools().StringFromInt(GetTrainingROOTVersionCode()) + "]");
1417 AddInfoItem( gi, "Creator", userInfo->fUser);
1418 TDatime dt; AddInfoItem( gi, "Date", dt.AsString());
1419 AddInfoItem( gi, "Host", gSystem->GetBuildNode() );
1420 AddInfoItem( gi, "Dir", gSystem->WorkingDirectory());
1421 AddInfoItem( gi, "Training events", gTools().StringFromInt(Data()->GetNTrainingEvents()));
1422 AddInfoItem( gi, "TrainingTime", gTools().StringFromDouble(const_cast<TMVA::MethodBase*>(this)->GetTrainTime()));
1423
1424 Types::EAnalysisType aType = const_cast<TMVA::MethodBase*>(this)->GetAnalysisType();
1425 TString analysisType((aType==Types::kRegression) ? "Regression" :
1426 (aType==Types::kMulticlass ? "Multiclass" : "Classification"));
1427 AddInfoItem( gi, "AnalysisType", analysisType );
1428 delete userInfo;
1429
1430 // write options
1431 AddOptionsXMLTo( parent );
1432
1433 // write variable info
1434 AddVarsXMLTo( parent );
1435
1436 // write spectator info
1437 if (fModelPersistence)
1438 AddSpectatorsXMLTo( parent );
1439
1440 // write class info if in multiclass mode
1441 AddClassesXMLTo(parent);
1442
1443 // write target info if in regression mode
1444 if (DoRegression()) AddTargetsXMLTo(parent);
1445
1446 // write transformations
1447 GetTransformationHandler(false).AddXMLTo( parent );
1448
1449 // write MVA variable distributions
1450 void* pdfs = gTools().AddChild(parent, "MVAPdfs");
1451 if (fMVAPdfS) fMVAPdfS->AddXMLTo(pdfs);
1452 if (fMVAPdfB) fMVAPdfB->AddXMLTo(pdfs);
1453
1454 // write weights
1455 AddWeightsXMLTo( parent );
1456}
1457
1458////////////////////////////////////////////////////////////////////////////////
1459/// write reference MVA distributions (and other information)
1460/// to a ROOT type weight file
1461
1463{
1465 TH1::AddDirectory( 0 ); // this avoids the binding of the hists in PDF to the current ROOT file
1466 fMVAPdfS = (TMVA::PDF*)rf.Get( "MVA_PDF_Signal" );
1467 fMVAPdfB = (TMVA::PDF*)rf.Get( "MVA_PDF_Background" );
1468
1470
1471 ReadWeightsFromStream( rf );
1472
1473 SetTestvarName();
1474}
1475
1476////////////////////////////////////////////////////////////////////////////////
1477/// write options and weights to file
1478/// note that each one text file for the main configuration information
1479/// and one ROOT file for ROOT objects are created
1480
1482{
1483 // ---- create the text file
1484 TString tfname( GetWeightFileName() );
1485
1486 // writing xml file
1487 TString xmlfname( tfname ); xmlfname.ReplaceAll( ".txt", ".xml" );
1488 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1489 << "Creating xml weight file: "
1490 << gTools().Color("lightblue") << xmlfname << gTools().Color("reset") << Endl;
1491 void* doc = gTools().xmlengine().NewDoc();
1492 void* rootnode = gTools().AddChild(0,"MethodSetup", "", true);
1493 gTools().xmlengine().DocSetRootElement(doc,rootnode);
1494 gTools().AddAttr(rootnode,"Method", GetMethodTypeName() + "::" + GetMethodName());
1495 WriteStateToXML(rootnode);
1498}
1499
1500////////////////////////////////////////////////////////////////////////////////
1501/// Function to write options and weights to file
1502
1504{
1505 // get the filename
1506
1507 TString tfname(GetWeightFileName());
1508
1509 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
1510 << "Reading weight file: "
1511 << gTools().Color("lightblue") << tfname << gTools().Color("reset") << Endl;
1512
1513 if (tfname.EndsWith(".xml") ) {
1514 void* doc = gTools().xmlengine().ParseFile(tfname,gTools().xmlenginebuffersize()); // the default buffer size in TXMLEngine::ParseFile is 100k. Starting with ROOT 5.29 one can set the buffer size, see: http://savannah.cern.ch/bugs/?78864. This might be necessary for large XML files
1515 if (!doc) {
1516 Log() << kFATAL << "Error parsing XML file " << tfname << Endl;
1517 }
1518 void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1519 ReadStateFromXML(rootnode);
1521 }
1522 else {
1523 std::filebuf fb;
1524 fb.open(tfname.Data(),std::ios::in);
1525 if (!fb.is_open()) { // file not found --> Error
1526 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ReadStateFromFile> "
1527 << "Unable to open input weight file: " << tfname << Endl;
1528 }
1529 std::istream fin(&fb);
1530 ReadStateFromStream(fin);
1531 fb.close();
1532 }
1533 if (!fTxtWeightsOnly) {
1534 // ---- read the ROOT file
1535 TString rfname( tfname ); rfname.ReplaceAll( ".txt", ".root" );
1536 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Reading root weight file: "
1537 << gTools().Color("lightblue") << rfname << gTools().Color("reset") << Endl;
1538 TFile* rfile = TFile::Open( rfname, "READ" );
1539 ReadStateFromStream( *rfile );
1540 rfile->Close();
1541 }
1542}
1543////////////////////////////////////////////////////////////////////////////////
1544/// for reading from memory
1545
1547 void* doc = gTools().xmlengine().ParseString(xmlstr);
1548 void* rootnode = gTools().xmlengine().DocGetRootElement(doc); // node "MethodSetup"
1549 ReadStateFromXML(rootnode);
1551
1552 return;
1553}
1554
1555////////////////////////////////////////////////////////////////////////////////
1556
1558{
1559
1561 gTools().ReadAttr( methodNode, "Method", fullMethodName );
1562
1563 fMethodName = fullMethodName(fullMethodName.Index("::")+2,fullMethodName.Length());
1564
1565 // update logger
1566 Log().SetSource( GetName() );
1567 Log() << kDEBUG//<<Form("Dataset[%s] : ",DataInfo().GetName())
1568 << "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1569
1570 // after the method name is read, the testvar can be set
1571 SetTestvarName();
1572
1573 TString nodeName("");
1574 void* ch = gTools().GetChild(methodNode);
1575 while (ch!=0) {
1576 nodeName = TString( gTools().GetName(ch) );
1577
1578 if (nodeName=="GeneralInfo") {
1579 // read analysis type
1580
1581 TString name(""),val("");
1582 void* antypeNode = gTools().GetChild(ch);
1583 while (antypeNode) {
1584 gTools().ReadAttr( antypeNode, "name", name );
1585
1586 if (name == "TrainingTime")
1587 gTools().ReadAttr( antypeNode, "value", fTrainTime );
1588
1589 if (name == "AnalysisType") {
1590 gTools().ReadAttr( antypeNode, "value", val );
1591 val.ToLower();
1592 if (val == "regression" ) SetAnalysisType( Types::kRegression );
1593 else if (val == "classification" ) SetAnalysisType( Types::kClassification );
1594 else if (val == "multiclass" ) SetAnalysisType( Types::kMulticlass );
1595 else Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Analysis type " << val << " is not known." << Endl;
1596 }
1597
1598 if (name == "TMVA Release" || name == "TMVA") {
1599 TString s;
1600 gTools().ReadAttr( antypeNode, "value", s);
1601 fTMVATrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1602 Log() << kDEBUG <<Form("[%s] : ",DataInfo().GetName()) << "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
1603 }
1604
1605 if (name == "ROOT Release" || name == "ROOT") {
1606 TString s;
1607 gTools().ReadAttr( antypeNode, "value", s);
1608 fROOTTrainingVersion = TString(s(s.Index("[")+1,s.Index("]")-s.Index("[")-1)).Atoi();
1609 Log() << kDEBUG //<<Form("Dataset[%s] : ",DataInfo().GetName())
1610 << "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
1611 }
1613 }
1614 }
1615 else if (nodeName=="Options") {
1616 ReadOptionsFromXML(ch);
1617 ParseOptions();
1618
1619 }
1620 else if (nodeName=="Variables") {
1621 ReadVariablesFromXML(ch);
1622 }
1623 else if (nodeName=="Spectators") {
1624 ReadSpectatorsFromXML(ch);
1625 }
1626 else if (nodeName=="Classes") {
1627 if (DataInfo().GetNClasses()==0) ReadClassesFromXML(ch);
1628 }
1629 else if (nodeName=="Targets") {
1630 if (DataInfo().GetNTargets()==0 && DoRegression()) ReadTargetsFromXML(ch);
1631 }
1632 else if (nodeName=="Transformations") {
1633 GetTransformationHandler().ReadFromXML(ch);
1634 }
1635 else if (nodeName=="MVAPdfs") {
1637 if (fMVAPdfS) { delete fMVAPdfS; fMVAPdfS=0; }
1638 if (fMVAPdfB) { delete fMVAPdfB; fMVAPdfB=0; }
1639 void* pdfnode = gTools().GetChild(ch);
1640 if (pdfnode) {
1641 gTools().ReadAttr(pdfnode, "Name", pdfname);
1642 fMVAPdfS = new PDF(pdfname);
1643 fMVAPdfS->ReadXML(pdfnode);
1645 gTools().ReadAttr(pdfnode, "Name", pdfname);
1646 fMVAPdfB = new PDF(pdfname);
1647 fMVAPdfB->ReadXML(pdfnode);
1648 }
1649 }
1650 else if (nodeName=="Weights") {
1651 ReadWeightsFromXML(ch);
1652 }
1653 else {
1654 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Unparsed XML node: '" << nodeName << "'" << Endl;
1655 }
1656 ch = gTools().GetNextChild(ch);
1657
1658 }
1659
1660 // update transformation handler
1661 if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1662}
1663
1664////////////////////////////////////////////////////////////////////////////////
1665/// read the header from the weight files of the different MVA methods
1666
1668{
1669 char buf[512];
1670
1671 // when reading from stream, we assume the files are produced with TMVA<=397
1672 SetAnalysisType(Types::kClassification);
1673
1674
1675 // first read the method name
1676 GetLine(fin,buf);
1677 while (!TString(buf).BeginsWith("Method")) GetLine(fin,buf);
1678 TString namestr(buf);
1679
1680 TString methodType = namestr(0,namestr.Index("::"));
1681 methodType = methodType(methodType.Last(' '),methodType.Length());
1683
1684 TString methodName = namestr(namestr.Index("::")+2,namestr.Length());
1685 methodName = methodName.Strip(TString::kLeading);
1686 if (methodName == "") methodName = methodType;
1687 fMethodName = methodName;
1688
1689 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Read method \"" << GetMethodName() << "\" of type \"" << GetMethodTypeName() << "\"" << Endl;
1690
1691 // update logger
1692 Log().SetSource( GetName() );
1693
1694 // now the question is whether to read the variables first or the options (well, of course the order
1695 // of writing them needs to agree)
1696 //
1697 // the option "Decorrelation" is needed to decide if the variables we
1698 // read are decorrelated or not
1699 //
1700 // the variables are needed by some methods (TMLP) to build the NN
1701 // which is done in ProcessOptions so for the time being we first Read and Parse the options then
1702 // we read the variables, and then we process the options
1703
1704 // now read all options
1705 GetLine(fin,buf);
1706 while (!TString(buf).BeginsWith("#OPT")) GetLine(fin,buf);
1707 ReadOptionsFromStream(fin);
1708 ParseOptions();
1709
1710 // Now read variable info
1711 fin.getline(buf,512);
1712 while (!TString(buf).BeginsWith("#VAR")) fin.getline(buf,512);
1713 ReadVarsFromStream(fin);
1714
1715 // now we process the options (of the derived class)
1716 ProcessOptions();
1717
1718 if (IsNormalised()) {
1720 GetTransformationHandler().AddTransformation( new VariableNormalizeTransform(DataInfo()), -1 );
1721 norm->BuildTransformationFromVarInfo( DataInfo().GetVariableInfos() );
1722 }
1724 if ( fVarTransformString == "None") {
1725 if (fUseDecorr)
1726 varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1727 } else if ( fVarTransformString == "Decorrelate" ) {
1728 varTrafo = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1729 } else if ( fVarTransformString == "PCA" ) {
1730 varTrafo = GetTransformationHandler().AddTransformation( new VariablePCATransform(DataInfo()), -1 );
1731 } else if ( fVarTransformString == "Uniform" ) {
1732 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo(),"Uniform"), -1 );
1733 } else if ( fVarTransformString == "Gauss" ) {
1734 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1735 } else if ( fVarTransformString == "GaussDecorr" ) {
1736 varTrafo = GetTransformationHandler().AddTransformation( new VariableGaussTransform(DataInfo()), -1 );
1737 varTrafo2 = GetTransformationHandler().AddTransformation( new VariableDecorrTransform(DataInfo()), -1 );
1738 } else {
1739 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<ProcessOptions> Variable transform '"
1740 << fVarTransformString << "' unknown." << Endl;
1741 }
1742 // Now read decorrelation matrix if available
1743 if (GetTransformationHandler().GetTransformationList().GetSize() > 0) {
1744 fin.getline(buf,512);
1745 while (!TString(buf).BeginsWith("#MAT")) fin.getline(buf,512);
1746 if (varTrafo) {
1747 TString trafo(fVariableTransformTypeString); trafo.ToLower();
1748 varTrafo->ReadTransformationFromStream(fin, trafo );
1749 }
1750 if (varTrafo2) {
1751 TString trafo(fVariableTransformTypeString); trafo.ToLower();
1752 varTrafo2->ReadTransformationFromStream(fin, trafo );
1753 }
1754 }
1755
1756
1757 if (HasMVAPdfs()) {
1758 // Now read the MVA PDFs
1759 fin.getline(buf,512);
1760 while (!TString(buf).BeginsWith("#MVAPDFS")) fin.getline(buf,512);
1761 if (fMVAPdfS != 0) { delete fMVAPdfS; fMVAPdfS = 0; }
1762 if (fMVAPdfB != 0) { delete fMVAPdfB; fMVAPdfB = 0; }
1763 fMVAPdfS = new PDF(TString(GetName()) + " MVA PDF Sig");
1764 fMVAPdfB = new PDF(TString(GetName()) + " MVA PDF Bkg");
1765 fMVAPdfS->SetReadingVersion( GetTrainingTMVAVersionCode() );
1766 fMVAPdfB->SetReadingVersion( GetTrainingTMVAVersionCode() );
1767
1768 fin >> *fMVAPdfS;
1769 fin >> *fMVAPdfB;
1770 }
1771
1772 // Now read weights
1773 fin.getline(buf,512);
1774 while (!TString(buf).BeginsWith("#WGT")) fin.getline(buf,512);
1775 fin.getline(buf,512);
1776 ReadWeightsFromStream( fin );
1777
1778 // update transformation handler
1779 if (GetTransformationHandler().GetCallerName() == "") GetTransformationHandler().SetCallerName( GetName() );
1780
1781}
1782
1783////////////////////////////////////////////////////////////////////////////////
1784/// write the list of variables (name, min, max) for a given data
1785/// transformation method to the stream
1786
1787void TMVA::MethodBase::WriteVarsToStream( std::ostream& o, const TString& prefix ) const
1788{
1789 o << prefix << "NVar " << DataInfo().GetNVariables() << std::endl;
1790 std::vector<VariableInfo>::const_iterator varIt = DataInfo().GetVariableInfos().begin();
1791 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1792 o << prefix << "NSpec " << DataInfo().GetNSpectators() << std::endl;
1793 varIt = DataInfo().GetSpectatorInfos().begin();
1794 for (; varIt!=DataInfo().GetSpectatorInfos().end(); ++varIt) { o << prefix; varIt->WriteToStream(o); }
1795}
1796
1797////////////////////////////////////////////////////////////////////////////////
1798/// Read the variables (name, min, max) for a given data
1799/// transformation method from the stream. In the stream we only
1800/// expect the limits which will be set
1801
1803{
1804 TString dummy;
1806 istr >> dummy >> readNVar;
1807
1808 if (readNVar!=DataInfo().GetNVariables()) {
1809 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1810 << " while there are " << readNVar << " variables declared in the file"
1811 << Endl;
1812 }
1813
1814 // we want to make sure all variables are read in the order they are defined
1816 std::vector<VariableInfo>::iterator varIt = DataInfo().GetVariableInfos().begin();
1817 int varIdx = 0;
1818 for (; varIt!=DataInfo().GetVariableInfos().end(); ++varIt, ++varIdx) {
1819 varInfo.ReadFromStream(istr);
1820 if (varIt->GetExpression() == varInfo.GetExpression()) {
1821 varInfo.SetExternalLink((*varIt).GetExternalLink());
1822 (*varIt) = varInfo;
1823 }
1824 else {
1825 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVarsFromStream>" << Endl;
1826 Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1827 Log() << kINFO << "is not the same as the one declared in the Reader (which is necessary for" << Endl;
1828 Log() << kINFO << "the correct working of the method):" << Endl;
1829 Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << varIt->GetExpression() << Endl;
1830 Log() << kINFO << " var #" << varIdx <<" declared in file : " << varInfo.GetExpression() << Endl;
1831 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1832 }
1833 }
1834}
1835
1836////////////////////////////////////////////////////////////////////////////////
1837/// write variable info to XML
1838
1839void TMVA::MethodBase::AddVarsXMLTo( void* parent ) const
1840{
1841 void* vars = gTools().AddChild(parent, "Variables");
1842 gTools().AddAttr( vars, "NVar", gTools().StringFromInt(DataInfo().GetNVariables()) );
1843
1844 for (UInt_t idx=0; idx<DataInfo().GetVariableInfos().size(); idx++) {
1845 VariableInfo& vi = DataInfo().GetVariableInfos()[idx];
1846 void* var = gTools().AddChild( vars, "Variable" );
1847 gTools().AddAttr( var, "VarIndex", idx );
1848 vi.AddToXML( var );
1849 }
1850}
1851
1852////////////////////////////////////////////////////////////////////////////////
1853/// write spectator info to XML
1854
1856{
1857 void* specs = gTools().AddChild(parent, "Spectators");
1858
1859 UInt_t writeIdx=0;
1860 for (UInt_t idx=0; idx<DataInfo().GetSpectatorInfos().size(); idx++) {
1861
1862 VariableInfo& vi = DataInfo().GetSpectatorInfos()[idx];
1863
1864 // we do not want to write spectators that are category-cuts,
1865 // except if the method is the category method and the spectators belong to it
1866 if (vi.GetVarType()=='C') continue;
1867
1868 void* spec = gTools().AddChild( specs, "Spectator" );
1869 gTools().AddAttr( spec, "SpecIndex", writeIdx++ );
1870 vi.AddToXML( spec );
1871 }
1872 gTools().AddAttr( specs, "NSpec", gTools().StringFromInt(writeIdx) );
1873}
1874
1875////////////////////////////////////////////////////////////////////////////////
1876/// write class info to XML
1877
1878void TMVA::MethodBase::AddClassesXMLTo( void* parent ) const
1879{
1880 UInt_t nClasses=DataInfo().GetNClasses();
1881
1882 void* classes = gTools().AddChild(parent, "Classes");
1883 gTools().AddAttr( classes, "NClass", nClasses );
1884
1885 for (UInt_t iCls=0; iCls<nClasses; ++iCls) {
1886 ClassInfo *classInfo=DataInfo().GetClassInfo (iCls);
1887 TString className =classInfo->GetName();
1888 UInt_t classNumber=classInfo->GetNumber();
1889
1890 void* classNode=gTools().AddChild(classes, "Class");
1891 gTools().AddAttr( classNode, "Name", className );
1892 gTools().AddAttr( classNode, "Index", classNumber );
1893 }
1894}
1895////////////////////////////////////////////////////////////////////////////////
1896/// write target info to XML
1897
1898void TMVA::MethodBase::AddTargetsXMLTo( void* parent ) const
1899{
1900 void* targets = gTools().AddChild(parent, "Targets");
1901 gTools().AddAttr( targets, "NTrgt", gTools().StringFromInt(DataInfo().GetNTargets()) );
1902
1903 for (UInt_t idx=0; idx<DataInfo().GetTargetInfos().size(); idx++) {
1904 VariableInfo& vi = DataInfo().GetTargetInfos()[idx];
1905 void* tar = gTools().AddChild( targets, "Target" );
1906 gTools().AddAttr( tar, "TargetIndex", idx );
1907 vi.AddToXML( tar );
1908 }
1909}
1910
1911////////////////////////////////////////////////////////////////////////////////
1912/// read variable info from XML
1913
1915{
1917 gTools().ReadAttr( varnode, "NVar", readNVar);
1918
1919 if (readNVar!=DataInfo().GetNVariables()) {
1920 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "You declared "<< DataInfo().GetNVariables() << " variables in the Reader"
1921 << " while there are " << readNVar << " variables declared in the file"
1922 << Endl;
1923 }
1924
1925 // we want to make sure all variables are read in the order they are defined
1927 int varIdx = 0;
1928 void* ch = gTools().GetChild(varnode);
1929 while (ch) {
1930 gTools().ReadAttr( ch, "VarIndex", varIdx);
1931 existingVarInfo = DataInfo().GetVariableInfos()[varIdx];
1932 readVarInfo.ReadFromXML(ch);
1933
1934 if (existingVarInfo.GetExpression() == readVarInfo.GetExpression()) {
1935 readVarInfo.SetExternalLink(existingVarInfo.GetExternalLink());
1937 }
1938 else {
1939 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadVariablesFromXML>" << Endl;
1940 Log() << kINFO << "The definition (or the order) of the variables found in the input file is" << Endl;
1941 Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1942 Log() << kINFO << "correct working of the method):" << Endl;
1943 Log() << kINFO << " var #" << varIdx <<" declared in Reader: " << existingVarInfo.GetExpression() << Endl;
1944 Log() << kINFO << " var #" << varIdx <<" declared in file : " << readVarInfo.GetExpression() << Endl;
1945 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1946 }
1947 ch = gTools().GetNextChild(ch);
1948 }
1949}
1950
1951////////////////////////////////////////////////////////////////////////////////
1952/// read spectator info from XML
1953
1955{
1957 gTools().ReadAttr( specnode, "NSpec", readNSpec);
1958
1959 if (readNSpec!=DataInfo().GetNSpectators(kFALSE)) {
1960 Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName()) << "You declared "<< DataInfo().GetNSpectators(kFALSE) << " spectators in the Reader"
1961 << " while there are " << readNSpec << " spectators declared in the file"
1962 << Endl;
1963 }
1964
1965 // we want to make sure all variables are read in the order they are defined
1967 int specIdx = 0;
1968 void* ch = gTools().GetChild(specnode);
1969 while (ch) {
1970 gTools().ReadAttr( ch, "SpecIndex", specIdx);
1971 existingSpecInfo = DataInfo().GetSpectatorInfos()[specIdx];
1972 readSpecInfo.ReadFromXML(ch);
1973
1974 if (existingSpecInfo.GetExpression() == readSpecInfo.GetExpression()) {
1975 readSpecInfo.SetExternalLink(existingSpecInfo.GetExternalLink());
1977 }
1978 else {
1979 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "ERROR in <ReadSpectatorsFromXML>" << Endl;
1980 Log() << kINFO << "The definition (or the order) of the spectators found in the input file is" << Endl;
1981 Log() << kINFO << "not the same as the one declared in the Reader (which is necessary for the" << Endl;
1982 Log() << kINFO << "correct working of the method):" << Endl;
1983 Log() << kINFO << " spec #" << specIdx <<" declared in Reader: " << existingSpecInfo.GetExpression() << Endl;
1984 Log() << kINFO << " spec #" << specIdx <<" declared in file : " << readSpecInfo.GetExpression() << Endl;
1985 Log() << kFATAL << "The expression declared to the Reader needs to be checked (name or order are wrong)" << Endl;
1986 }
1987 ch = gTools().GetNextChild(ch);
1988 }
1989}
1990
1991////////////////////////////////////////////////////////////////////////////////
1992/// read number of classes from XML
1993
1995{
1997 // coverity[tainted_data_argument]
1998 gTools().ReadAttr( clsnode, "NClass", readNCls);
1999
2000 TString className="";
2002 void* ch = gTools().GetChild(clsnode);
2003 if (!ch) {
2004 for (UInt_t icls = 0; icls<readNCls;++icls) {
2005 TString classname = TString::Format("class%i",icls);
2006 DataInfo().AddClass(classname);
2007
2008 }
2009 }
2010 else{
2011 while (ch) {
2012 gTools().ReadAttr( ch, "Index", classIndex);
2013 gTools().ReadAttr( ch, "Name", className );
2014 DataInfo().AddClass(className);
2015
2016 ch = gTools().GetNextChild(ch);
2017 }
2018 }
2019
2020 // retrieve signal and background class index
2021 if (DataInfo().GetClassInfo("Signal") != 0) {
2022 fSignalClass = DataInfo().GetClassInfo("Signal")->GetNumber();
2023 }
2024 else
2025 fSignalClass=0;
2026 if (DataInfo().GetClassInfo("Background") != 0) {
2027 fBackgroundClass = DataInfo().GetClassInfo("Background")->GetNumber();
2028 }
2029 else
2030 fBackgroundClass=1;
2031}
2032
2033////////////////////////////////////////////////////////////////////////////////
2034/// read target info from XML
2035
2037{
2039 gTools().ReadAttr( tarnode, "NTrgt", readNTar);
2040
2041 int tarIdx = 0;
2042 TString expression;
2043 void* ch = gTools().GetChild(tarnode);
2044 while (ch) {
2045 gTools().ReadAttr( ch, "TargetIndex", tarIdx);
2046 gTools().ReadAttr( ch, "Expression", expression);
2047 DataInfo().AddTarget(expression,"","",0,0);
2048
2049 ch = gTools().GetNextChild(ch);
2050 }
2051}
2052
2053////////////////////////////////////////////////////////////////////////////////
2054/// returns the ROOT directory where info/histograms etc of the
2055/// corresponding MVA method instance are stored
2056
2058{
2059 if (fBaseDir != 0) return fBaseDir;
2060 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodName() << " not set yet --> check if already there.." <<Endl;
2061
2062 if (IsSilentFile()) {
2063 Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
2064 << "MethodBase::BaseDir() - No directory exists when running a Method without output file. Enable the "
2065 "output when creating the factory"
2066 << Endl;
2067 }
2068
2069 TDirectory* methodDir = MethodBaseDir();
2070 if (methodDir==0)
2071 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MethodBase::BaseDir() - MethodBaseDir() return a NULL pointer!" << Endl;
2072
2073 TString defaultDir = GetMethodName();
2074 TDirectory *sdir = methodDir->GetDirectory(defaultDir.Data());
2075 if(!sdir)
2076 {
2077 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " does not exist yet--> created it" <<Endl;
2078 sdir = methodDir->mkdir(defaultDir);
2079 sdir->cd();
2080 // write weight file name into target file
2081 if (fModelPersistence) {
2083 TObjString wfileName( GetWeightFileName() );
2084 wfilePath.Write( "TrainingPath" );
2085 wfileName.Write( "WeightFileName" );
2086 }
2087 }
2088
2089 Log()<<kDEBUG<<Form("Dataset[%s] : ",DataInfo().GetName())<<" Base Directory for " << GetMethodTypeName() << " existed, return it.." <<Endl;
2090 return sdir;
2091}
2092
2093////////////////////////////////////////////////////////////////////////////////
2094/// returns the ROOT directory where all instances of the
2095/// corresponding MVA method are stored
2096
2098{
2099 if (fMethodBaseDir != 0) {
2100 return fMethodBaseDir;
2101 }
2102
2103 const char *datasetName = DataInfo().GetName();
2104
2105 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodTypeName()
2106 << " not set yet --> check if already there.." << Endl;
2107
2108 TDirectory *factoryBaseDir = GetFile();
2109 if (!factoryBaseDir) return nullptr;
2110 fMethodBaseDir = factoryBaseDir->GetDirectory(datasetName);
2111 if (!fMethodBaseDir) {
2112 fMethodBaseDir = factoryBaseDir->mkdir(datasetName, TString::Format("Base directory for dataset %s", datasetName).Data());
2113 if (!fMethodBaseDir) {
2114 Log() << kFATAL << "Can not create dir " << datasetName;
2115 }
2116 }
2117 TString methodTypeDir = TString::Format("Method_%s", GetMethodTypeName().Data());
2118 fMethodBaseDir = fMethodBaseDir->GetDirectory(methodTypeDir.Data());
2119
2120 if (!fMethodBaseDir) {
2122 TString methodTypeDirHelpStr = TString::Format("Directory for all %s methods", GetMethodTypeName().Data());
2123 fMethodBaseDir = datasetDir->mkdir(methodTypeDir.Data(), methodTypeDirHelpStr);
2124 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName) << " Base Directory for " << GetMethodName()
2125 << " does not exist yet--> created it" << Endl;
2126 }
2127
2128 Log() << kDEBUG << Form("Dataset[%s] : ", datasetName)
2129 << "Return from MethodBaseDir() after creating base directory " << Endl;
2130 return fMethodBaseDir;
2131}
2132
2133////////////////////////////////////////////////////////////////////////////////
2134/// set directory of weight file
2135
2137{
2138 fFileDir = fileDir;
2139 gSystem->mkdir( fFileDir, kTRUE );
2140}
2141
2142////////////////////////////////////////////////////////////////////////////////
2143/// set the weight file name (depreciated)
2144
2149
2150////////////////////////////////////////////////////////////////////////////////
2151/// retrieve weight file name
2152
2154{
2155 if (fWeightFile!="") return fWeightFile;
2156
2157 // the default consists of
2158 // directory/jobname_methodname_suffix.extension.{root/txt}
2159 TString suffix = "";
2160 TString wFileDir(GetWeightFileDir());
2161 TString wFileName = GetJobName() + "_" + GetMethodName() +
2162 suffix + "." + gConfig().GetIONames().fWeightFileExtension + ".xml";
2163 if (wFileDir.IsNull() ) return wFileName;
2164 // add weight file directory of it is not null
2165 return ( wFileDir + (wFileDir[wFileDir.Length()-1]=='/' ? "" : "/")
2166 + wFileName );
2167}
2168////////////////////////////////////////////////////////////////////////////////
2169/// writes all MVA evaluation histograms to file
2170
2172{
2173 BaseDir()->cd();
2174
2175
2176 // write MVA PDFs to file - if exist
2177 if (0 != fMVAPdfS) {
2178 fMVAPdfS->GetOriginalHist()->Write();
2179 fMVAPdfS->GetSmoothedHist()->Write();
2180 fMVAPdfS->GetPDFHist()->Write();
2181 }
2182 if (0 != fMVAPdfB) {
2183 fMVAPdfB->GetOriginalHist()->Write();
2184 fMVAPdfB->GetSmoothedHist()->Write();
2185 fMVAPdfB->GetPDFHist()->Write();
2186 }
2187
2188 // write result-histograms
2189 Results* results = Data()->GetResults( GetMethodName(), treetype, Types::kMaxAnalysisType );
2190 if (!results)
2191 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<WriteEvaluationHistosToFile> Unknown result: "
2192 << GetMethodName() << (treetype==Types::kTraining?"/kTraining":"/kTesting")
2193 << "/kMaxAnalysisType" << Endl;
2194 results->GetStorage()->Write();
2196 // skipping plotting of variables if too many (default is 200)
2197 if ((int) DataInfo().GetNVariables()< gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables)
2198 GetTransformationHandler().PlotVariables (GetEventCollection( Types::kTesting ), BaseDir() );
2199 else
2200 Log() << kINFO << TString::Format("Dataset[%s] : ",DataInfo().GetName())
2201 << " variable plots are not produces ! The number of variables is " << DataInfo().GetNVariables()
2202 << " , it is larger than " << gConfig().GetVariablePlotting().fMaxNumOfAllowedVariables << Endl;
2203 }
2204}
2205
2206////////////////////////////////////////////////////////////////////////////////
2207/// write special monitoring histograms to file
2208/// dummy implementation here -----------------
2209
2213
2214////////////////////////////////////////////////////////////////////////////////
2215/// reads one line from the input stream
2216/// checks for certain keywords and interprets
2217/// the line if keywords are found
2218
2219Bool_t TMVA::MethodBase::GetLine(std::istream& fin, char* buf )
2220{
2221 fin.getline(buf,512);
2222 TString line(buf);
2223 if (line.BeginsWith("TMVA Release")) {
2224 Ssiz_t start = line.First('[')+1;
2225 Ssiz_t length = line.Index("]",start)-start;
2226 TString code = line(start,length);
2227 std::stringstream s(code.Data());
2228 s >> fTMVATrainingVersion;
2229 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with TMVA Version: " << GetTrainingTMVAVersionString() << Endl;
2230 }
2231 if (line.BeginsWith("ROOT Release")) {
2232 Ssiz_t start = line.First('[')+1;
2233 Ssiz_t length = line.Index("]",start)-start;
2234 TString code = line(start,length);
2235 std::stringstream s(code.Data());
2236 s >> fROOTTrainingVersion;
2237 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "MVA method was trained with ROOT Version: " << GetTrainingROOTVersionString() << Endl;
2238 }
2239 if (line.BeginsWith("Analysis type")) {
2240 Ssiz_t start = line.First('[')+1;
2241 Ssiz_t length = line.Index("]",start)-start;
2242 TString code = line(start,length);
2243 std::stringstream s(code.Data());
2244 std::string analysisType;
2245 s >> analysisType;
2246 if (analysisType == "regression" || analysisType == "Regression") SetAnalysisType( Types::kRegression );
2247 else if (analysisType == "classification" || analysisType == "Classification") SetAnalysisType( Types::kClassification );
2248 else if (analysisType == "multiclass" || analysisType == "Multiclass") SetAnalysisType( Types::kMulticlass );
2249 else Log() << kFATAL << "Analysis type " << analysisType << " from weight-file not known!" << std::endl;
2250
2251 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Method was trained for "
2252 << (GetAnalysisType() == Types::kRegression ? "Regression" :
2253 (GetAnalysisType() == Types::kMulticlass ? "Multiclass" : "Classification")) << Endl;
2254 }
2255
2256 return true;
2257}
2258
2259////////////////////////////////////////////////////////////////////////////////
2260/// Create PDFs of the MVA output variables
2261
2263{
2264 Data()->SetCurrentType(Types::kTraining);
2265
2266 // the PDF's are stored as results ONLY if the corresponding "results" are booked,
2267 // otherwise they will be only used 'online'
2269 ( Data()->GetResults(GetMethodName(), Types::kTraining, Types::kClassification) );
2270
2271 if (mvaRes==0 || mvaRes->GetSize()==0) {
2272 Log() << kERROR<<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CreateMVAPdfs> No result of classifier testing available" << Endl;
2273 }
2274
2275 Double_t minVal = *std::min_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2276 Double_t maxVal = *std::max_element(mvaRes->GetValueVector()->begin(),mvaRes->GetValueVector()->end());
2277
2278 // create histograms that serve as basis to create the MVA Pdfs
2279 TH1* histMVAPdfS = new TH1D( GetMethodTypeName() + "_tr_S", GetMethodTypeName() + "_tr_S",
2280 fMVAPdfS->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2281 TH1* histMVAPdfB = new TH1D( GetMethodTypeName() + "_tr_B", GetMethodTypeName() + "_tr_B",
2282 fMVAPdfB->GetHistNBins( mvaRes->GetSize() ), minVal, maxVal );
2283
2284
2285 // compute sum of weights properly
2286 histMVAPdfS->Sumw2();
2287 histMVAPdfB->Sumw2();
2288
2289 // fill histograms
2290 for (UInt_t ievt=0; ievt<mvaRes->GetSize(); ievt++) {
2291 Double_t theVal = mvaRes->GetValueVector()->at(ievt);
2292 Double_t theWeight = Data()->GetEvent(ievt)->GetWeight();
2293
2294 if (DataInfo().IsSignal(Data()->GetEvent(ievt))) histMVAPdfS->Fill( theVal, theWeight );
2295 else histMVAPdfB->Fill( theVal, theWeight );
2296 }
2297
2300
2301 // momentary hack for ROOT problem
2302 if(!IsSilentFile())
2303 {
2304 histMVAPdfS->Write();
2305 histMVAPdfB->Write();
2306 }
2307 // create PDFs
2308 fMVAPdfS->BuildPDF ( histMVAPdfS );
2309 fMVAPdfB->BuildPDF ( histMVAPdfB );
2310 fMVAPdfS->ValidatePDF( histMVAPdfS );
2311 fMVAPdfB->ValidatePDF( histMVAPdfB );
2312
2313 if (DataInfo().GetNClasses() == 2) { // TODO: this is an ugly hack.. adapt this to new framework
2314 Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName())
2315 << TString::Format( "<CreateMVAPdfs> Separation from histogram (PDF): %1.3f (%1.3f)",
2316 GetSeparation( histMVAPdfS, histMVAPdfB ), GetSeparation( fMVAPdfS, fMVAPdfB ) )
2317 << Endl;
2318 }
2319
2320 delete histMVAPdfS;
2321 delete histMVAPdfB;
2322}
2323
2325 // the simple one, automatically calculates the mvaVal and uses the
2326 // SAME sig/bkg ratio as given in the training sample (typically 50/50
2327 // .. (NormMode=EqualNumEvents) but can be different)
2328 if (!fMVAPdfS || !fMVAPdfB) {
2329 Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "<GetProba> MVA PDFs for Signal and Background don't exist yet, we'll create them on demand" << Endl;
2330 CreateMVAPdfs();
2331 }
2332 Double_t sigFraction = DataInfo().GetTrainingSumSignalWeights() / (DataInfo().GetTrainingSumSignalWeights() + DataInfo().GetTrainingSumBackgrWeights() );
2333 Double_t mvaVal = GetMvaValue(ev);
2334
2335 return GetProba(mvaVal,sigFraction);
2336
2337}
2338////////////////////////////////////////////////////////////////////////////////
2339/// compute likelihood ratio
2340
2342{
2343 if (!fMVAPdfS || !fMVAPdfB) {
2344 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetProba> MVA PDFs for Signal and Background don't exist" << Endl;
2345 return -1.0;
2346 }
2347 Double_t p_s = fMVAPdfS->GetVal( mvaVal );
2348 Double_t p_b = fMVAPdfB->GetVal( mvaVal );
2349
2350 Double_t denom = p_s*ap_sig + p_b*(1 - ap_sig);
2351
2352 return (denom > 0) ? (p_s*ap_sig) / denom : -1;
2353}
2354
2355////////////////////////////////////////////////////////////////////////////////
2356/// compute rarity:
2357/// \f[
2358/// R(x) = \int_{[-\infty..x]} { PDF(x') dx' }
2359/// \f]
2360/// where PDF(x) is the PDF of the classifier's signal or background distribution
2361
2363{
2364 if ((reftype == Types::kSignal && !fMVAPdfS) || (reftype == Types::kBackground && !fMVAPdfB)) {
2365 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetRarity> Required MVA PDF for Signal or Background does not exist: "
2366 << "select option \"CreateMVAPdfs\"" << Endl;
2367 return 0.0;
2368 }
2369
2370 PDF* thePdf = ((reftype == Types::kSignal) ? fMVAPdfS : fMVAPdfB);
2371
2372 return thePdf->GetIntegral( thePdf->GetXmin(), mvaVal );
2373}
2374
2375////////////////////////////////////////////////////////////////////////////////
2376/// fill background efficiency (resp. rejection) versus signal efficiency plots
2377/// returns signal efficiency at background efficiency indicated in theString
2378
2380{
2381 Data()->SetCurrentType(type);
2382 Results* results = Data()->GetResults( GetMethodName(), type, Types::kClassification );
2383 std::vector<Float_t>* mvaRes = dynamic_cast<ResultsClassification*>(results)->GetValueVector();
2384
2385 // parse input string for required background efficiency
2387
2388 // sanity check
2390 if (!list || list->GetSize() < 2) computeArea = kTRUE; // the area is computed
2391 else if (list->GetSize() > 2) {
2392 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Wrong number of arguments"
2393 << " in string: " << theString
2394 << " | required format, e.g., Efficiency:0.05, or empty string" << Endl;
2395 delete list;
2396 return -1;
2397 }
2398
2399 // sanity check
2400 if ( results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2401 results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2402 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Binning mismatch between signal and background histos" << Endl;
2403 delete list;
2404 return -1.0;
2405 }
2406
2407 // create histograms
2408
2409 // first, get efficiency histograms for signal and background
2410 TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2411 Double_t xmin = effhist->GetXaxis()->GetXmin();
2412 Double_t xmax = effhist->GetXaxis()->GetXmax();
2413
2415
2416 // first round ? --> create histograms
2417 if (results->DoesExist("MVA_EFF_S")==0) {
2418
2419 // for efficiency plot
2420 TH1* eff_s = new TH1D( GetTestvarName() + "_effS", GetTestvarName() + " (signal)", fNbinsH, xmin, xmax );
2421 TH1* eff_b = new TH1D( GetTestvarName() + "_effB", GetTestvarName() + " (background)", fNbinsH, xmin, xmax );
2422 results->Store(eff_s, "MVA_EFF_S");
2423 results->Store(eff_b, "MVA_EFF_B");
2424
2425 // sign if cut
2426 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2427
2428 // this method is unbinned
2429 nevtS = 0;
2430 for (UInt_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2431
2432 // read the tree
2433 Bool_t isSignal = DataInfo().IsSignal(GetEvent(ievt));
2434 Float_t theWeight = GetEvent(ievt)->GetWeight();
2435 Float_t theVal = (*mvaRes)[ievt];
2436
2437 // select histogram depending on if sig or bgd
2439
2440 // count signal and background events in tree
2441 if (isSignal) nevtS+=theWeight;
2442
2443 TAxis* axis = theHist->GetXaxis();
2444 Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2445 if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2446 if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2447 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2448 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2449
2450 if (sign > 0)
2451 for (Int_t ibin=1; ibin<=maxbin; ibin++) theHist->AddBinContent( ibin , theWeight);
2452 else if (sign < 0)
2453 for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theHist->AddBinContent( ibin , theWeight );
2454 else
2455 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetEfficiency> Mismatch in sign" << Endl;
2456 }
2457
2458 // renormalise maximum to <=1
2459 // eff_s->Scale( 1.0/TMath::Max(1.,eff_s->GetMaximum()) );
2460 // eff_b->Scale( 1.0/TMath::Max(1.,eff_b->GetMaximum()) );
2461
2462 eff_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(),eff_s->GetMaximum()) );
2463 eff_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(),eff_b->GetMaximum()) );
2464
2465 // background efficiency versus signal efficiency
2466 TH1* eff_BvsS = new TH1D( GetTestvarName() + "_effBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2467 results->Store(eff_BvsS, "MVA_EFF_BvsS");
2468 eff_BvsS->SetXTitle( "Signal eff" );
2469 eff_BvsS->SetYTitle( "Backgr eff" );
2470
2471 // background rejection (=1-eff.) versus signal efficiency
2472 TH1* rej_BvsS = new TH1D( GetTestvarName() + "_rejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2473 results->Store(rej_BvsS);
2474 rej_BvsS->SetXTitle( "Signal eff" );
2475 rej_BvsS->SetYTitle( "Backgr rejection (1-eff)" );
2476
2477 // inverse background eff (1/eff.) versus signal efficiency
2478 TH1* inveff_BvsS = new TH1D( GetTestvarName() + "_invBeffvsSeff",
2479 GetTestvarName(), fNbins, 0, 1 );
2480 results->Store(inveff_BvsS);
2481 inveff_BvsS->SetXTitle( "Signal eff" );
2482 inveff_BvsS->SetYTitle( "Inverse backgr. eff (1/eff)" );
2483
2484 // use root finder
2485 // spline background efficiency plot
2486 // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2488 fSplRefS = new TSpline1( "spline2_signal", new TGraph( eff_s ) );
2489 fSplRefB = new TSpline1( "spline2_background", new TGraph( eff_b ) );
2490
2491 // verify spline sanity
2492 gTools().CheckSplines( eff_s, fSplRefS );
2493 gTools().CheckSplines( eff_b, fSplRefB );
2494 }
2495
2496 // make the background-vs-signal efficiency plot
2497
2498 // create root finder
2499 RootFinder rootFinder( this, fXmin, fXmax );
2500
2501 Double_t effB = 0;
2502 fEffS = eff_s; // to be set for the root finder
2503 for (Int_t bini=1; bini<=fNbins; bini++) {
2504
2505 // find cut value corresponding to a given signal efficiency
2506 Double_t effS = eff_BvsS->GetBinCenter( bini );
2507 Double_t cut = rootFinder.Root( effS );
2508
2509 // retrieve background efficiency for given cut
2510 if (Use_Splines_for_Eff_) effB = fSplRefB->Eval( cut );
2511 else effB = eff_b->GetBinContent( eff_b->FindBin( cut ) );
2512
2513 // and fill histograms
2514 eff_BvsS->SetBinContent( bini, effB );
2515 rej_BvsS->SetBinContent( bini, 1.0-effB );
2516 if (effB>std::numeric_limits<double>::epsilon())
2517 inveff_BvsS->SetBinContent( bini, 1.0/effB );
2518 }
2519
2520 // create splines for histogram
2521 fSpleffBvsS = new TSpline1( "effBvsS", new TGraph( eff_BvsS ) );
2522
2523 // search for overlap point where, when cutting on it,
2524 // one would obtain: eff_S = rej_B = 1 - eff_B
2525 Double_t effS = 0., rejB, effS_ = 0., rejB_ = 0.;
2526 Int_t nbins_ = 5000;
2527 for (Int_t bini=1; bini<=nbins_; bini++) {
2528
2529 // get corresponding signal and background efficiencies
2530 effS = (bini - 0.5)/Float_t(nbins_);
2531 rejB = 1.0 - fSpleffBvsS->Eval( effS );
2532
2533 // find signal efficiency that corresponds to required background efficiency
2534 if ((effS - rejB)*(effS_ - rejB_) < 0) break;
2535 effS_ = effS;
2536 rejB_ = rejB;
2537 }
2538
2539 // find cut that corresponds to signal efficiency and update signal-like criterion
2540 Double_t cut = rootFinder.Root( 0.5*(effS + effS_) );
2541 SetSignalReferenceCut( cut );
2542 fEffS = 0;
2543 }
2544
2545 // must exist...
2546 if (0 == fSpleffBvsS) {
2547 delete list;
2548 return 0.0;
2549 }
2550
2551 // now find signal efficiency that corresponds to required background efficiency
2552 Double_t effS = 0, effB = 0, effS_ = 0, effB_ = 0;
2553 Int_t nbins_ = 1000;
2554
2555 if (computeArea) {
2556
2557 // compute area of rej-vs-eff plot
2558 Double_t integral = 0;
2559 for (Int_t bini=1; bini<=nbins_; bini++) {
2560
2561 // get corresponding signal and background efficiencies
2562 effS = (bini - 0.5)/Float_t(nbins_);
2563 effB = fSpleffBvsS->Eval( effS );
2564 integral += (1.0 - effB);
2565 }
2566 integral /= nbins_;
2567
2568 delete list;
2569 return integral;
2570 }
2571 else {
2572
2573 // that will be the value of the efficiency retured (does not affect
2574 // the efficiency-vs-bkg plot which is done anyway.
2575 Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2576
2577 // find precise efficiency value
2578 for (Int_t bini=1; bini<=nbins_; bini++) {
2579
2580 // get corresponding signal and background efficiencies
2581 effS = (bini - 0.5)/Float_t(nbins_);
2582 effB = fSpleffBvsS->Eval( effS );
2583
2584 // find signal efficiency that corresponds to required background efficiency
2585 if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2586 effS_ = effS;
2587 effB_ = effB;
2588 }
2589
2590 // take mean between bin above and bin below
2591 effS = 0.5*(effS + effS_);
2592
2593 effSerr = 0;
2594 if (nevtS > 0) effSerr = TMath::Sqrt( effS*(1.0 - effS)/nevtS );
2595
2596 delete list;
2597 return effS;
2598 }
2599
2600 return -1;
2601}
2602
2603////////////////////////////////////////////////////////////////////////////////
2604
2606{
2607 Data()->SetCurrentType(Types::kTraining);
2608
2609 Results* results = Data()->GetResults(GetMethodName(), Types::kTesting, Types::kNoAnalysisType);
2610
2611 // fill background efficiency (resp. rejection) versus signal efficiency plots
2612 // returns signal efficiency at background efficiency indicated in theString
2613
2614 // parse input string for required background efficiency
2616 // sanity check
2617
2618 if (list->GetSize() != 2) {
2619 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Wrong number of arguments"
2620 << " in string: " << theString
2621 << " | required format, e.g., Efficiency:0.05" << Endl;
2622 delete list;
2623 return -1;
2624 }
2625 // that will be the value of the efficiency retured (does not affect
2626 // the efficiency-vs-bkg plot which is done anyway.
2627 Float_t effBref = atof( ((TObjString*)list->At(1))->GetString() );
2628
2629 delete list;
2630
2631 // sanity check
2632 if (results->GetHist("MVA_S")->GetNbinsX() != results->GetHist("MVA_B")->GetNbinsX() ||
2633 results->GetHist("MVA_HIGHBIN_S")->GetNbinsX() != results->GetHist("MVA_HIGHBIN_B")->GetNbinsX() ) {
2634 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetTrainingEfficiency> Binning mismatch between signal and background histos"
2635 << Endl;
2636 return -1.0;
2637 }
2638
2639 // create histogram
2640
2641 // first, get efficiency histograms for signal and background
2642 TH1 * effhist = results->GetHist("MVA_HIGHBIN_S");
2643 Double_t xmin = effhist->GetXaxis()->GetXmin();
2644 Double_t xmax = effhist->GetXaxis()->GetXmax();
2645
2646 // first round ? --> create and fill histograms
2647 if (results->DoesExist("MVA_TRAIN_S")==0) {
2648
2649 // classifier response distributions for test sample
2650 Double_t sxmax = fXmax+0.00001;
2651
2652 // MVA plots on the training sample (check for overtraining)
2653 TH1* mva_s_tr = new TH1D( GetTestvarName() + "_Train_S",GetTestvarName() + "_Train_S", fNbinsMVAoutput, fXmin, sxmax );
2654 TH1* mva_b_tr = new TH1D( GetTestvarName() + "_Train_B",GetTestvarName() + "_Train_B", fNbinsMVAoutput, fXmin, sxmax );
2655 results->Store(mva_s_tr, "MVA_TRAIN_S");
2656 results->Store(mva_b_tr, "MVA_TRAIN_B");
2657 mva_s_tr->Sumw2();
2658 mva_b_tr->Sumw2();
2659
2660 // Training efficiency plots
2661 TH1* mva_eff_tr_s = new TH1D( GetTestvarName() + "_trainingEffS", GetTestvarName() + " (signal)",
2662 fNbinsH, xmin, xmax );
2663 TH1* mva_eff_tr_b = new TH1D( GetTestvarName() + "_trainingEffB", GetTestvarName() + " (background)",
2664 fNbinsH, xmin, xmax );
2665 results->Store(mva_eff_tr_s, "MVA_TRAINEFF_S");
2666 results->Store(mva_eff_tr_b, "MVA_TRAINEFF_B");
2667
2668 // sign if cut
2669 Int_t sign = (fCutOrientation == kPositive) ? +1 : -1;
2670
2671 std::vector<Double_t> mvaValues = GetMvaValues(0,Data()->GetNEvents());
2672 assert( (Long64_t) mvaValues.size() == Data()->GetNEvents());
2673
2674 // this method is unbinned
2675 for (Int_t ievt=0; ievt<Data()->GetNEvents(); ievt++) {
2676
2677 Data()->SetCurrentEvent(ievt);
2678 const Event* ev = GetEvent();
2679
2680 Double_t theVal = mvaValues[ievt];
2681 Double_t theWeight = ev->GetWeight();
2682
2683 TH1* theEffHist = DataInfo().IsSignal(ev) ? mva_eff_tr_s : mva_eff_tr_b;
2684 TH1* theClsHist = DataInfo().IsSignal(ev) ? mva_s_tr : mva_b_tr;
2685
2686 theClsHist->Fill( theVal, theWeight );
2687
2688 TAxis* axis = theEffHist->GetXaxis();
2689 Int_t maxbin = Int_t((theVal - axis->GetXmin())/(axis->GetXmax() - axis->GetXmin())*fNbinsH) + 1;
2690 if (sign > 0 && maxbin > fNbinsH) continue; // can happen... event doesn't count
2691 if (sign < 0 && maxbin < 1 ) continue; // can happen... event doesn't count
2692 if (sign > 0 && maxbin < 1 ) maxbin = 1;
2693 if (sign < 0 && maxbin > fNbinsH) maxbin = fNbinsH;
2694
2695 if (sign > 0) for (Int_t ibin=1; ibin<=maxbin; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2696 else for (Int_t ibin=maxbin+1; ibin<=fNbinsH; ibin++) theEffHist->AddBinContent( ibin , theWeight );
2697 }
2698
2699 // normalise output distributions
2700 // uncomment those (and several others if you want unnormalized output
2703
2704 // renormalise to maximum
2705 mva_eff_tr_s->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_s->GetMaximum()) );
2706 mva_eff_tr_b->Scale( 1.0/TMath::Max(std::numeric_limits<double>::epsilon(), mva_eff_tr_b->GetMaximum()) );
2707
2708 // Training background efficiency versus signal efficiency
2709 TH1* eff_bvss = new TH1D( GetTestvarName() + "_trainingEffBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2710 // Training background rejection (=1-eff.) versus signal efficiency
2711 TH1* rej_bvss = new TH1D( GetTestvarName() + "_trainingRejBvsS", GetTestvarName() + "", fNbins, 0, 1 );
2712 results->Store(eff_bvss, "EFF_BVSS_TR");
2713 results->Store(rej_bvss, "REJ_BVSS_TR");
2714
2715 // use root finder
2716 // spline background efficiency plot
2717 // note that there is a bin shift when going from a TH1D object to a TGraph :-(
2719 if (fSplTrainRefS) delete fSplTrainRefS;
2720 if (fSplTrainRefB) delete fSplTrainRefB;
2721 fSplTrainRefS = new TSpline1( "spline2_signal", new TGraph( mva_eff_tr_s ) );
2722 fSplTrainRefB = new TSpline1( "spline2_background", new TGraph( mva_eff_tr_b ) );
2723
2724 // verify spline sanity
2725 gTools().CheckSplines( mva_eff_tr_s, fSplTrainRefS );
2726 gTools().CheckSplines( mva_eff_tr_b, fSplTrainRefB );
2727 }
2728
2729 // make the background-vs-signal efficiency plot
2730
2731 // create root finder
2732 RootFinder rootFinder(this, fXmin, fXmax );
2733
2734 Double_t effB = 0;
2735 fEffS = results->GetHist("MVA_TRAINEFF_S");
2736 for (Int_t bini=1; bini<=fNbins; bini++) {
2737
2738 // find cut value corresponding to a given signal efficiency
2739 Double_t effS = eff_bvss->GetBinCenter( bini );
2740
2741 Double_t cut = rootFinder.Root( effS );
2742
2743 // retrieve background efficiency for given cut
2744 if (Use_Splines_for_Eff_) effB = fSplTrainRefB->Eval( cut );
2745 else effB = mva_eff_tr_b->GetBinContent( mva_eff_tr_b->FindBin( cut ) );
2746
2747 // and fill histograms
2748 eff_bvss->SetBinContent( bini, effB );
2749 rej_bvss->SetBinContent( bini, 1.0-effB );
2750 }
2751 fEffS = 0;
2752
2753 // create splines for histogram
2754 fSplTrainEffBvsS = new TSpline1( "effBvsS", new TGraph( eff_bvss ) );
2755 }
2756
2757 // must exist...
2758 if (0 == fSplTrainEffBvsS) return 0.0;
2759
2760 // now find signal efficiency that corresponds to required background efficiency
2761 Double_t effS = 0., effB, effS_ = 0., effB_ = 0.;
2762 Int_t nbins_ = 1000;
2763 for (Int_t bini=1; bini<=nbins_; bini++) {
2764
2765 // get corresponding signal and background efficiencies
2766 effS = (bini - 0.5)/Float_t(nbins_);
2767 effB = fSplTrainEffBvsS->Eval( effS );
2768
2769 // find signal efficiency that corresponds to required background efficiency
2770 if ((effB - effBref)*(effB_ - effBref) <= 0) break;
2771 effS_ = effS;
2772 effB_ = effB;
2773 }
2774
2775 return 0.5*(effS + effS_); // the mean between bin above and bin below
2776}
2777
2778////////////////////////////////////////////////////////////////////////////////
2779
2780std::vector<Float_t> TMVA::MethodBase::GetMulticlassEfficiency(std::vector<std::vector<Float_t> >& purity)
2781{
2782 Data()->SetCurrentType(Types::kTesting);
2783 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTesting, Types::kMulticlass));
2784 if (!resMulticlass) Log() << kFATAL<<Form("Dataset[%s] : ",DataInfo().GetName())<< "unable to create pointer in GetMulticlassEfficiency, exiting."<<Endl;
2785
2786 purity.push_back(resMulticlass->GetAchievablePur());
2787 return resMulticlass->GetAchievableEff();
2788}
2789
2790////////////////////////////////////////////////////////////////////////////////
2791
2792std::vector<Float_t> TMVA::MethodBase::GetMulticlassTrainingEfficiency(std::vector<std::vector<Float_t> >& purity)
2793{
2794 Data()->SetCurrentType(Types::kTraining);
2795 ResultsMulticlass* resMulticlass = dynamic_cast<ResultsMulticlass*>(Data()->GetResults(GetMethodName(), Types::kTraining, Types::kMulticlass));
2796 if (!resMulticlass) Log() << kFATAL<< "unable to create pointer in GetMulticlassTrainingEfficiency, exiting."<<Endl;
2797
2798 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Determine optimal multiclass cuts for training data..." << Endl;
2799 for (UInt_t icls = 0; icls<DataInfo().GetNClasses(); ++icls) {
2800 resMulticlass->GetBestMultiClassCuts(icls);
2801 }
2802
2803 purity.push_back(resMulticlass->GetAchievablePur());
2804 return resMulticlass->GetAchievableEff();
2805}
2806
2807////////////////////////////////////////////////////////////////////////////////
2808/// Construct a confusion matrix for a multiclass classifier. The confusion
2809/// matrix compares, in turn, each class agaist all other classes in a pair-wise
2810/// fashion. In rows with index \f$ k_r = 0 ... K \f$, \f$ k_r \f$ is
2811/// considered signal for the sake of comparison and for each column
2812/// \f$ k_c = 0 ... K \f$ the corresponding class is considered background.
2813///
2814/// Note that the diagonal elements will be returned as NaN since this will
2815/// compare a class against itself.
2816///
2817/// \see TMVA::ResultsMulticlass::GetConfusionMatrix
2818///
2819/// \param[in] effB The background efficiency for which to evaluate.
2820/// \param[in] type The data set on which to evaluate (training, testing ...).
2821///
2822/// \return A matrix containing signal efficiencies for the given background
2823/// efficiency. The diagonal elements are NaN since this measure is
2824/// meaningless (comparing a class against itself).
2825///
2826
2828{
2829 if (GetAnalysisType() != Types::kMulticlass) {
2830 Log() << kFATAL << "Cannot get confusion matrix for non-multiclass analysis." << std::endl;
2831 return TMatrixD(0, 0);
2832 }
2833
2834 Data()->SetCurrentType(type);
2836 dynamic_cast<ResultsMulticlass *>(Data()->GetResults(GetMethodName(), type, Types::kMulticlass));
2837
2838 if (resMulticlass == nullptr) {
2839 Log() << kFATAL << Form("Dataset[%s] : ", DataInfo().GetName())
2840 << "unable to create pointer in GetMulticlassEfficiency, exiting." << Endl;
2841 return TMatrixD(0, 0);
2842 }
2843
2844 return resMulticlass->GetConfusionMatrix(effB);
2845}
2846
2847////////////////////////////////////////////////////////////////////////////////
2848/// compute significance of mean difference
2849/// \f[
2850/// significance = \frac{|<S> - <B>|}{\sqrt{RMS_{S2} + RMS_{B2}}}
2851/// \f]
2852
2854{
2855 Double_t rms = sqrt( fRmsS*fRmsS + fRmsB*fRmsB );
2856
2857 return (rms > 0) ? TMath::Abs(fMeanS - fMeanB)/rms : 0;
2858}
2859
2860////////////////////////////////////////////////////////////////////////////////
2861/// compute "separation" defined as
2862/// \f[
2863/// <s2> = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2864/// \f]
2865
2870
2871////////////////////////////////////////////////////////////////////////////////
2872/// compute "separation" defined as
2873/// \f[
2874/// <s2> = \frac{1}{2} \int_{-\infty}^{+\infty} { \frac{(S(x) - B(x))^2}{(S(x) + B(x))} dx }
2875/// \f]
2876
2878{
2879 // note, if zero pointers given, use internal pdf
2880 // sanity check first
2881 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2882 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2883 if (!pdfS) pdfS = fSplS;
2884 if (!pdfB) pdfB = fSplB;
2885
2886 if (!fSplS || !fSplB) {
2887 Log()<<kDEBUG<<Form("[%s] : ",DataInfo().GetName())<< "could not calculate the separation, distributions"
2888 << " fSplS or fSplB are not yet filled" << Endl;
2889 return 0;
2890 }else{
2891 return gTools().GetSeparation( *pdfS, *pdfB );
2892 }
2893}
2894
2895////////////////////////////////////////////////////////////////////////////////
2896/// calculate the area (integral) under the ROC curve as a
2897/// overall quality measure of the classification
2898
2900{
2901 // note, if zero pointers given, use internal pdf
2902 // sanity check first
2903 if ((!histS && histB) || (histS && !histB))
2904 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetROCIntegral(TH1D*, TH1D*)> Mismatch in hists" << Endl;
2905
2906 if (histS==0 || histB==0) return 0.;
2907
2908 TMVA::PDF *pdfS = new TMVA::PDF( " PDF Sig", histS, TMVA::PDF::kSpline3 );
2909 TMVA::PDF *pdfB = new TMVA::PDF( " PDF Bkg", histB, TMVA::PDF::kSpline3 );
2910
2911
2912 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2913 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2914
2915 Double_t integral = 0;
2916 UInt_t nsteps = 1000;
2917 Double_t step = (xmax-xmin)/Double_t(nsteps);
2918 Double_t cut = xmin;
2919 for (UInt_t i=0; i<nsteps; i++) {
2920 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2921 cut+=step;
2922 }
2923 delete pdfS;
2924 delete pdfB;
2925 return integral*step;
2926}
2927
2928
2929////////////////////////////////////////////////////////////////////////////////
2930/// calculate the area (integral) under the ROC curve as a
2931/// overall quality measure of the classification
2932
2934{
2935 // note, if zero pointers given, use internal pdf
2936 // sanity check first
2937 if ((!pdfS && pdfB) || (pdfS && !pdfB))
2938 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetSeparation> Mismatch in pdfs" << Endl;
2939 if (!pdfS) pdfS = fSplS;
2940 if (!pdfB) pdfB = fSplB;
2941
2942 if (pdfS==0 || pdfB==0) return 0.;
2943
2944 Double_t xmin = TMath::Min(pdfS->GetXmin(), pdfB->GetXmin());
2945 Double_t xmax = TMath::Max(pdfS->GetXmax(), pdfB->GetXmax());
2946
2947 Double_t integral = 0;
2948 UInt_t nsteps = 1000;
2949 Double_t step = (xmax-xmin)/Double_t(nsteps);
2950 Double_t cut = xmin;
2951 for (UInt_t i=0; i<nsteps; i++) {
2952 integral += (1-pdfB->GetIntegral(cut,xmax)) * pdfS->GetVal(cut);
2953 cut+=step;
2954 }
2955 return integral*step;
2956}
2957
2958////////////////////////////////////////////////////////////////////////////////
2959/// plot significance, \f$ \frac{S}{\sqrt{S^2 + B^2}} \f$, curve for given number
2960/// of signal and background events; returns cut for maximum significance
2961/// also returned via reference is the maximum significance
2962
2966{
2967 Results* results = Data()->GetResults( GetMethodName(), Types::kTesting, Types::kMaxAnalysisType );
2968
2970 Double_t effS(0),effB(0),significance(0);
2971 TH1D *temp_histogram = new TH1D("temp", "temp", fNbinsH, fXmin, fXmax );
2972
2973 if (SignalEvents <= 0 || BackgroundEvents <= 0) {
2974 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<GetMaximumSignificance> "
2975 << "Number of signal or background events is <= 0 ==> abort"
2976 << Endl;
2977 }
2978
2979 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Using ratio SignalEvents/BackgroundEvents = "
2981
2982 TH1* eff_s = results->GetHist("MVA_EFF_S");
2983 TH1* eff_b = results->GetHist("MVA_EFF_B");
2984
2985 if ( (eff_s==0) || (eff_b==0) ) {
2986 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Efficiency histograms empty !" << Endl;
2987 Log() << kWARNING <<Form("Dataset[%s] : ",DataInfo().GetName())<< "no maximum cut found, return 0" << Endl;
2988 return 0;
2989 }
2990
2991 for (Int_t bin=1; bin<=fNbinsH; bin++) {
2992 effS = eff_s->GetBinContent( bin );
2993 effB = eff_b->GetBinContent( bin );
2994
2995 // put significance into a histogram
2997
2998 temp_histogram->SetBinContent(bin,significance);
2999 }
3000
3001 // find maximum in histogram
3002 max_significance = temp_histogram->GetBinCenter( temp_histogram->GetMaximumBin() );
3003 max_significance_value = temp_histogram->GetBinContent( temp_histogram->GetMaximumBin() );
3004
3005 // delete
3006 delete temp_histogram;
3007
3008 Log() << kINFO <<Form("Dataset[%s] : ",DataInfo().GetName())<< "Optimal cut at : " << max_significance << Endl;
3009 Log() << kINFO<<Form("Dataset[%s] : ",DataInfo().GetName()) << "Maximum significance: " << max_significance_value << Endl;
3010
3011 return max_significance;
3012}
3013
3014////////////////////////////////////////////////////////////////////////////////
3015/// calculates rms,mean, xmin, xmax of the event variable
3016/// this can be either done for the variables as they are or for
3017/// normalised variables (in the range of 0-1) if "norm" is set to kTRUE
3018
3023{
3024 Types::ETreeType previousTreeType = Data()->GetCurrentType();
3025 Data()->SetCurrentType(treeType);
3026
3027 Long64_t entries = Data()->GetNEvents();
3028
3029 // sanity check
3030 if (entries <=0)
3031 Log() << kFATAL <<Form("Dataset[%s] : ",DataInfo().GetName())<< "<CalculateEstimator> Wrong tree type: " << treeType << Endl;
3032
3033 // index of the wanted variable
3034 UInt_t varIndex = DataInfo().FindVarIndex( theVarName );
3035
3036 // first fill signal and background in arrays before analysis
3037 xmin = +DBL_MAX;
3038 xmax = -DBL_MAX;
3039
3040 // take into account event weights
3041 meanS = 0;
3042 meanB = 0;
3043 rmsS = 0;
3044 rmsB = 0;
3045 Double_t sumwS = 0, sumwB = 0;
3046
3047 // loop over all training events
3048 for (Int_t ievt = 0; ievt < entries; ievt++) {
3049
3050 const Event* ev = GetEvent(ievt);
3051
3052 Double_t theVar = ev->GetValue(varIndex);
3053 Double_t weight = ev->GetWeight();
3054
3055 if (DataInfo().IsSignal(ev)) {
3056 sumwS += weight;
3057 meanS += weight*theVar;
3058 rmsS += weight*theVar*theVar;
3059 }
3060 else {
3061 sumwB += weight;
3062 meanB += weight*theVar;
3063 rmsB += weight*theVar*theVar;
3064 }
3065 xmin = TMath::Min( xmin, theVar );
3066 xmax = TMath::Max( xmax, theVar );
3067 }
3068
3069 meanS = meanS/sumwS;
3070 meanB = meanB/sumwB;
3073
3074 Data()->SetCurrentType(previousTreeType);
3075}
3076
3077////////////////////////////////////////////////////////////////////////////////
3078/// create reader class for method (classification only at present)
3079
3081{
3082 // the default consists of
3084 if (theClassFileName == "")
3085 classFileName = GetWeightFileDir() + "/" + GetJobName() + "_" + GetMethodName() + ".class.C";
3086 else
3088
3089 TString className = TString("Read") + GetMethodName();
3090
3092 Log() << kINFO //<<Form("Dataset[%s] : ",DataInfo().GetName())
3093 << "Creating standalone class: "
3094 << gTools().Color("lightblue") << classFileName << gTools().Color("reset") << Endl;
3095
3096 std::ofstream fout( classFileName );
3097 if (!fout.good()) { // file could not be opened --> Error
3098 Log() << kFATAL << "<MakeClass> Unable to open file: " << classFileName << Endl;
3099 }
3100
3101 // now create the class
3102 // preamble
3103 fout << "// Class: " << className << std::endl;
3104 fout << "// Automatically generated by MethodBase::MakeClass" << std::endl << "//" << std::endl;
3105
3106 // print general information and configuration state
3107 fout << std::endl;
3108 fout << "/* configuration options =====================================================" << std::endl << std::endl;
3109 WriteStateToStream( fout );
3110 fout << std::endl;
3111 fout << "============================================================================ */" << std::endl;
3112
3113 // generate the class
3114 fout << "" << std::endl;
3115 fout << "#include <array>" << std::endl;
3116 fout << "#include <vector>" << std::endl;
3117 fout << "#include <cmath>" << std::endl;
3118 fout << "#include <string>" << std::endl;
3119 fout << "#include <iostream>" << std::endl;
3120 fout << "" << std::endl;
3121 // now if the classifier needs to write some additional classes for its response implementation
3122 // this code goes here: (at least the header declarations need to come before the main class
3123 this->MakeClassSpecificHeader( fout, className );
3124
3125 fout << "#ifndef IClassifierReader__def" << std::endl;
3126 fout << "#define IClassifierReader__def" << std::endl;
3127 fout << std::endl;
3128 fout << "class IClassifierReader {" << std::endl;
3129 fout << std::endl;
3130 fout << " public:" << std::endl;
3131 fout << std::endl;
3132 fout << " // constructor" << std::endl;
3133 fout << " IClassifierReader() : fStatusIsClean( true ) {}" << std::endl;
3134 fout << " virtual ~IClassifierReader() {}" << std::endl;
3135 fout << std::endl;
3136 fout << " // return classifier response" << std::endl;
3137 if(GetAnalysisType() == Types::kMulticlass) {
3138 fout << " virtual std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3139 } else {
3140 fout << " virtual double GetMvaValue( const std::vector<double>& inputValues ) const = 0;" << std::endl;
3141 }
3142 fout << std::endl;
3143 fout << " // returns classifier status" << std::endl;
3144 fout << " bool IsStatusClean() const { return fStatusIsClean; }" << std::endl;
3145 fout << std::endl;
3146 fout << " protected:" << std::endl;
3147 fout << std::endl;
3148 fout << " bool fStatusIsClean;" << std::endl;
3149 fout << "};" << std::endl;
3150 fout << std::endl;
3151 fout << "#endif" << std::endl;
3152 fout << std::endl;
3153 fout << "class " << className << " : public IClassifierReader {" << std::endl;
3154 fout << std::endl;
3155 fout << " public:" << std::endl;
3156 fout << std::endl;
3157 fout << " // constructor" << std::endl;
3158 fout << " " << className << "( std::vector<std::string>& theInputVars )" << std::endl;
3159 fout << " : IClassifierReader()," << std::endl;
3160 fout << " fClassName( \"" << className << "\" )," << std::endl;
3161 fout << " fNvars( " << GetNvar() << " )" << std::endl;
3162 fout << " {" << std::endl;
3163 fout << " // the training input variables" << std::endl;
3164 fout << " const char* inputVars[] = { ";
3165 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3166 fout << "\"" << GetOriginalVarName(ivar) << "\"";
3167 if (ivar<GetNvar()-1) fout << ", ";
3168 }
3169 fout << " };" << std::endl;
3170 fout << std::endl;
3171 fout << " // sanity checks" << std::endl;
3172 fout << " if (theInputVars.size() <= 0) {" << std::endl;
3173 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": empty input vector\" << std::endl;" << std::endl;
3174 fout << " fStatusIsClean = false;" << std::endl;
3175 fout << " }" << std::endl;
3176 fout << std::endl;
3177 fout << " if (theInputVars.size() != fNvars) {" << std::endl;
3178 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in number of input values: \"" << std::endl;
3179 fout << " << theInputVars.size() << \" != \" << fNvars << std::endl;" << std::endl;
3180 fout << " fStatusIsClean = false;" << std::endl;
3181 fout << " }" << std::endl;
3182 fout << std::endl;
3183 fout << " // validate input variables" << std::endl;
3184 fout << " for (size_t ivar = 0; ivar < theInputVars.size(); ivar++) {" << std::endl;
3185 fout << " if (theInputVars[ivar] != inputVars[ivar]) {" << std::endl;
3186 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": mismatch in input variable names\" << std::endl" << std::endl;
3187 fout << " << \" for variable [\" << ivar << \"]: \" << theInputVars[ivar].c_str() << \" != \" << inputVars[ivar] << std::endl;" << std::endl;
3188 fout << " fStatusIsClean = false;" << std::endl;
3189 fout << " }" << std::endl;
3190 fout << " }" << std::endl;
3191 fout << std::endl;
3192 fout << " // initialize min and max vectors (for normalisation)" << std::endl;
3193 for (UInt_t ivar = 0; ivar < GetNvar(); ivar++) {
3194 fout << " fVmin[" << ivar << "] = " << std::setprecision(15) << GetXmin( ivar ) << ";" << std::endl;
3195 fout << " fVmax[" << ivar << "] = " << std::setprecision(15) << GetXmax( ivar ) << ";" << std::endl;
3196 }
3197 fout << std::endl;
3198 fout << " // initialize input variable types" << std::endl;
3199 for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
3200 fout << " fType[" << ivar << "] = \'" << DataInfo().GetVariableInfo(ivar).GetVarType() << "\';" << std::endl;
3201 }
3202 fout << std::endl;
3203 fout << " // initialize constants" << std::endl;
3204 fout << " Initialize();" << std::endl;
3205 fout << std::endl;
3206 if (GetTransformationHandler().GetTransformationList().GetSize() != 0) {
3207 fout << " // initialize transformation" << std::endl;
3208 fout << " InitTransform();" << std::endl;
3209 }
3210 fout << " }" << std::endl;
3211 fout << std::endl;
3212 fout << " // destructor" << std::endl;
3213 fout << " virtual ~" << className << "() {" << std::endl;
3214 fout << " Clear(); // method-specific" << std::endl;
3215 fout << " }" << std::endl;
3216 fout << std::endl;
3217 fout << " // the classifier response" << std::endl;
3218 fout << " // \"inputValues\" is a vector of input values in the same order as the" << std::endl;
3219 fout << " // variables given to the constructor" << std::endl;
3220 if(GetAnalysisType() == Types::kMulticlass) {
3221 fout << " std::vector<double> GetMulticlassValues( const std::vector<double>& inputValues ) const override;" << std::endl;
3222 } else {
3223 fout << " double GetMvaValue( const std::vector<double>& inputValues ) const override;" << std::endl;
3224 }
3225 fout << std::endl;
3226 fout << " private:" << std::endl;
3227 fout << std::endl;
3228 fout << " // method-specific destructor" << std::endl;
3229 fout << " void Clear();" << std::endl;
3230 fout << std::endl;
3231 if (GetTransformationHandler().GetTransformationList().GetSize()!=0) {
3232 fout << " // input variable transformation" << std::endl;
3233 GetTransformationHandler().MakeFunction(fout, className,1);
3234 fout << " void InitTransform();" << std::endl;
3235 fout << " void Transform( std::vector<double> & iv, int sigOrBgd ) const;" << std::endl;
3236 fout << std::endl;
3237 }
3238 fout << " // common member variables" << std::endl;
3239 fout << " const char* fClassName;" << std::endl;
3240 fout << std::endl;
3241 fout << " const size_t fNvars;" << std::endl;
3242 fout << " size_t GetNvar() const { return fNvars; }" << std::endl;
3243 fout << " char GetType( int ivar ) const { return fType[ivar]; }" << std::endl;
3244 fout << std::endl;
3245 fout << " // normalisation of input variables" << std::endl;
3246 fout << " double fVmin[" << GetNvar() << "];" << std::endl;
3247 fout << " double fVmax[" << GetNvar() << "];" << std::endl;
3248 fout << " double NormVariable( double x, double xmin, double xmax ) const {" << std::endl;
3249 fout << " // normalise to output range: [-1, 1]" << std::endl;
3250 fout << " return 2*(x - xmin)/(xmax - xmin) - 1.0;" << std::endl;
3251 fout << " }" << std::endl;
3252 fout << std::endl;
3253 fout << " // type of input variable: 'F' or 'I'" << std::endl;
3254 fout << " char fType[" << GetNvar() << "];" << std::endl;
3255 fout << std::endl;
3256 fout << " // initialize internal variables" << std::endl;
3257 fout << " void Initialize();" << std::endl;
3258 if(GetAnalysisType() == Types::kMulticlass) {
3259 fout << " std::vector<double> GetMulticlassValues__( const std::vector<double>& inputValues ) const;" << std::endl;
3260 } else {
3261 fout << " double GetMvaValue__( const std::vector<double>& inputValues ) const;" << std::endl;
3262 }
3263 fout << "" << std::endl;
3264 fout << " // private members (method specific)" << std::endl;
3265
3266 // call the classifier specific output (the classifier must close the class !)
3267 MakeClassSpecific( fout, className );
3268
3269 if(GetAnalysisType() == Types::kMulticlass) {
3270 fout << "inline std::vector<double> " << className << "::GetMulticlassValues( const std::vector<double>& inputValues ) const" << std::endl;
3271 } else {
3272 fout << "inline double " << className << "::GetMvaValue( const std::vector<double>& inputValues ) const" << std::endl;
3273 }
3274 fout << "{" << std::endl;
3275 fout << " // classifier response value" << std::endl;
3276 if(GetAnalysisType() == Types::kMulticlass) {
3277 fout << " std::vector<double> retval;" << std::endl;
3278 } else {
3279 fout << " double retval = 0;" << std::endl;
3280 }
3281 fout << std::endl;
3282 fout << " // classifier response, sanity check first" << std::endl;
3283 fout << " if (!IsStatusClean()) {" << std::endl;
3284 fout << " std::cout << \"Problem in class \\\"\" << fClassName << \"\\\": cannot return classifier response\"" << std::endl;
3285 fout << " << \" because status is dirty\" << std::endl;" << std::endl;
3286 fout << " }" << std::endl;
3287 fout << " else {" << std::endl;
3288 if (IsNormalised()) {
3289 fout << " // normalise variables" << std::endl;
3290 fout << " std::vector<double> iV;" << std::endl;
3291 fout << " iV.reserve(inputValues.size());" << std::endl;
3292 fout << " int ivar = 0;" << std::endl;
3293 fout << " for (std::vector<double>::const_iterator varIt = inputValues.begin();" << std::endl;
3294 fout << " varIt != inputValues.end(); varIt++, ivar++) {" << std::endl;
3295 fout << " iV.push_back(NormVariable( *varIt, fVmin[ivar], fVmax[ivar] ));" << std::endl;
3296 fout << " }" << std::endl;
3297 if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
3298 GetMethodType() != Types::kHMatrix) {
3299 fout << " Transform( iV, -1 );" << std::endl;
3300 }
3301
3302 if(GetAnalysisType() == Types::kMulticlass) {
3303 fout << " retval = GetMulticlassValues__( iV );" << std::endl;
3304 } else {
3305 fout << " retval = GetMvaValue__( iV );" << std::endl;
3306 }
3307 } else {
3308 if (GetTransformationHandler().GetTransformationList().GetSize() != 0 && GetMethodType() != Types::kLikelihood &&
3309 GetMethodType() != Types::kHMatrix) {
3310 fout << " std::vector<double> iV(inputValues);" << std::endl;
3311 fout << " Transform( iV, -1 );" << std::endl;
3312 if(GetAnalysisType() == Types::kMulticlass) {
3313 fout << " retval = GetMulticlassValues__( iV );" << std::endl;
3314 } else {
3315 fout << " retval = GetMvaValue__( iV );" << std::endl;
3316 }
3317 } else {
3318 if(GetAnalysisType() == Types::kMulticlass) {
3319 fout << " retval = GetMulticlassValues__( inputValues );" << std::endl;
3320 } else {
3321 fout << " retval = GetMvaValue__( inputValues );" << std::endl;
3322 }
3323 }
3324 }
3325 fout << " }" << std::endl;
3326 fout << std::endl;
3327 fout << " return retval;" << std::endl;
3328 fout << "}" << std::endl;
3329
3330 // create output for transformation - if any
3331 if (GetTransformationHandler().GetTransformationList().GetSize()!=0)
3332 GetTransformationHandler().MakeFunction(fout, className,2);
3333
3334 // close the file
3335 fout.close();
3336}
3337
3338////////////////////////////////////////////////////////////////////////////////
3339/// prints out method-specific help method
3340
// NOTE(review): Doxygen-rendered fragment — the leading numbers are the original
// file's line numbers, and hyperlinked lines were dropped. Line 3341 (missing here)
// is the declaration; per the class index this is
// void TMVA::MethodBase::PrintHelpMessage() const.
3342{
3343 // if options are written to reference file, also append help info
// Save the current std::cout buffer so it can be restored at the end, even if
// the help text is redirected into the options-reference file below.
3344 std::streambuf* cout_sbuf = std::cout.rdbuf(); // save original sbuf
3345 std::ofstream* o = 0;
3346 if (gConfig().WriteOptionsReference()) {
3347 Log() << kINFO << "Print Help message for class " << GetName() << " into file: " << GetReferenceFile() << Endl;
// Open in append mode: the reference file already holds the option listing.
// NOTE(review): 'o' is new'd and later close()d but never deleted — a leak on
// every call; presumably kFATAL below aborts, so the bad-file path never returns.
3348 o = new std::ofstream( GetReferenceFile(), std::ios::app );
3349 if (!o->good()) { // file could not be opened --> Error
3350 Log() << kFATAL << "<PrintHelpMessage> Unable to append to output file: " << GetReferenceFile() << Endl;
3351 }
3352 std::cout.rdbuf( o->rdbuf() ); // redirect 'std::cout' to file
3353 }
3354
// Terminal mode (no redirection): print a decorated, colored banner.
// File mode: print a plain single-line header instead.
3355 // "|--------------------------------------------------------------|"
3356 if (!o) {
3357 Log() << kINFO << Endl;
3358 Log() << gTools().Color("bold")
3359 << "================================================================"
3360 << gTools().Color( "reset" )
3361 << Endl;
3362 Log() << gTools().Color("bold")
3363 << "H e l p f o r M V A m e t h o d [ " << GetName() << " ] :"
3364 << gTools().Color( "reset" )
3365 << Endl;
3366 }
3367 else {
3368 Log() << "Help for MVA method [ " << GetName() << " ] :" << Endl;
3369 }
3370
3371 // print method-specific help message
// Virtual hook implemented by each concrete method; its output goes through the
// (possibly redirected) stream set up above.
3372 GetHelpMessage();
3373
3374 if (!o) {
3375 Log() << Endl;
3376 Log() << "<Suppress this message by specifying \"!H\" in the booking option>" << Endl;
3377 Log() << gTools().Color("bold")
3378 << "================================================================"
3379 << gTools().Color( "reset" )
3380 << Endl;
3381 Log() << Endl;
3382 }
3383 else {
3384 // indicate END
// End marker lets readers of the reference file find where the appended
// help text for this method stops.
3385 Log() << "# End of Message___" << Endl;
3386 }
3387
3388 std::cout.rdbuf( cout_sbuf ); // restore the original stream buffer
3389 if (o) o->close();
3390}
3391
3392// ----------------------- r o o t f i n d i n g ----------------------------
3393
3394////////////////////////////////////////////////////////////////////////////////
3395/// returns efficiency as function of cut
3396
// NOTE(review): two hyperlinked lines were lost in this rendering:
//  - 3397: the declaration; per the class index this is
//    Double_t TMVA::MethodBase::GetValueForRoot( Double_t theCut ) — used as the
//    target function for the Brent root finder (see RootFinder).
//  - 3402: an "if (...) {" line — the dangling '}' at 3404 and the 'else' at 3405
//    prove a condition was dropped; presumably it tests Use_Splines_for_Eff_
//    (declared in this file per the index) to select spline vs. binned lookup.
//    TODO confirm against the actual MethodBase.cxx.
3398{
3399 Double_t retval=0;
3400
3401 // retrieve the class object
3403 retval = fSplRefS->Eval( theCut ); // spline-interpolated efficiency at this cut value
3404 }
3405 else retval = fEffS->GetBinContent( fEffS->FindBin( theCut ) ); // binned lookup fallback
3406
3407 // caution: here we take some "forbidden" action to hide a problem:
3408 // in some cases, in particular for likelihood, the binned efficiency distributions
3409 // do not equal 1, at xmin, and 0 at xmax; of course, in principle we have the
3410 // unbinned information available in the trees, but the unbinned minimization is
3411 // too slow, and we don't need to do a precision measurement here. Hence, we force
3412 // this property.
// Within 'eps' of either end of the cut range, clamp the efficiency to its
// known endpoint value; which endpoint is 1 vs. 0 depends on the cut orientation.
3413 Double_t eps = 1.0e-5;
3414 if (theCut-fXmin < eps) retval = (GetCutOrientation() == kPositive) ? 1.0 : 0.0;
3415 else if (fXmax-theCut < eps) retval = (GetCutOrientation() == kPositive) ? 0.0 : 1.0;
3416
3417 return retval;
3418}
3419
3420////////////////////////////////////////////////////////////////////////////////
3421/// returns the event collection (i.e. the dataset) TRANSFORMED using the
3422/// classifiers specific Variable Transformation (e.g. Decorr or Decorr:Gauss:Decorr)
3423
// NOTE(review): line 3424 (the declaration) was rendered as a hyperlink and is
// missing; per the class index it is
// const std::vector<TMVA::Event*>& TMVA::MethodBase::GetEventCollection( Types::ETreeType type ).
3425{
3426 // if there's no variable transformation for this classifier, just hand back the
3427 // event collection of the data set
3428 if (GetTransformationHandler().GetTransformationList().GetEntries() <= 0) {
3429 return (Data()->GetEventCollection(type));
3430 }
3431
3432 // otherwise, transform ALL the events and hand back the vector of the pointers to the
3433 // transformed events. If the pointer is already != 0, i.e. the whole thing has been
3434 // done before, I don't need to do it again, but just "hand over" the pointer to those events.
3435 Int_t idx = Data()->TreeIndex(type); //index indicating Training,Testing,... events/datasets
// Lazy per-tree-type cache: fEventCollections[idx] is filled on first use and
// reused afterwards. The cached pointer is replaced by the output of
// CalcTransformations; ownership of that vector presumably stays with the
// transformation handler — TODO confirm before freeing anything here.
3436 if (fEventCollections.at(idx) == 0) {
3437 fEventCollections.at(idx) = &(Data()->GetEventCollection(type));
3438 fEventCollections.at(idx) = GetTransformationHandler().CalcTransformations(*(fEventCollections.at(idx)),kTRUE);
3439 }
3440 return *(fEventCollections.at(idx));
3441}
3442
3443////////////////////////////////////////////////////////////////////////////////
3444/// calculates the TMVA version string from the training version code on the fly
3445
// NOTE(review): line 3446 (the declaration) is missing from this rendering; per
// the class index it is TString TMVA::MethodBase::GetTrainingTMVAVersionString() const.
3447{
// The version code packs major/minor/patch into one byte each: 0xMMmmpp.
3448 UInt_t a = GetTrainingTMVAVersionCode() & 0xff0000; a>>=16;
3449 UInt_t b = GetTrainingTMVAVersionCode() & 0x00ff00; b>>=8;
3450 UInt_t c = GetTrainingTMVAVersionCode() & 0x0000ff;
3451
// NOTE(review): "%i" with UInt_t arguments is a signed/unsigned mismatch;
// harmless for byte-sized values but "%u" would be strictly correct.
3452 return TString::Format("%i.%i.%i",a,b,c);
3453}
3454
3455////////////////////////////////////////////////////////////////////////////////
3456/// calculates the ROOT version string from the training version code on the fly
3457
// NOTE(review): line 3458 (the declaration) is missing from this rendering; per
// the class index it is TString TMVA::MethodBase::GetTrainingROOTVersionString() const.
3459{
// The ROOT version code packs major/minor/patch into one byte each: 0xMMmmpp.
3460 UInt_t a = GetTrainingROOTVersionCode() & 0xff0000; a>>=16;
3461 UInt_t b = GetTrainingROOTVersionCode() & 0x00ff00; b>>=8;
3462 UInt_t c = GetTrainingROOTVersionCode() & 0x0000ff;
3463
// ROOT's conventional version spelling, e.g. "6.32/04" (major.minor/patch).
3464 return TString::Format("%i.%02i/%02i",a,b,c);
3465}
3466
3467////////////////////////////////////////////////////////////////////////////////
3468
// NOTE(review): lines 3469-3470 were rendered as hyperlinks and are missing.
// Per the class index, 3469 is the declaration
// Double_t TMVA::MethodBase::GetKSTrainingVsTest( Char_t SorB, TString opt ),
// and 3470 is presumably the opening of the statement completed on the next
// line, i.e. "auto *mvaRes = dynamic_cast<ResultsClassification*>".
// The function runs a Kolmogorov-Smirnov compatibility test between the
// training and testing MVA-output distributions (overtraining check),
// for signal when SorB is 's'/'S', otherwise for background.
3471 ( Data()->GetResults(GetMethodName(),Types::kTesting, Types::kClassification) );
3472
3473 if (mvaRes != NULL) {
// Fetch the four booked MVA-output histograms: test-sample S/B and
// training-sample S/B.
3474 TH1D *mva_s = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_S"));
3475 TH1D *mva_b = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_B"));
3476 TH1D *mva_s_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_S"));
3477 TH1D *mva_b_tr = dynamic_cast<TH1D*> (mvaRes->GetHist("MVA_TRAIN_B"));
3478
// -1 signals "test unavailable" (histogram missing or wrong type).
3479 if ( !mva_s || !mva_b || !mva_s_tr || !mva_b_tr) return -1;
3480
// 'opt' is forwarded to TH1::KolmogorovTest (e.g. "X" requests the
// pseudo-experiment estimate of the KS probability).
3481 if (SorB == 's' || SorB == 'S')
3482 return mva_s->KolmogorovTest( mva_s_tr, opt.Data() );
3483 else
3484 return mva_b->KolmogorovTest( mva_b_tr, opt.Data() );
3485 }
3486 return -1;
3487}
const Bool_t Use_Splines_for_Eff_
const Int_t NBIN_HIST_HIGH
#define d(i)
Definition RSha256.hxx:102
#define b(i)
Definition RSha256.hxx:100
#define c(i)
Definition RSha256.hxx:101
#define a(i)
Definition RSha256.hxx:99
#define s1(x)
Definition RSha256.hxx:91
#define ROOT_VERSION_CODE
Definition RVersion.hxx:24
bool Bool_t
Boolean (0=false, 1=true) (bool)
Definition RtypesCore.h:77
int Int_t
Signed integer 4 bytes (int)
Definition RtypesCore.h:59
char Char_t
Character 1 byte (char)
Definition RtypesCore.h:51
float Float_t
Float 4 bytes (float)
Definition RtypesCore.h:71
constexpr Bool_t kFALSE
Definition RtypesCore.h:108
double Double_t
Double 8 bytes.
Definition RtypesCore.h:73
long long Long64_t
Portable signed long integer 8 bytes.
Definition RtypesCore.h:83
constexpr Bool_t kTRUE
Definition RtypesCore.h:107
ROOT::Detail::TRangeCast< T, true > TRangeDynCast
TRangeDynCast is an adapter class that allows the typed iteration through a TCollection.
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void data
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t r
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t result
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h length
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void value
Option_t Option_t TPoint TPoint const char y2
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Option_t Option_t TPoint TPoint const char y1
char name[80]
Definition TGX11.cxx:110
float xmin
float xmax
TMatrixT< Double_t > TMatrixD
Definition TMatrixDfwd.h:23
char * Form(const char *fmt,...)
Formats a string in a circular formatting buffer.
Definition TString.cxx:2495
R__EXTERN TSystem * gSystem
Definition TSystem.h:572
#define TMVA_VERSION_CODE
Definition Version.h:47
const_iterator begin() const
const_iterator end() const
Class to manage histogram axis.
Definition TAxis.h:32
Double_t GetXmax() const
Definition TAxis.h:142
Double_t GetXmin() const
Definition TAxis.h:141
This class stores the date and time with a precision of one second in an unsigned 32 bit word (950130...
Definition TDatime.h:37
Describe directory structure in memory.
Definition TDirectory.h:45
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:130
static TFile * Open(const char *name, Option_t *option="", const char *ftitle="", Int_t compress=ROOT::RCompressionSetting::EDefaults::kUseCompiledDefault, Int_t netopt=0)
Create / open a file.
Definition TFile.cxx:3764
A TGraph is an object made of two arrays X and Y with npoints each.
Definition TGraph.h:41
1-D histogram with a double per channel (see TH1 documentation)
Definition TH1.h:926
1-D histogram with a float per channel (see TH1 documentation)
Definition TH1.h:878
TH1 is the base class of all histogram classes in ROOT.
Definition TH1.h:109
virtual Double_t GetMean(Int_t axis=1) const
For axis = 1,2 or 3 returns the mean value of the histogram along X,Y or Z axis.
Definition TH1.cxx:7586
static void AddDirectory(Bool_t add=kTRUE)
Sets the flag controlling the automatic add of histograms in memory.
Definition TH1.cxx:1263
virtual Int_t GetQuantiles(Int_t n, Double_t *xp, const Double_t *p=nullptr)
Compute Quantiles for this histogram.
Definition TH1.cxx:4608
static Bool_t AddDirectoryStatus()
Static function: cannot be inlined on Windows/NT.
Definition TH1.cxx:741
2-D histogram with a float per channel (see TH1 documentation)
Definition TH2.h:345
Int_t Fill(Double_t) override
Invalid Fill method.
Definition TH2.cxx:363
A doubly linked list.
Definition TList.h:38
Class that contains all the information of a class.
Definition ClassInfo.h:49
TString fWeightFileExtension
Definition Config.h:125
VariablePlotting & GetVariablePlotting()
Definition Config.h:97
class TMVA::Config::VariablePlotting fVariablePlotting
IONames & GetIONames()
Definition Config.h:98
MsgLogger * fLogger
! message logger
Class that contains all the data information.
Definition DataSetInfo.h:62
Class that contains all the data information.
Definition DataSet.h:58
static void SetIsTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition Event.cxx:399
static void SetIgnoreNegWeightsInTraining(Bool_t)
when this static function is called, it sets the flag whether events with negative event weight shoul...
Definition Event.cxx:408
Interface for all concrete MVA method implementations.
Definition IMethod.h:53
void Init(std::vector< TString > &graphTitles)
This function gets some title and it creates a TGraph for every title.
IPythonInteractive()
standard constructor
~IPythonInteractive()
standard destructor
void ClearGraphs()
This function sets the point number to 0 for all graphs.
void AddPoint(Double_t x, Double_t y1, Double_t y2)
This function is used only in 2 TGraph case, and it will add new data points to graphs.
Virtual base Class for all MVA method.
Definition MethodBase.h:111
TDirectory * MethodBaseDir() const
returns the ROOT directory where all instances of the corresponding MVA method are stored
virtual Double_t GetKSTrainingVsTest(Char_t SorB, TString opt="X")
MethodBase(const TString &jobName, Types::EMVA methodType, const TString &methodTitle, DataSetInfo &dsi, const TString &theOption="")
standard constructor
void PrintHelpMessage() const override
prints out method-specific help method
virtual std::vector< Float_t > GetAllMulticlassValues()
Get all multi-class values.
virtual Double_t GetSeparation(TH1 *, TH1 *) const
compute "separation" defined as
const char * GetName() const override
Definition MethodBase.h:337
void ReadClassesFromXML(void *clsnode)
read number of classes from XML
void SetWeightFileDir(TString fileDir)
set directory of weight file
void WriteStateToXML(void *parent) const
general method used in writing the header of the weight files where the used variables,...
void DeclareBaseOptions()
define the options (their key words) that can be set in the option string here the options valid for ...
virtual void TestRegression(Double_t &bias, Double_t &biasT, Double_t &dev, Double_t &devT, Double_t &rms, Double_t &rmsT, Double_t &mInf, Double_t &mInfT, Double_t &corr, Types::ETreeType type)
calculate <sum-of-deviation-squared> of regression output versus "true" value from test sample
virtual void DeclareCompatibilityOptions()
options that are used ONLY for the READER to ensure backward compatibility they are hence without any...
virtual Double_t GetSignificance() const
compute significance of mean difference
virtual Double_t GetProba(const Event *ev)
virtual TMatrixD GetMulticlassConfusionMatrix(Double_t effB, Types::ETreeType type)
Construct a confusion matrix for a multiclass classifier.
virtual void WriteEvaluationHistosToFile(Types::ETreeType treetype)
writes all MVA evaluation histograms to file
virtual void TestMulticlass()
test multiclass classification
const std::vector< TMVA::Event * > & GetEventCollection(Types::ETreeType type)
returns the event collection (i.e.
virtual std::vector< Double_t > GetDataMvaValues(DataSet *data=nullptr, Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the given Data type
void SetupMethod()
setup of methods
TDirectory * BaseDir() const
returns the ROOT directory where info/histograms etc of the corresponding MVA method instance are sto...
virtual std::vector< Float_t > GetMulticlassEfficiency(std::vector< std::vector< Float_t > > &purity)
void AddInfoItem(void *gi, const TString &name, const TString &value) const
xml writing
virtual void AddClassifierOutputProb(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual Double_t GetEfficiency(const TString &, Types::ETreeType, Double_t &err)
fill background efficiency (resp.
TString GetTrainingTMVAVersionString() const
calculates the TMVA version string from the training version code on the fly
void Statistics(Types::ETreeType treeType, const TString &theVarName, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &)
calculates rms,mean, xmin, xmax of the event variable this can be either done for the variables as th...
Bool_t GetLine(std::istream &fin, char *buf)
reads one line from the input stream checks for certain keywords and interprets the line if keywords ...
void ProcessSetup()
process all options the "CheckForUnusedOptions" is done in an independent call, since it may be overr...
virtual std::vector< Double_t > GetMvaValues(Long64_t firstEvt=0, Long64_t lastEvt=-1, Bool_t logProgress=false)
get all the MVA values for the events of the current Data type
virtual Bool_t IsSignalLike()
uses a pre-set cut on the MVA output (SetSignalReferenceCut and SetSignalReferenceCutOrientation) for...
virtual ~MethodBase()
destructor
void WriteMonitoringHistosToFile() const override
write special monitoring histograms to file dummy implementation here --------------—
virtual Double_t GetMaximumSignificance(Double_t SignalEvents, Double_t BackgroundEvents, Double_t &optimal_significance_value) const
plot significance, , curve for given number of signal and background events; returns cut for maximum ...
virtual Double_t GetTrainingEfficiency(const TString &)
void SetWeightFileName(TString)
set the weight file name (deprecated)
TString GetWeightFileName() const
retrieve weight file name
virtual void TestClassification()
initialization
void AddOutput(Types::ETreeType type, Types::EAnalysisType analysisType)
virtual void AddRegressionOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void InitBase()
default initialization called by all constructors
virtual void GetRegressionDeviation(UInt_t tgtNum, Types::ETreeType type, Double_t &stddev, Double_t &stddev90Percent) const
void ReadStateFromXMLString(const char *xmlstr)
for reading from memory
void MakeClass(const TString &classFileName=TString("")) const override
create reader class for method (classification only at present)
void CreateMVAPdfs()
Create PDFs of the MVA output variables.
TString GetTrainingROOTVersionString() const
calculates the ROOT version string from the training version code on the fly
virtual Double_t GetValueForRoot(Double_t)
returns efficiency as function of cut
void ReadStateFromFile()
Function to write options and weights to file.
void WriteVarsToStream(std::ostream &tf, const TString &prefix="") const
write the list of variables (name, min, max) for a given data transformation method to the stream
void ReadVarsFromStream(std::istream &istr)
Read the variables (name, min, max) for a given data transformation method from the stream.
void ReadSpectatorsFromXML(void *specnode)
read spectator info from XML
void SetTestvarName(const TString &v="")
Definition MethodBase.h:344
void ReadVariablesFromXML(void *varnode)
read variable info from XML
virtual std::map< TString, Double_t > OptimizeTuningParameters(TString fomType="ROCIntegral", TString fitType="FitGA")
call the Optimizer with the set of parameters and ranges that are meant to be tuned.
virtual std::vector< Float_t > GetMulticlassTrainingEfficiency(std::vector< std::vector< Float_t > > &purity)
void WriteStateToStream(std::ostream &tf) const
general method used in writing the header of the weight files where the used variables,...
virtual Double_t GetRarity(Double_t mvaVal, Types::ESBType reftype=Types::kBackground) const
compute rarity:
virtual void SetTuneParameters(std::map< TString, Double_t > tuneParameters)
set the tuning parameters according to the argument This is just a dummy .
void ReadStateFromStream(std::istream &tf)
read the header from the weight files of the different MVA methods
void AddVarsXMLTo(void *parent) const
write variable info to XML
Double_t GetMvaValue(Double_t *errLower=nullptr, Double_t *errUpper=nullptr) override=0
void AddTargetsXMLTo(void *parent) const
write target info to XML
void ReadTargetsFromXML(void *tarnode)
read target info from XML
void ProcessBaseOptions()
the option string is decoded, for available options see "DeclareOptions"
void ReadStateFromXML(void *parent)
virtual std::vector< Float_t > GetAllRegressionValues()
Get all regression values in one call.
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
void WriteStateToFile() const
write options and weights to file note that each one text file for the main configuration information...
void AddClassesXMLTo(void *parent) const
write class info to XML
virtual void AddClassifierOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
void AddSpectatorsXMLTo(void *parent) const
write spectator info to XML
virtual Double_t GetROCIntegral(TH1D *histS, TH1D *histB) const
calculate the area (integral) under the ROC curve as a overall quality measure of the classification
virtual void AddMulticlassOutput(Types::ETreeType type)
prepare tree branch with the method's discriminating variable
virtual void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
void SetSource(const std::string &source)
Definition MsgLogger.h:68
PDF wrapper for histograms; uses user-defined spline interpolation.
Definition PDF.h:63
@ kSpline3
Definition PDF.h:70
@ kSpline2
Definition PDF.h:70
Class that is the base-class for a vector of result.
Class which takes the results of a multiclass classification.
Class that is the base-class for a vector of result.
Class that is the base-class for a vector of result.
Definition Results.h:57
Root finding using Brents algorithm (translated from CERNLIB function RZERO)
Definition RootFinder.h:48
Linear interpolation of TGraph.
Definition TSpline1.h:43
Timing information for training and evaluation of MVA methods.
Definition Timer.h:58
void ComputeStat(const std::vector< TMVA::Event * > &, std::vector< Float_t > *, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Double_t &, Int_t signalClass, Bool_t norm=kFALSE)
sanity check
Definition Tools.cxx:203
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition Tools.cxx:376
Double_t GetSeparation(TH1 *S, TH1 *B) const
compute "separation" defined as
Definition Tools.cxx:122
Double_t GetMutualInformation(const TH2F &)
Mutual Information method for non-linear correlations estimates in 2D histogram Author: Moritz Backes...
Definition Tools.cxx:564
const TString & Color(const TString &)
human readable color strings
Definition Tools.cxx:803
TXMLEngine & xmlengine()
Definition Tools.h:262
Bool_t CheckSplines(const TH1 *, const TSpline *)
check quality of splining by comparing splines and histograms in each bin
Definition Tools.cxx:454
void ReadAttr(void *node, const char *, T &value)
read attribute from xml
Definition Tools.h:329
void * GetChild(void *parent, const char *childname=nullptr)
get child node
Definition Tools.cxx:1125
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
add attribute to xml
Definition Tools.h:347
Double_t NormHist(TH1 *theHist, Double_t norm=1.0)
normalises histogram
Definition Tools.cxx:358
void * AddChild(void *parent, const char *childname, const char *content=nullptr, bool isRootNode=false)
add child node
Definition Tools.cxx:1099
void * GetNextChild(void *prevchild, const char *childname=nullptr)
XML helpers.
Definition Tools.cxx:1137
Singleton class for Global types used by TMVA.
Definition Types.h:71
@ kSignal
Never change this number - it is elsewhere assumed to be zero !
Definition Types.h:135
@ kBackground
Definition Types.h:136
@ kLikelihood
Definition Types.h:79
@ kHMatrix
Definition Types.h:81
@ kMulticlass
Definition Types.h:129
@ kNoAnalysisType
Definition Types.h:130
@ kClassification
Definition Types.h:127
@ kMaxAnalysisType
Definition Types.h:131
@ kRegression
Definition Types.h:128
@ kTraining
Definition Types.h:143
Linear interpolation class.
Gaussian Transformation of input variables.
Class for type info of MVA input variable.
Linear interpolation class.
Linear interpolation class.
A TMultiGraph is a collection of TGraph (or derived) objects.
Definition TMultiGraph.h:34
Collectable string class.
Definition TObjString.h:28
Basic string class.
Definition TString.h:138
void ToLower()
Change string to lower-case.
Definition TString.cxx:1189
Int_t Atoi() const
Return integer value of string.
Definition TString.cxx:1994
TSubString Strip(EStripType s=kTrailing, char c=' ') const
Return a substring of self stripped at beginning and/or end.
Definition TString.cxx:1170
const char * Data() const
Definition TString.h:384
@ kLeading
Definition TString.h:284
static TString Format(const char *fmt,...)
Static method which formats a string using a printf style format descriptor and return a TString.
Definition TString.cxx:2384
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition TString.h:660
virtual const char * GetBuildNode() const
Return the build node name.
Definition TSystem.cxx:3937
virtual int mkdir(const char *name, Bool_t recursive=kFALSE)
Make a file system directory.
Definition TSystem.cxx:916
virtual const char * WorkingDirectory()
Return working directory.
Definition TSystem.cxx:881
virtual UserGroup_t * GetUserInfo(Int_t uid)
Returns all user info in the UserGroup_t structure.
Definition TSystem.cxx:1612
void SaveDoc(XMLDocPointer_t xmldoc, const char *filename, Int_t layout=1)
store document content to file if layout<=0, no any spaces or newlines will be placed between xmlnode...
void FreeDoc(XMLDocPointer_t xmldoc)
frees allocated document data and deletes document itself
XMLNodePointer_t DocGetRootElement(XMLDocPointer_t xmldoc)
returns root node of document
XMLDocPointer_t NewDoc(const char *version="1.0")
creates new xml document with provided version
XMLDocPointer_t ParseFile(const char *filename, Int_t maxbuf=100000)
Parses content of file and tries to produce xml structures.
XMLDocPointer_t ParseString(const char *xmlstring)
parses content of string and tries to produce xml structures
void DocSetRootElement(XMLDocPointer_t xmldoc, XMLNodePointer_t xmlnode)
set main (root) node for document
TLine * line
Double_t x[n]
Definition legend1.C:17
TH1F * h1
Definition legend1.C:5
Config & gConfig()
Tools & gTools()
void CreateVariableTransforms(const TString &trafoDefinition, TMVA::DataSetInfo &dataInfo, TMVA::TransformationHandler &transformationHandler, TMVA::MsgLogger &log)
MsgLogger & Endl(MsgLogger &ml)
Definition MsgLogger.h:148
Short_t Max(Short_t a, Short_t b)
Returns the largest of a and b.
Definition TMathBase.h:249
Double_t Sqrt(Double_t x)
Returns the square root of x.
Definition TMath.h:673
Short_t Min(Short_t a, Short_t b)
Returns the smallest of a and b.
Definition TMathBase.h:197
Short_t Abs(Short_t d)
Returns the absolute value of parameter Short_t d.
Definition TMathBase.h:122
static void output()