Logo ROOT  
Reference Guide
DataLoader.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Omar Zapata
3// Mentors: Lorenzo Moneta, Sergei Gleyzer
4//NOTE: Based on TMVA::Factory
5
6/**********************************************************************************
7 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
8 * Package: TMVA *
9 * Class : DataLoader *
10 * Web : http://tmva.sourceforge.net *
11 * *
12 * Description: *
13 * This is a class to load datasets into every booked method *
14 * *
15 * Authors (alphabetical): *
16 * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
17 * Omar Zapata <Omar.Zapata@cern.ch> - ITM/UdeA, Colombia *
18 * Sergei Gleyzer<sergei.gleyzer@cern.ch> - CERN, Switzerland *
19 * *
20 * Copyright (c) 2005-2015: *
21 * CERN, Switzerland *
22 * ITM/UdeA, Colombia *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29
30/*! \class TMVA::DataLoader
31\ingroup TMVA
32
33*/
34
35#include "TFile.h"
36#include "TTree.h"
37#include "TH2.h"
38#include "TMath.h"
39#include "TMatrixD.h"
40
41#include "TMVA/DataLoader.h"
42#include "TMVA/Config.h"
43#include "TMVA/CvSplit.h"
44#include "TMVA/Tools.h"
45#include "TMVA/IMethod.h"
46#include "TMVA/MethodBase.h"
48#include "TMVA/DataSetManager.h"
49#include "TMVA/DataSetInfo.h"
50#include "TMVA/MethodBoost.h"
51#include "TMVA/MethodCategory.h"
52
53#include "TMVA/VariableInfo.h"
60
62
63
64////////////////////////////////////////////////////////////////////////////////
65
67: Configurable( ),
68 fDataSetManager ( NULL ), //DSMTEST
69 fDataInputHandler ( new DataInputHandler ),
70 fTransformations ( "I" ),
71 fVerbose ( kFALSE ),
72 fDataAssignType ( kAssignEvents ),
73 fATreeEvent (0)
74{
76 SetName(thedlName.Data());
77 fLogger->SetSource("DataLoader");
78}
79
80////////////////////////////////////////////////////////////////////////////////
81
83{
84 // destructor
85
86 std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
87 for (;trfIt != fDefaultTrfs.end(); ++trfIt) delete (*trfIt);
88
89 delete fDataInputHandler;
90
91 // destroy singletons
92 // DataSetManager::DestroyInstance(); // DSMTEST replaced by following line
93 delete fDataSetManager; // DSMTEST
94
95 // problem with call of REGISTER_METHOD macro ...
96 // ClassifierDataLoader::DestroyInstance();
97 // Types::DestroyInstance();
98 //Tools::DestroyInstance();
99 //Config::DestroyInstance();
100}
101
102
103////////////////////////////////////////////////////////////////////////////////
104
106{
107 return fDataSetManager->AddDataSetInfo(dsi); // DSMTEST
108}
109
110////////////////////////////////////////////////////////////////////////////////
111
113{
114 DataSetInfo* dsi = fDataSetManager->GetDataSetInfo(dsiName); // DSMTEST
115
116 if (dsi!=0) return *dsi;
117
118 return fDataSetManager->AddDataSetInfo(*(new DataSetInfo(dsiName))); // DSMTEST
119}
120
121////////////////////////////////////////////////////////////////////////////////
122
124{
125 return DefaultDataSetInfo(); // DSMTEST
126}
127
128////////////////////////////////////////////////////////////////////////////////
129/// Transforms the variables and return a new DataLoader with the transformed
130/// variables
131
133{
134 TString trOptions = "0";
135 TString trName = "None";
136 if (trafoDefinition.Contains("(")) {
137
138 // contains transformation parameters
139 Ssiz_t parStart = trafoDefinition.Index( "(" );
140 Ssiz_t parLen = trafoDefinition.Index( ")", parStart )-parStart+1;
141
142 trName = trafoDefinition(0,parStart);
143 trOptions = trafoDefinition(parStart,parLen);
144 trOptions.Remove(parLen-1,1);
145 trOptions.Remove(0,1);
146 }
147 else
148 trName = trafoDefinition;
149
150 VarTransformHandler* handler = new VarTransformHandler(this);
151 // variance threshold variable transformation
152 if (trName == "VT") {
153
154 // find threshold value from given input
155 Double_t threshold = 0.0;
156 if (!trOptions.IsFloat()){
157 Log() << kFATAL << " VT transformation must be passed a floating threshold value" << Endl;
158 delete handler;
159 return this;
160 }
161 else
162 threshold = trOptions.Atof();
163 TMVA::DataLoader *transformedLoader = handler->VarianceThreshold(threshold);
164 delete handler;
165 return transformedLoader;
166 }
167 else {
168 Log() << kFATAL << "Incorrect transformation string provided, please check" << Endl;
169 }
170 Log() << kINFO << "No transformation applied, returning original loader" << Endl;
171 return this;
172}
173
174////////////////////////////////////////////////////////////////////////////////
175// the next functions are to assign events directly
176
177////////////////////////////////////////////////////////////////////////////////
178/// create the data assignment tree (for event-wise data assignment by user)
179
181{
182 TTree * assignTree = new TTree( name, name );
183 assignTree->SetDirectory(0);
184 assignTree->Branch( "type", &fATreeType, "ATreeType/I" );
185 assignTree->Branch( "weight", &fATreeWeight, "ATreeWeight/F" );
186
187 std::vector<VariableInfo>& vars = DefaultDataSetInfo().GetVariableInfos();
188 std::vector<VariableInfo>& tgts = DefaultDataSetInfo().GetTargetInfos();
189 std::vector<VariableInfo>& spec = DefaultDataSetInfo().GetSpectatorInfos();
190
191 if (fATreeEvent.size()==0) fATreeEvent.resize(vars.size()+tgts.size()+spec.size());
192 // add variables
193 for (UInt_t ivar=0; ivar<vars.size(); ivar++) {
194 TString vname = vars[ivar].GetExpression();
195 assignTree->Branch( vname, &fATreeEvent[ivar], vname + "/F" );
196 }
197 // add targets
198 for (UInt_t itgt=0; itgt<tgts.size(); itgt++) {
199 TString vname = tgts[itgt].GetExpression();
200 assignTree->Branch( vname, &fATreeEvent[vars.size()+itgt], vname + "/F" );
201 }
202 // add spectators
203 for (UInt_t ispc=0; ispc<spec.size(); ispc++) {
204 TString vname = spec[ispc].GetExpression();
205 assignTree->Branch( vname, &fATreeEvent[vars.size()+tgts.size()+ispc], vname + "/F" );
206 }
207 return assignTree;
208}
209
210////////////////////////////////////////////////////////////////////////////////
211/// add signal training event
212
213void TMVA::DataLoader::AddSignalTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
214{
215 AddEvent( "Signal", Types::kTraining, event, weight );
216}
217
218////////////////////////////////////////////////////////////////////////////////
219/// add signal testing event
220
221void TMVA::DataLoader::AddSignalTestEvent( const std::vector<Double_t>& event, Double_t weight )
222{
223 AddEvent( "Signal", Types::kTesting, event, weight );
224}
225
226////////////////////////////////////////////////////////////////////////////////
227/// add signal training event
228
229void TMVA::DataLoader::AddBackgroundTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
230{
231 AddEvent( "Background", Types::kTraining, event, weight );
232}
233
234////////////////////////////////////////////////////////////////////////////////
235/// add signal training event
236
237void TMVA::DataLoader::AddBackgroundTestEvent( const std::vector<Double_t>& event, Double_t weight )
238{
239 AddEvent( "Background", Types::kTesting, event, weight );
240}
241
242////////////////////////////////////////////////////////////////////////////////
243/// add signal training event
244
245void TMVA::DataLoader::AddTrainingEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
246{
247 AddEvent( className, Types::kTraining, event, weight );
248}
249
250////////////////////////////////////////////////////////////////////////////////
251/// add signal test event
252
253void TMVA::DataLoader::AddTestEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
254{
255 AddEvent( className, Types::kTesting, event, weight );
256}
257
258////////////////////////////////////////////////////////////////////////////////
259/// add event
260/// vector event : the order of values is: variables + targets + spectators
261
263 const std::vector<Double_t>& event, Double_t weight )
264{
265 ClassInfo* theClass = DefaultDataSetInfo().AddClass(className); // returns class (creates it if necessary)
266 UInt_t clIndex = theClass->GetNumber();
267
268
269 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
270 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
271 fAnalysisType = Types::kMulticlass;
272
273
274 if (clIndex>=fTrainAssignTree.size()) {
275 fTrainAssignTree.resize(clIndex+1, 0);
276 fTestAssignTree.resize(clIndex+1, 0);
277 }
278
279 if (fTrainAssignTree[clIndex]==0) { // does not exist yet
280 fTrainAssignTree[clIndex] = CreateEventAssignTrees( Form("TrainAssignTree_%s", className.Data()) );
281 fTestAssignTree[clIndex] = CreateEventAssignTrees( Form("TestAssignTree_%s", className.Data()) );
282 }
283
284 fATreeType = clIndex;
285 fATreeWeight = weight;
286 for (UInt_t ivar=0; ivar<event.size(); ivar++) fATreeEvent[ivar] = event[ivar];
287
288 if(tt==Types::kTraining) fTrainAssignTree[clIndex]->Fill();
289 else fTestAssignTree[clIndex]->Fill();
290
291}
292
293////////////////////////////////////////////////////////////////////////////////
294///
295
297{
298 return fTrainAssignTree[clIndex]!=0;
299}
300
301////////////////////////////////////////////////////////////////////////////////
302/// assign event-wise local trees to data set
303
305{
306 UInt_t size = fTrainAssignTree.size();
307 for(UInt_t i=0; i<size; i++) {
308 if(!UserAssignEvents(i)) continue;
309 const TString& className = DefaultDataSetInfo().GetClassInfo(i)->GetName();
310 SetWeightExpression( "weight", className );
311 AddTree(fTrainAssignTree[i], className, 1.0, TCut(""), Types::kTraining );
312 AddTree(fTestAssignTree[i], className, 1.0, TCut(""), Types::kTesting );
313 }
314}
315
316////////////////////////////////////////////////////////////////////////////////
317/// number of signal events (used to compute significance)
318
319void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
320 const TCut& cut, const TString& treetype )
321{
323 TString tmpTreeType = treetype; tmpTreeType.ToLower();
324 if (tmpTreeType.Contains( "train" ) && tmpTreeType.Contains( "test" )) tt = Types::kMaxTreeType;
325 else if (tmpTreeType.Contains( "train" )) tt = Types::kTraining;
326 else if (tmpTreeType.Contains( "test" )) tt = Types::kTesting;
327 else {
328 Log() << kFATAL << "<AddTree> cannot interpret tree type: \"" << treetype
329 << "\" should be \"Training\" or \"Test\" or \"Training and Testing\"" << Endl;
330 }
331 AddTree( tree, className, weight, cut, tt );
332}
333
334////////////////////////////////////////////////////////////////////////////////
335
336void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
337 const TCut& cut, Types::ETreeType tt )
338{
339 if(!tree)
340 Log() << kFATAL << "Tree does not exist (empty pointer)." << Endl;
341
342 DefaultDataSetInfo().AddClass( className );
343
344 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
345 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
346 fAnalysisType = Types::kMulticlass;
347
348 Log() << kINFO<< "Add Tree " << tree->GetName() << " of type " << className
349 << " with " << tree->GetEntries() << " events" << Endl;
350 DataInput().AddTree( tree, className, weight, cut, tt );
351}
352
353////////////////////////////////////////////////////////////////////////////////
354/// number of signal events (used to compute significance)
355
357{
358 AddTree( signal, "Signal", weight, TCut(""), treetype );
359}
360
361////////////////////////////////////////////////////////////////////////////////
362/// add signal tree from text file
363
365{
366 // create trees from these ascii files
367 TTree* signalTree = new TTree( "TreeS", "Tree (S)" );
368 signalTree->ReadFile( datFileS );
369
370 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Signal file : \""
371 << datFileS << Endl;
372
373 // number of signal events (used to compute significance)
374 AddTree( signalTree, "Signal", weight, TCut(""), treetype );
375}
376
377////////////////////////////////////////////////////////////////////////////////
378
379void TMVA::DataLoader::AddSignalTree( TTree* signal, Double_t weight, const TString& treetype )
380{
381 AddTree( signal, "Signal", weight, TCut(""), treetype );
382}
383
384////////////////////////////////////////////////////////////////////////////////
385/// number of signal events (used to compute significance)
386
388{
389 AddTree( signal, "Background", weight, TCut(""), treetype );
390}
391
392////////////////////////////////////////////////////////////////////////////////
393/// add background tree from text file
394
396{
397 // create trees from these ascii files
398 TTree* bkgTree = new TTree( "TreeB", "Tree (B)" );
399 bkgTree->ReadFile( datFileB );
400
401 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Background file : \""
402 << datFileB << Endl;
403
404 // number of signal events (used to compute significance)
405 AddTree( bkgTree, "Background", weight, TCut(""), treetype );
406}
407
408////////////////////////////////////////////////////////////////////////////////
409
410void TMVA::DataLoader::AddBackgroundTree( TTree* signal, Double_t weight, const TString& treetype )
411{
412 AddTree( signal, "Background", weight, TCut(""), treetype );
413}
414
415////////////////////////////////////////////////////////////////////////////////
416
418{
419 AddTree( tree, "Signal", weight );
420}
421
422////////////////////////////////////////////////////////////////////////////////
423
425{
426 AddTree( tree, "Background", weight );
427}
428
429////////////////////////////////////////////////////////////////////////////////
430/// set background tree
431
432void TMVA::DataLoader::SetTree( TTree* tree, const TString& className, Double_t weight )
433{
434 AddTree( tree, className, weight, TCut(""), Types::kMaxTreeType );
435}
436
437////////////////////////////////////////////////////////////////////////////////
438/// define the input trees for signal and background; no cuts are applied
439
440void TMVA::DataLoader::SetInputTrees( TTree* signal, TTree* background,
441 Double_t signalWeight, Double_t backgroundWeight )
442{
443 AddTree( signal, "Signal", signalWeight, TCut(""), Types::kMaxTreeType );
444 AddTree( background, "Background", backgroundWeight, TCut(""), Types::kMaxTreeType );
445}
446
447////////////////////////////////////////////////////////////////////////////////
448
449void TMVA::DataLoader::SetInputTrees( const TString& datFileS, const TString& datFileB,
450 Double_t signalWeight, Double_t backgroundWeight )
451{
452 DataInput().AddTree( datFileS, "Signal", signalWeight );
453 DataInput().AddTree( datFileB, "Background", backgroundWeight );
454}
455
456////////////////////////////////////////////////////////////////////////////////
457/// define the input trees for signal and background from single input tree,
458/// containing both signal and background events distinguished by the type
459/// identifiers: SigCut and BgCut
460
461void TMVA::DataLoader::SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut )
462{
463 AddTree( inputTree, "Signal", 1.0, SigCut, Types::kMaxTreeType );
464 AddTree( inputTree, "Background", 1.0, BgCut , Types::kMaxTreeType );
465}
466
467////////////////////////////////////////////////////////////////////////////////
468/// user inserts discriminating variable in data set info
469
470void TMVA::DataLoader::AddVariable( const TString& expression, const TString& title, const TString& unit,
471 char type, Double_t min, Double_t max )
472{
473 DefaultDataSetInfo().AddVariable( expression, title, unit, min, max, type );
474}
475
476////////////////////////////////////////////////////////////////////////////////
477/// user inserts discriminating variable in data set info
478
479void TMVA::DataLoader::AddVariable( const TString& expression, char type,
480 Double_t min, Double_t max )
481{
482 DefaultDataSetInfo().AddVariable( expression, "", "", min, max, type );
483}
484
485////////////////////////////////////////////////////////////////////////////////
486/// user inserts discriminating array of variables in data set info
487/// in case input tree provides an array of values
488
489void TMVA::DataLoader::AddVariablesArray(const TString &expression, int size, char type,
490 Double_t min, Double_t max)
491{
492 DefaultDataSetInfo().AddVariablesArray(expression, size, "", "", min, max, type);
493}
494////////////////////////////////////////////////////////////////////////////////
495/// user inserts target in data set info
496
497void TMVA::DataLoader::AddTarget( const TString& expression, const TString& title, const TString& unit,
498 Double_t min, Double_t max )
499{
500 if( fAnalysisType == Types::kNoAnalysisType )
501 fAnalysisType = Types::kRegression;
502
503 DefaultDataSetInfo().AddTarget( expression, title, unit, min, max );
504}
505
506////////////////////////////////////////////////////////////////////////////////
507/// user inserts target in data set info
508
509void TMVA::DataLoader::AddSpectator( const TString& expression, const TString& title, const TString& unit,
510 Double_t min, Double_t max )
511{
512 DefaultDataSetInfo().AddSpectator( expression, title, unit, min, max );
513}
514
515////////////////////////////////////////////////////////////////////////////////
516/// default creation
517
519{
520 return AddDataSet( fName );
521}
522
523////////////////////////////////////////////////////////////////////////////////
524/// fill input variables in data set
525
526void TMVA::DataLoader::SetInputVariables( std::vector<TString>* theVariables )
527{
528 for (std::vector<TString>::iterator it=theVariables->begin();
529 it!=theVariables->end(); ++it) AddVariable(*it);
530}
531
532////////////////////////////////////////////////////////////////////////////////
533
535{
536 DefaultDataSetInfo().SetWeightExpression(variable, "Signal");
537}
538
539////////////////////////////////////////////////////////////////////////////////
540
542{
543 DefaultDataSetInfo().SetWeightExpression(variable, "Background");
544}
545
546////////////////////////////////////////////////////////////////////////////////
547
548void TMVA::DataLoader::SetWeightExpression( const TString& variable, const TString& className )
549{
550 //Log() << kWarning << DefaultDataSetInfo().GetNClasses() /*fClasses.size()*/ << Endl;
551 if (className=="") {
552 SetSignalWeightExpression(variable);
553 SetBackgroundWeightExpression(variable);
554 }
555 else DefaultDataSetInfo().SetWeightExpression( variable, className );
556}
557
558////////////////////////////////////////////////////////////////////////////////
559
560void TMVA::DataLoader::SetCut( const TString& cut, const TString& className ) {
561 SetCut( TCut(cut), className );
562}
563
564////////////////////////////////////////////////////////////////////////////////
565
566void TMVA::DataLoader::SetCut( const TCut& cut, const TString& className )
567{
568 DefaultDataSetInfo().SetCut( cut, className );
569}
570
571////////////////////////////////////////////////////////////////////////////////
572
573void TMVA::DataLoader::AddCut( const TString& cut, const TString& className )
574{
575 AddCut( TCut(cut), className );
576}
577
578////////////////////////////////////////////////////////////////////////////////
579void TMVA::DataLoader::AddCut( const TCut& cut, const TString& className )
580{
581 DefaultDataSetInfo().AddCut( cut, className );
582}
583
584////////////////////////////////////////////////////////////////////////////////
585/// prepare the training and test trees
586
588 Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest,
589 const TString& otherOpt )
590{
591 SetInputTreesFromEventAssignTrees();
592
593 AddCut( cut );
594
595 DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:%s",
596 NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.Data()) );
597}
598
599////////////////////////////////////////////////////////////////////////////////
600/// prepare the training and test trees
601/// kept for backward compatibility
602
604{
605 SetInputTreesFromEventAssignTrees();
606
607 AddCut( cut );
608
609 DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:EqualTrainSample:!V",
610 Ntrain, Ntrain, Ntest, Ntest) );
611}
612
613////////////////////////////////////////////////////////////////////////////////
614/// prepare the training and test trees
615/// -> same cuts for signal and background
616
618{
619 SetInputTreesFromEventAssignTrees();
620
621 DefaultDataSetInfo().PrintClasses();
622 AddCut( cut );
623 DefaultDataSetInfo().SetSplitOptions( opt );
624}
625
626////////////////////////////////////////////////////////////////////////////////
627/// prepare the training and test trees
628
629void TMVA::DataLoader::PrepareTrainingAndTestTree( TCut sigcut, TCut bkgcut, const TString& splitOpt )
630{
631 // if event-wise data assignment, add local trees to dataset first
632 SetInputTreesFromEventAssignTrees();
633
634 //Log() << kINFO <<"Preparing trees for training and testing..."<< Endl;
635 AddCut( sigcut, "Signal" );
636 AddCut( bkgcut, "Background" );
637
638 DefaultDataSetInfo().SetSplitOptions( splitOpt );
639}
640
641////////////////////////////////////////////////////////////////////////////////
642/// Function required to split the training and testing datasets into a
643/// number of folds. Required by the CrossValidation and HyperParameterOptimisation
644/// classes. The option to split the training dataset into a training set and
645/// a validation set is implemented but not currently used.
646
648{
649 s.MakeKFoldDataSet( DefaultDataSetInfo() );
650}
651
652////////////////////////////////////////////////////////////////////////////////
653/// Function for assigning the correct folds to the testing or training set.
654
656{
657 s.PrepareFoldDataSet( DefaultDataSetInfo(), foldNumber, tt );
658}
659
660
661////////////////////////////////////////////////////////////////////////////////
662/// Recombines the dataset. The precise semantics depend on the actual split.
663///
664/// Similar to the inverse operation of `MakeKFoldDataSet` but _will_ differ.
665/// See documentation for each particular split for more information.
666///
667
669{
670 s.RecombineKFoldDataSet( DefaultDataSetInfo(), tt );
671}
672
673////////////////////////////////////////////////////////////////////////////////
674/// Copy method use in VI and CV
675
677{
679 DataLoaderCopy(des,this);
680 return des;
681}
682
683////////////////////////////////////////////////////////////////////////////////
684///Loading Dataset from DataInputHandler for subseed
685
687{
688 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Sbegin();treeinfo!=src->DataInput().Send();++treeinfo)
689 {
690 des->AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
691 }
692
693 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Bbegin();treeinfo!=src->DataInput().Bend();++treeinfo)
694 {
695 des->AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
696 }
697}
698
699////////////////////////////////////////////////////////////////////////////////
700/// returns the correlation matrix of datasets
701
703{
704 const TMatrixD * m = DefaultDataSetInfo().CorrelationMatrix(className);
705 return DefaultDataSetInfo().CreateCorrelationMatrixHist(m,
706 "CorrelationMatrix"+className, "Correlation Matrix ("+className+")");
707}
int Int_t
Definition: RtypesCore.h:41
int Ssiz_t
Definition: RtypesCore.h:63
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
#define ClassImp(name)
Definition: Rtypes.h:365
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
char * Form(const char *fmt,...)
A specialized string object used for TTree selections.
Definition: TCut.h:25
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
Class that contains all the information of a class.
Definition: ClassInfo.h:49
UInt_t GetNumber() const
Definition: ClassInfo.h:65
MsgLogger * fLogger
Definition: Configurable.h:128
Class that contains all the data information.
std::vector< TreeInfo >::const_iterator Send() const
std::vector< TreeInfo >::const_iterator Sbegin() const
std::vector< TreeInfo >::const_iterator Bbegin() const
std::vector< TreeInfo >::const_iterator Bend() const
DataInputHandler * fDataInputHandler
Definition: DataLoader.h:191
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
Definition: DataLoader.cxx:180
void AddVariablesArray(const TString &expression, int size, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating array of variables in data set info in case input tree provides an array ...
Definition: DataLoader.cxx:489
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
Definition: DataLoader.cxx:424
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: DataLoader.cxx:356
DataSetInfo & AddDataSet(DataSetInfo &)
Definition: DataLoader.cxx:105
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:509
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
Definition: DataLoader.cxx:304
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal training event
Definition: DataLoader.cxx:245
void SetTree(TTree *tree, const TString &className, Double_t weight)
set background tree
Definition: DataLoader.cxx:432
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
Definition: DataLoader.cxx:221
DataSetInfo & DefaultDataSetInfo()
default creation
Definition: DataLoader.cxx:518
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:237
DataSetManager * fDataSetManager
Definition: DataLoader.h:188
DataLoader * MakeCopy(TString name)
Copy method use in VI and CV.
Definition: DataLoader.cxx:676
void SetSignalWeightExpression(const TString &variable)
Definition: DataLoader.cxx:534
void MakeKFoldDataSet(CvSplit &s)
Function required to split the training and testing datasets into a number of folds.
Definition: DataLoader.cxx:647
void SetWeightExpression(const TString &variable, const TString &className="")
Definition: DataLoader.cxx:548
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:229
void RecombineKFoldDataSet(CvSplit &s, Types::ETreeType tt=Types::kTraining)
Recombines the dataset.
Definition: DataLoader.cxx:668
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
Definition: DataLoader.cxx:132
void SetBackgroundWeightExpression(const TString &variable)
Definition: DataLoader.cxx:541
void AddCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:573
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add event vector event : the order of values is: variables + targets + spectators
Definition: DataLoader.cxx:262
DataLoader(TString thedlName="default")
Definition: DataLoader.cxx:66
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:617
DataInputHandler & DataInput()
Definition: DataLoader.h:174
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: DataLoader.cxx:387
DataSetInfo & GetDataSetInfo()
Definition: DataLoader.cxx:123
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:497
TH2 * GetCorrelationMatrix(const TString &className)
returns the correlation matrix of datasets
Definition: DataLoader.cxx:702
Bool_t UserAssignEvents(UInt_t clIndex)
Definition: DataLoader.cxx:296
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:213
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal test event
Definition: DataLoader.cxx:253
void SetSignalTree(TTree *signal, Double_t weight=1.0)
Definition: DataLoader.cxx:417
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
Definition: DataLoader.cxx:449
virtual ~DataLoader()
Definition: DataLoader.cxx:82
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
Definition: DataLoader.cxx:336
void SetInputVariables(std::vector< TString > *theVariables)
fill input variables in data set
Definition: DataLoader.cxx:526
void SetCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:560
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:470
void PrepareFoldDataSet(CvSplit &s, UInt_t foldNumber, Types::ETreeType tt=Types::kTraining)
Function for assigning the correct folds to the testing or training set.
Definition: DataLoader.cxx:655
Class that contains all the data information.
Definition: DataSetInfo.h:60
Class that contains all the data information.
void SetSource(const std::string &source)
Definition: MsgLogger.h:70
@ kMulticlass
Definition: Types.h:130
@ kNoAnalysisType
Definition: Types.h:131
@ kRegression
Definition: Types.h:129
@ kMaxTreeType
Definition: Types.h:146
@ kTraining
Definition: Types.h:144
@ kTesting
Definition: Types.h:145
TMVA::DataLoader * VarianceThreshold(Double_t threshold)
Computes variance of all the variables and returns a new DataLoader with the selected variables whose...
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition: TNamed.cxx:140
Basic string class.
Definition: TString.h:131
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1125
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:1987
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1791
const char * Data() const
Definition: TString.h:364
TString & Remove(Ssiz_t pos)
Definition: TString.h:668
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:634
A TTree represents a columnar dataset.
Definition: TTree.h:72
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
Definition: TTree.cxx:8703
TBranch * Branch(const char *name, T *obj, Int_t bufsize=32000, Int_t splitlevel=99)
Add a new branch, and infer the data type from the type of obj being passed.
Definition: TTree.h:341
virtual Long64_t ReadFile(const char *filename, const char *branchDescriptor="", char delimiter=' ')
Create or simply read branches from filename.
Definition: TTree.cxx:7379
static constexpr double s
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Log(Double_t x)
Definition: TMath.h:750
Definition: tree.py:1
auto * m
Definition: textangle.C:8
auto * tt
Definition: textangle.C:16