Logo ROOT  
Reference Guide
DataLoader.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Omar Zapata
3// Mentors: Lorenzo Moneta, Sergei Gleyzer
4//NOTE: Based on TMVA::Factory
5
6/**********************************************************************************
7 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
8 * Package: TMVA *
9 * Class : DataLoader *
10 * Web : http://tmva.sourceforge.net *
11 * *
12 * Description: *
13 * This is a class to load datasets into every booked method *
14 * *
15 * Authors (alphabetical): *
16 * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
17 * Omar Zapata <Omar.Zapata@cern.ch> - ITM/UdeA, Colombia *
18 * Sergei Gleyzer<sergei.gleyzer@cern.ch> - CERN, Switzerland *
19 * *
20 * Copyright (c) 2005-2015: *
21 * CERN, Switzerland *
22 * ITM/UdeA, Colombia *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29
30/*! \class TMVA::DataLoader
31\ingroup TMVA
32
33*/
34
35#include "TTree.h"
36#include "TH2.h"
37#include "TMatrixD.h"
38
39#include "TMVA/DataLoader.h"
40#include "TMVA/Config.h"
41#include "TMVA/CvSplit.h"
42#include "TMVA/Tools.h"
43#include "TMVA/IMethod.h"
44#include "TMVA/MethodBase.h"
46#include "TMVA/DataSetManager.h"
47#include "TMVA/DataSetInfo.h"
48#include "TMVA/MethodBoost.h"
49#include "TMVA/MethodCategory.h"
50
51#include "TMVA/VariableInfo.h"
58
60
61////////////////////////////////////////////////////////////////////////////////
62/*** Create a data loader
63 \param[in] thedlName name of DataLoader object. This name will be used as the
64 top directory name where the training results
65 (weights, i.e .XML and .C files) will be stored.
66 The results will be stored by default in the `theDlName/weights`
67 directory and relative to the current directory. If the directory is not existing,
68 a new one will be created automatically.
69 For using a different location (i.e. a different path to the current directory) one
70 can set an absolute path location in `TMVA::gConfig()::GetIONames().fWeightFileDirPrefix`
71 For example, by setting
72~~~~~~~~~~~~~~~{.cpp}
73 TMVA::gConfig()::GetIONames().fWeightFileDirPrefix = "/tmp";
74 TMVA::gConfig()::GetIONames().fWeightFileDir = "myTrainigResults";
75~~~~~~~~~~~~~~~
76 The training results will be stored in the `/tmp/thedlName/myTrainingResults`
77 directory.
78**/
79
81: Configurable( ),
82 fDataSetManager ( NULL ), //DSMTEST
83 fDataInputHandler ( new DataInputHandler ),
84 fTransformations ( "I" ),
85 fVerbose ( kFALSE ),
86 fDataAssignType ( kAssignEvents ),
87 fATreeEvent (0)
88{
90 SetName(thedlName.Data());
91 fLogger->SetSource("DataLoader");
92}
93
94////////////////////////////////////////////////////////////////////////////////
95
97{
98 // destructor
99
100 std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
101 for (;trfIt != fDefaultTrfs.end(); ++trfIt) delete (*trfIt);
102
103 delete fDataInputHandler;
104
105 // destroy singletons
106 // DataSetManager::DestroyInstance(); // DSMTEST replaced by following line
107 delete fDataSetManager; // DSMTEST
108
109 // problem with call of REGISTER_METHOD macro ...
110 // ClassifierDataLoader::DestroyInstance();
111 // Types::DestroyInstance();
112 //Tools::DestroyInstance();
113 //Config::DestroyInstance();
114}
115
116
117////////////////////////////////////////////////////////////////////////////////
118
120{
121 return fDataSetManager->AddDataSetInfo(dsi); // DSMTEST
122}
123
124////////////////////////////////////////////////////////////////////////////////
125
127{
128 DataSetInfo* dsi = fDataSetManager->GetDataSetInfo(dsiName); // DSMTEST
129
130 if (dsi!=0) return *dsi;
131
132 return fDataSetManager->AddDataSetInfo(*(new DataSetInfo(dsiName))); // DSMTEST
133}
134
135////////////////////////////////////////////////////////////////////////////////
136
138{
139 return DefaultDataSetInfo(); // DSMTEST
140}
141
142////////////////////////////////////////////////////////////////////////////////
143/// Transforms the variables and return a new DataLoader with the transformed
144/// variables
145
147{
148 TString trOptions = "0";
149 TString trName = "None";
150 if (trafoDefinition.Contains("(")) {
151
152 // contains transformation parameters
153 Ssiz_t parStart = trafoDefinition.Index( "(" );
154 Ssiz_t parLen = trafoDefinition.Index( ")", parStart )-parStart+1;
155
156 trName = trafoDefinition(0,parStart);
157 trOptions = trafoDefinition(parStart,parLen);
158 trOptions.Remove(parLen-1,1);
159 trOptions.Remove(0,1);
160 }
161 else
162 trName = trafoDefinition;
163
164 VarTransformHandler* handler = new VarTransformHandler(this);
165 // variance threshold variable transformation
166 if (trName == "VT") {
167
168 // find threshold value from given input
169 Double_t threshold = 0.0;
170 if (!trOptions.IsFloat()){
171 Log() << kFATAL << " VT transformation must be passed a floating threshold value" << Endl;
172 delete handler;
173 return this;
174 }
175 else
176 threshold = trOptions.Atof();
177 TMVA::DataLoader *transformedLoader = handler->VarianceThreshold(threshold);
178 delete handler;
179 return transformedLoader;
180 }
181 else {
182 delete handler;
183 Log() << kFATAL << "Incorrect transformation string provided, please check" << Endl;
184 }
185 Log() << kINFO << "No transformation applied, returning original loader" << Endl;
186 return this;
187}
188
189////////////////////////////////////////////////////////////////////////////////
190// the next functions are to assign events directly
191
192////////////////////////////////////////////////////////////////////////////////
193/// create the data assignment tree (for event-wise data assignment by user)
194
196{
197 TTree * assignTree = new TTree( name, name );
198 assignTree->SetDirectory(0);
199 assignTree->Branch( "type", &fATreeType, "ATreeType/I" );
200 assignTree->Branch( "weight", &fATreeWeight, "ATreeWeight/F" );
201
202 std::vector<VariableInfo>& vars = DefaultDataSetInfo().GetVariableInfos();
203 std::vector<VariableInfo>& tgts = DefaultDataSetInfo().GetTargetInfos();
204 std::vector<VariableInfo>& spec = DefaultDataSetInfo().GetSpectatorInfos();
205
206 if (fATreeEvent.size()==0) fATreeEvent.resize(vars.size()+tgts.size()+spec.size());
207 // add variables
208 for (UInt_t ivar=0; ivar<vars.size(); ivar++) {
209 TString vname = vars[ivar].GetExpression();
210 assignTree->Branch( vname, &fATreeEvent[ivar], vname + "/F" );
211 }
212 // add targets
213 for (UInt_t itgt=0; itgt<tgts.size(); itgt++) {
214 TString vname = tgts[itgt].GetExpression();
215 assignTree->Branch( vname, &fATreeEvent[vars.size()+itgt], vname + "/F" );
216 }
217 // add spectators
218 for (UInt_t ispc=0; ispc<spec.size(); ispc++) {
219 TString vname = spec[ispc].GetExpression();
220 assignTree->Branch( vname, &fATreeEvent[vars.size()+tgts.size()+ispc], vname + "/F" );
221 }
222 return assignTree;
223}
224
225////////////////////////////////////////////////////////////////////////////////
226/// add signal training event
227
228void TMVA::DataLoader::AddSignalTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
229{
230 AddEvent( "Signal", Types::kTraining, event, weight );
231}
232
233////////////////////////////////////////////////////////////////////////////////
234/// add signal testing event
235
236void TMVA::DataLoader::AddSignalTestEvent( const std::vector<Double_t>& event, Double_t weight )
237{
238 AddEvent( "Signal", Types::kTesting, event, weight );
239}
240
241////////////////////////////////////////////////////////////////////////////////
242/// add signal training event
243
244void TMVA::DataLoader::AddBackgroundTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
245{
246 AddEvent( "Background", Types::kTraining, event, weight );
247}
248
249////////////////////////////////////////////////////////////////////////////////
250/// add signal training event
251
252void TMVA::DataLoader::AddBackgroundTestEvent( const std::vector<Double_t>& event, Double_t weight )
253{
254 AddEvent( "Background", Types::kTesting, event, weight );
255}
256
257////////////////////////////////////////////////////////////////////////////////
258/// add signal training event
259
260void TMVA::DataLoader::AddTrainingEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
261{
262 AddEvent( className, Types::kTraining, event, weight );
263}
264
265////////////////////////////////////////////////////////////////////////////////
266/// add signal test event
267
268void TMVA::DataLoader::AddTestEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
269{
270 AddEvent( className, Types::kTesting, event, weight );
271}
272
273////////////////////////////////////////////////////////////////////////////////
274/// add event
275/// vector event : the order of values is: variables + targets + spectators
276
278 const std::vector<Double_t>& event, Double_t weight )
279{
280 ClassInfo* theClass = DefaultDataSetInfo().AddClass(className); // returns class (creates it if necessary)
281 UInt_t clIndex = theClass->GetNumber();
282
283
284 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
285 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
286 fAnalysisType = Types::kMulticlass;
287
288
289 if (clIndex>=fTrainAssignTree.size()) {
290 fTrainAssignTree.resize(clIndex+1, 0);
291 fTestAssignTree.resize(clIndex+1, 0);
292 }
293
294 if (fTrainAssignTree[clIndex]==0) { // does not exist yet
295 fTrainAssignTree[clIndex] = CreateEventAssignTrees( Form("TrainAssignTree_%s", className.Data()) );
296 fTestAssignTree[clIndex] = CreateEventAssignTrees( Form("TestAssignTree_%s", className.Data()) );
297 }
298
299 fATreeType = clIndex;
300 fATreeWeight = weight;
301 for (UInt_t ivar=0; ivar<event.size(); ivar++) fATreeEvent[ivar] = event[ivar];
302
303 if(tt==Types::kTraining) fTrainAssignTree[clIndex]->Fill();
304 else fTestAssignTree[clIndex]->Fill();
305
306}
307
308////////////////////////////////////////////////////////////////////////////////
309///
310
312{
313 return fTrainAssignTree[clIndex]!=0;
314}
315
316////////////////////////////////////////////////////////////////////////////////
317/// assign event-wise local trees to data set
318
320{
321 UInt_t size = fTrainAssignTree.size();
322 for(UInt_t i=0; i<size; i++) {
323 if(!UserAssignEvents(i)) continue;
324 const TString& className = DefaultDataSetInfo().GetClassInfo(i)->GetName();
325 SetWeightExpression( "weight", className );
326 AddTree(fTrainAssignTree[i], className, 1.0, TCut(""), Types::kTraining );
327 AddTree(fTestAssignTree[i], className, 1.0, TCut(""), Types::kTesting );
328 }
329}
330
331////////////////////////////////////////////////////////////////////////////////
332/// number of signal events (used to compute significance)
333
334void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
335 const TCut& cut, const TString& treetype )
336{
338 TString tmpTreeType = treetype; tmpTreeType.ToLower();
339 if (tmpTreeType.Contains( "train" ) && tmpTreeType.Contains( "test" )) tt = Types::kMaxTreeType;
340 else if (tmpTreeType.Contains( "train" )) tt = Types::kTraining;
341 else if (tmpTreeType.Contains( "test" )) tt = Types::kTesting;
342 else {
343 Log() << kFATAL << "<AddTree> cannot interpret tree type: \"" << treetype
344 << "\" should be \"Training\" or \"Test\" or \"Training and Testing\"" << Endl;
345 }
346 AddTree( tree, className, weight, cut, tt );
347}
348
349////////////////////////////////////////////////////////////////////////////////
350
351void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
352 const TCut& cut, Types::ETreeType tt )
353{
354 if(!tree)
355 Log() << kFATAL << "Tree does not exist (empty pointer)." << Endl;
356
357 DefaultDataSetInfo().AddClass( className );
358
359 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
360 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
361 fAnalysisType = Types::kMulticlass;
362
363 Log() << kINFO<< "Add Tree " << tree->GetName() << " of type " << className
364 << " with " << tree->GetEntries() << " events" << Endl;
365 DataInput().AddTree( tree, className, weight, cut, tt );
366}
367
368////////////////////////////////////////////////////////////////////////////////
369/// number of signal events (used to compute significance)
370
372{
373 AddTree( signal, "Signal", weight, TCut(""), treetype );
374}
375
376////////////////////////////////////////////////////////////////////////////////
377/// add signal tree from text file
378
380{
381 // create trees from these ascii files
382 TTree* signalTree = new TTree( "TreeS", "Tree (S)" );
383 signalTree->ReadFile( datFileS );
384
385 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Signal file : \""
386 << datFileS << Endl;
387
388 // number of signal events (used to compute significance)
389 AddTree( signalTree, "Signal", weight, TCut(""), treetype );
390}
391
392////////////////////////////////////////////////////////////////////////////////
393
394void TMVA::DataLoader::AddSignalTree( TTree* signal, Double_t weight, const TString& treetype )
395{
396 AddTree( signal, "Signal", weight, TCut(""), treetype );
397}
398
399////////////////////////////////////////////////////////////////////////////////
400/// number of signal events (used to compute significance)
401
403{
404 AddTree( signal, "Background", weight, TCut(""), treetype );
405}
406
407////////////////////////////////////////////////////////////////////////////////
408/// add background tree from text file
409
411{
412 // create trees from these ascii files
413 TTree* bkgTree = new TTree( "TreeB", "Tree (B)" );
414 bkgTree->ReadFile( datFileB );
415
416 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Background file : \""
417 << datFileB << Endl;
418
419 // number of signal events (used to compute significance)
420 AddTree( bkgTree, "Background", weight, TCut(""), treetype );
421}
422
423////////////////////////////////////////////////////////////////////////////////
424
425void TMVA::DataLoader::AddBackgroundTree( TTree* signal, Double_t weight, const TString& treetype )
426{
427 AddTree( signal, "Background", weight, TCut(""), treetype );
428}
429
430////////////////////////////////////////////////////////////////////////////////
431
433{
434 AddTree( tree, "Signal", weight );
435}
436
437////////////////////////////////////////////////////////////////////////////////
438
440{
441 AddTree( tree, "Background", weight );
442}
443
444////////////////////////////////////////////////////////////////////////////////
445/// set background tree
446
447void TMVA::DataLoader::SetTree( TTree* tree, const TString& className, Double_t weight )
448{
449 AddTree( tree, className, weight, TCut(""), Types::kMaxTreeType );
450}
451
452////////////////////////////////////////////////////////////////////////////////
453/// define the input trees for signal and background; no cuts are applied
454
455void TMVA::DataLoader::SetInputTrees( TTree* signal, TTree* background,
456 Double_t signalWeight, Double_t backgroundWeight )
457{
458 AddTree( signal, "Signal", signalWeight, TCut(""), Types::kMaxTreeType );
459 AddTree( background, "Background", backgroundWeight, TCut(""), Types::kMaxTreeType );
460}
461
462////////////////////////////////////////////////////////////////////////////////
463
464void TMVA::DataLoader::SetInputTrees( const TString& datFileS, const TString& datFileB,
465 Double_t signalWeight, Double_t backgroundWeight )
466{
467 DataInput().AddTree( datFileS, "Signal", signalWeight );
468 DataInput().AddTree( datFileB, "Background", backgroundWeight );
469}
470
471////////////////////////////////////////////////////////////////////////////////
472/// define the input trees for signal and background from single input tree,
473/// containing both signal and background events distinguished by the type
474/// identifiers: SigCut and BgCut
475
476void TMVA::DataLoader::SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut )
477{
478 AddTree( inputTree, "Signal", 1.0, SigCut, Types::kMaxTreeType );
479 AddTree( inputTree, "Background", 1.0, BgCut , Types::kMaxTreeType );
480}
481
482////////////////////////////////////////////////////////////////////////////////
483/// user inserts discriminating variable in data set info
484
485void TMVA::DataLoader::AddVariable( const TString& expression, const TString& title, const TString& unit,
486 char type, Double_t min, Double_t max )
487{
488 DefaultDataSetInfo().AddVariable( expression, title, unit, min, max, type );
489}
490
491////////////////////////////////////////////////////////////////////////////////
492/// user inserts discriminating variable in data set info
493
494void TMVA::DataLoader::AddVariable( const TString& expression, char type,
495 Double_t min, Double_t max )
496{
497 DefaultDataSetInfo().AddVariable( expression, "", "", min, max, type );
498}
499
500////////////////////////////////////////////////////////////////////////////////
501/// user inserts discriminating array of variables in data set info
502/// in case input tree provides an array of values
503
504void TMVA::DataLoader::AddVariablesArray(const TString &expression, int size, char type,
505 Double_t min, Double_t max)
506{
507 DefaultDataSetInfo().AddVariablesArray(expression, size, "", "", min, max, type);
508}
509////////////////////////////////////////////////////////////////////////////////
510/// user inserts target in data set info
511
512void TMVA::DataLoader::AddTarget( const TString& expression, const TString& title, const TString& unit,
513 Double_t min, Double_t max )
514{
515 if( fAnalysisType == Types::kNoAnalysisType )
516 fAnalysisType = Types::kRegression;
517
518 DefaultDataSetInfo().AddTarget( expression, title, unit, min, max );
519}
520
521////////////////////////////////////////////////////////////////////////////////
522/// user inserts target in data set info
523
524void TMVA::DataLoader::AddSpectator( const TString& expression, const TString& title, const TString& unit,
525 Double_t min, Double_t max )
526{
527 DefaultDataSetInfo().AddSpectator( expression, title, unit, min, max );
528}
529
530////////////////////////////////////////////////////////////////////////////////
531/// default creation
532
534{
535 return AddDataSet( fName );
536}
537
538////////////////////////////////////////////////////////////////////////////////
539/// fill input variables in data set
540
541void TMVA::DataLoader::SetInputVariables( std::vector<TString>* theVariables )
542{
543 for (std::vector<TString>::iterator it=theVariables->begin();
544 it!=theVariables->end(); ++it) AddVariable(*it);
545}
546
547////////////////////////////////////////////////////////////////////////////////
548
550{
551 DefaultDataSetInfo().SetWeightExpression(variable, "Signal");
552}
553
554////////////////////////////////////////////////////////////////////////////////
555
557{
558 DefaultDataSetInfo().SetWeightExpression(variable, "Background");
559}
560
561////////////////////////////////////////////////////////////////////////////////
562
563void TMVA::DataLoader::SetWeightExpression( const TString& variable, const TString& className )
564{
565 //Log() << kWarning << DefaultDataSetInfo().GetNClasses() /*fClasses.size()*/ << Endl;
566 if (className=="") {
567 SetSignalWeightExpression(variable);
568 SetBackgroundWeightExpression(variable);
569 }
570 else DefaultDataSetInfo().SetWeightExpression( variable, className );
571}
572
573////////////////////////////////////////////////////////////////////////////////
574
575void TMVA::DataLoader::SetCut( const TString& cut, const TString& className ) {
576 SetCut( TCut(cut), className );
577}
578
579////////////////////////////////////////////////////////////////////////////////
580
581void TMVA::DataLoader::SetCut( const TCut& cut, const TString& className )
582{
583 DefaultDataSetInfo().SetCut( cut, className );
584}
585
586////////////////////////////////////////////////////////////////////////////////
587
588void TMVA::DataLoader::AddCut( const TString& cut, const TString& className )
589{
590 AddCut( TCut(cut), className );
591}
592
593////////////////////////////////////////////////////////////////////////////////
594void TMVA::DataLoader::AddCut( const TCut& cut, const TString& className )
595{
596 DefaultDataSetInfo().AddCut( cut, className );
597}
598
599////////////////////////////////////////////////////////////////////////////////
600/// prepare the training and test trees
601
603 Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest,
604 const TString& otherOpt )
605{
606 SetInputTreesFromEventAssignTrees();
607
608 AddCut( cut );
609
610 DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:%s",
611 NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.Data()) );
612}
613
614////////////////////////////////////////////////////////////////////////////////
615/// prepare the training and test trees
616/// kept for backward compatibility
617
619{
620 SetInputTreesFromEventAssignTrees();
621
622 AddCut( cut );
623
624 DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:EqualTrainSample:!V",
625 Ntrain, Ntrain, Ntest, Ntest) );
626}
627
628////////////////////////////////////////////////////////////////////////////////
629/// prepare the training and test trees
630/// -> same cuts for signal and background
631
633{
634 SetInputTreesFromEventAssignTrees();
635
636 DefaultDataSetInfo().PrintClasses();
637 AddCut( cut );
638 DefaultDataSetInfo().SetSplitOptions( opt );
639}
640
641////////////////////////////////////////////////////////////////////////////////
642/// prepare the training and test trees
643
644void TMVA::DataLoader::PrepareTrainingAndTestTree( TCut sigcut, TCut bkgcut, const TString& splitOpt )
645{
646 // if event-wise data assignment, add local trees to dataset first
647 SetInputTreesFromEventAssignTrees();
648
649 //Log() << kINFO <<"Preparing trees for training and testing..."<< Endl;
650 AddCut( sigcut, "Signal" );
651 AddCut( bkgcut, "Background" );
652
653 DefaultDataSetInfo().SetSplitOptions( splitOpt );
654}
655
656////////////////////////////////////////////////////////////////////////////////
657/// Function required to split the training and testing datasets into a
658/// number of folds. Required by the CrossValidation and HyperParameterOptimisation
659/// classes. The option to split the training dataset into a training set and
660/// a validation set is implemented but not currently used.
661
663{
664 s.MakeKFoldDataSet( DefaultDataSetInfo() );
665}
666
667////////////////////////////////////////////////////////////////////////////////
668/// Function for assigning the correct folds to the testing or training set.
669
671{
672 s.PrepareFoldDataSet( DefaultDataSetInfo(), foldNumber, tt );
673}
674
675
676////////////////////////////////////////////////////////////////////////////////
677/// Recombines the dataset. The precise semantics depend on the actual split.
678///
679/// Similar to the inverse operation of `MakeKFoldDataSet` but _will_ differ.
680/// See documentation for each particular split for more information.
681///
682
684{
685 s.RecombineKFoldDataSet( DefaultDataSetInfo(), tt );
686}
687
688////////////////////////////////////////////////////////////////////////////////
689/// Copy method use in VI and CV
690
692{
694 DataLoaderCopy(des,this);
695 return des;
696}
697
698////////////////////////////////////////////////////////////////////////////////
699///Loading Dataset from DataInputHandler for subseed
700
702{
703 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Sbegin();treeinfo!=src->DataInput().Send();++treeinfo)
704 {
705 des->AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
706 }
707
708 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Bbegin();treeinfo!=src->DataInput().Bend();++treeinfo)
709 {
710 des->AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
711 }
712}
713
714////////////////////////////////////////////////////////////////////////////////
715/// returns the correlation matrix of datasets
716
718{
719 const TMatrixD * m = DefaultDataSetInfo().CorrelationMatrix(className);
720 return DefaultDataSetInfo().CreateCorrelationMatrixHist(m,
721 "CorrelationMatrix"+className, "Correlation Matrix ("+className+")");
722}
int Int_t
Definition: CPyCppyy.h:43
unsigned int UInt_t
Definition: CPyCppyy.h:44
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
int Ssiz_t
Definition: RtypesCore.h:67
const Bool_t kFALSE
Definition: RtypesCore.h:101
bool Bool_t
Definition: RtypesCore.h:63
double Double_t
Definition: RtypesCore.h:59
#define ClassImp(name)
Definition: Rtypes.h:364
int type
Definition: TGX11.cxx:121
char * Form(const char *fmt,...)
A specialized string object used for TTree selections.
Definition: TCut.h:25
Service class for 2-D histogram classes.
Definition: TH2.h:30
Class that contains all the information of a class.
Definition: ClassInfo.h:49
UInt_t GetNumber() const
Definition: ClassInfo.h:65
MsgLogger * fLogger
Definition: Configurable.h:128
Class that contains all the data information.
std::vector< TreeInfo >::const_iterator Send() const
std::vector< TreeInfo >::const_iterator Bbegin() const
std::vector< TreeInfo >::const_iterator Sbegin() const
std::vector< TreeInfo >::const_iterator Bend() const
DataInputHandler * fDataInputHandler
Definition: DataLoader.h:189
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
Definition: DataLoader.cxx:195
void AddVariablesArray(const TString &expression, int size, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating array of variables in data set info in case input tree provides an array ...
Definition: DataLoader.cxx:504
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
Definition: DataLoader.cxx:439
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: DataLoader.cxx:371
DataSetInfo & AddDataSet(DataSetInfo &)
Definition: DataLoader.cxx:119
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:524
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
Definition: DataLoader.cxx:319
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal training event
Definition: DataLoader.cxx:260
void SetTree(TTree *tree, const TString &className, Double_t weight)
set background tree
Definition: DataLoader.cxx:447
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
Definition: DataLoader.cxx:236
DataSetInfo & DefaultDataSetInfo()
default creation
Definition: DataLoader.cxx:533
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:252
DataSetManager * fDataSetManager
Definition: DataLoader.h:186
DataLoader * MakeCopy(TString name)
Copy method use in VI and CV.
Definition: DataLoader.cxx:691
void SetSignalWeightExpression(const TString &variable)
Definition: DataLoader.cxx:549
void MakeKFoldDataSet(CvSplit &s)
Function required to split the training and testing datasets into a number of folds.
Definition: DataLoader.cxx:662
void SetWeightExpression(const TString &variable, const TString &className="")
Definition: DataLoader.cxx:563
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:244
void RecombineKFoldDataSet(CvSplit &s, Types::ETreeType tt=Types::kTraining)
Recombines the dataset.
Definition: DataLoader.cxx:683
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
Definition: DataLoader.cxx:146
void SetBackgroundWeightExpression(const TString &variable)
Definition: DataLoader.cxx:556
void AddCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:588
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add event vector event : the order of values is: variables + targets + spectators
Definition: DataLoader.cxx:277
DataLoader(TString thedlName="default")
Definition: DataLoader.cxx:80
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:632
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: DataLoader.cxx:402
DataSetInfo & GetDataSetInfo()
Definition: DataLoader.cxx:137
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:512
TH2 * GetCorrelationMatrix(const TString &className)
returns the correlation matrix of datasets
Definition: DataLoader.cxx:717
Bool_t UserAssignEvents(UInt_t clIndex)
Definition: DataLoader.cxx:311
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:228
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal test event
Definition: DataLoader.cxx:268
void SetSignalTree(TTree *signal, Double_t weight=1.0)
Definition: DataLoader.cxx:432
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
Definition: DataLoader.cxx:464
virtual ~DataLoader()
Definition: DataLoader.cxx:96
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
Definition: DataLoader.cxx:351
DataInputHandler & DataInput()
Definition: DataLoader.h:172
void SetInputVariables(std::vector< TString > *theVariables)
fill input variables in data set
Definition: DataLoader.cxx:541
void SetCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:575
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:485
void PrepareFoldDataSet(CvSplit &s, UInt_t foldNumber, Types::ETreeType tt=Types::kTraining)
Function for assigning the correct folds to the testing or training set.
Definition: DataLoader.cxx:670
Class that contains all the data information.
Definition: DataSetInfo.h:62
Class that contains all the data information.
void SetSource(const std::string &source)
Definition: MsgLogger.h:70
@ kMulticlass
Definition: Types.h:131
@ kNoAnalysisType
Definition: Types.h:132
@ kRegression
Definition: Types.h:130
@ kMaxTreeType
Definition: Types.h:147
@ kTraining
Definition: Types.h:145
@ kTesting
Definition: Types.h:146
TMVA::DataLoader * VarianceThreshold(Double_t threshold)
Computes variance of all the variables and returns a new DataLoader with the selected variables whose...
@ kINFO
Definition: Types.h:60
@ kFATAL
Definition: Types.h:63
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition: TNamed.cxx:140
Basic string class.
Definition: TString.h:136
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1150
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:2012
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1816
const char * Data() const
Definition: TString.h:369
TString & Remove(Ssiz_t pos)
Definition: TString.h:673
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:624
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:639
A TTree represents a columnar dataset.
Definition: TTree.h:79
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
Definition: TTree.cxx:8853
virtual Long64_t ReadFile(const char *filename, const char *branchDescriptor="", char delimiter=' ')
Create or simply read branches from filename.
Definition: TTree.cxx:7507
TBranch * Branch(const char *name, T *obj, Int_t bufsize=32000, Int_t splitlevel=99)
Add a new branch, and infer the data type from the type of obj being passed.
Definition: TTree.h:349
static const std::string name("name")
static constexpr double s
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Log(Double_t x)
Definition: TMath.h:760
Definition: tree.py:1
auto * m
Definition: textangle.C:8
auto * tt
Definition: textangle.C:16