DataLoader.cxx
// @(#)root/tmva $Id$
// Author: Omar Zapata
// Mentors: Lorenzo Moneta, Sergei Gleyzer
// NOTE: Based on TMVA::Factory

/**********************************************************************************
 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis      *
 * Package: TMVA                                                                  *
 * Class  : DataLoader                                                            *
 * Web    : http://tmva.sourceforge.net                                           *
 *                                                                                *
 * Description:                                                                   *
 *      This is a class to load datasets into every booked method                 *
 *                                                                                *
 * Authors (alphabetical):                                                        *
 *      Lorenzo Moneta <Lorenzo.Moneta@cern.ch>  - CERN, Switzerland              *
 *      Omar Zapata    <Omar.Zapata@cern.ch>     - ITM/UdeA, Colombia             *
 *      Sergei Gleyzer <sergei.gleyzer@cern.ch>  - CERN, Switzerland              *
 *                                                                                *
 * Copyright (c) 2005-2015:                                                       *
 *      CERN, Switzerland                                                         *
 *      ITM/UdeA, Colombia                                                        *
 *                                                                                *
 * Redistribution and use in source and binary forms, with or without             *
 * modification, are permitted according to the terms listed in LICENSE           *
 * (http://tmva.sourceforge.net/LICENSE)                                          *
 **********************************************************************************/

/*! \class TMVA::DataLoader
\ingroup TMVA

Class to load the input data (trees, variables, targets, spectators and event
weights) into every booked method and to prepare the training and test samples.
*/

#include "TTree.h"
#include "TH2.h"
#include "TMatrixD.h"

#include "TMVA/DataLoader.h"
#include "TMVA/Config.h"
#include "TMVA/CvSplit.h"
#include "TMVA/Tools.h"
#include "TMVA/IMethod.h"
#include "TMVA/MethodBase.h"
#include "TMVA/DataInputHandler.h"    // DataInputHandler is allocated in the constructor
#include "TMVA/DataSetManager.h"
#include "TMVA/DataSetInfo.h"
#include "TMVA/MethodBoost.h"
#include "TMVA/MethodCategory.h"
#include "TMVA/VarTransformHandler.h" // used by VarTransform() below

#include "TMVA/VariableInfo.h"

ClassImp(TMVA::DataLoader);

////////////////////////////////////////////////////////////////////////////////
/*** Create a data loader
 \param[in] thedlName name of the DataLoader object. This name is used as the
            top directory name where the training results
            (weights, i.e. .XML and .C files) will be stored.
            By default the results are stored in the `thedlName/weights`
            directory, relative to the current directory. If the directory
            does not exist, it is created automatically.
            To use a different location (i.e. a path that is not relative to
            the current directory), set an absolute path in
            `TMVA::gConfig().GetIONames().fWeightFileDirPrefix`.
            For example, by setting
~~~~~~~~~~~~~~~{.cpp}
TMVA::gConfig().GetIONames().fWeightFileDirPrefix = "/tmp";
TMVA::gConfig().GetIONames().fWeightFileDir = "myTrainingResults";
~~~~~~~~~~~~~~~
            the training results will be stored in the `/tmp/thedlName/myTrainingResults`
            directory.
**/
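// A minimal construction sketch (the loader name "dataset" and the "/tmp" prefix
// below are illustrative, not part of this file):
//
//    TMVA::gConfig().GetIONames().fWeightFileDirPrefix = "/tmp";
//    TMVA::gConfig().GetIONames().fWeightFileDir       = "myTrainingResults";
//    TMVA::DataLoader *loader = new TMVA::DataLoader("dataset");
//    // training results would then be written to /tmp/dataset/myTrainingResults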

TMVA::DataLoader::DataLoader( TString thedlName )
   : Configurable( ),
     fDataSetManager   ( NULL ), //DSMTEST
     fDataInputHandler ( new DataInputHandler ),
     fTransformations  ( "I" ),
     fVerbose          ( kFALSE ),
     fDataAssignType   ( kAssignEvents ),
     fATreeEvent       (0)
{
   fDataSetManager = new DataSetManager( *fDataInputHandler ); // DSMTEST
   SetName(thedlName.Data());
   fLogger->SetSource("DataLoader");
}

////////////////////////////////////////////////////////////////////////////////

TMVA::DataLoader::~DataLoader()
{
   // destructor

   std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
   for (;trfIt != fDefaultTrfs.end(); ++trfIt) delete (*trfIt);

   delete fDataInputHandler;

   // destroy singletons
   // DataSetManager::DestroyInstance(); // DSMTEST replaced by following line
   delete fDataSetManager; // DSMTEST

   // problem with call of REGISTER_METHOD macro ...
   // ClassifierDataLoader::DestroyInstance();
   // Types::DestroyInstance();
   //Tools::DestroyInstance();
   //Config::DestroyInstance();
}


////////////////////////////////////////////////////////////////////////////////

TMVA::DataSetInfo& TMVA::DataLoader::AddDataSet( DataSetInfo &dsi )
{
   return fDataSetManager->AddDataSetInfo(dsi); // DSMTEST
}

////////////////////////////////////////////////////////////////////////////////

TMVA::DataSetInfo& TMVA::DataLoader::AddDataSet( const TString& dsiName )
{
   DataSetInfo* dsi = fDataSetManager->GetDataSetInfo(dsiName); // DSMTEST

   if (dsi!=0) return *dsi;

   return fDataSetManager->AddDataSetInfo(*(new DataSetInfo(dsiName))); // DSMTEST
}

////////////////////////////////////////////////////////////////////////////////

TMVA::DataSetInfo& TMVA::DataLoader::GetDataSetInfo()
{
   return DefaultDataSetInfo(); // DSMTEST
}

////////////////////////////////////////////////////////////////////////////////
/// Transforms the variables and returns a new DataLoader with the transformed
/// variables

TMVA::DataLoader* TMVA::DataLoader::VarTransform( TString trafoDefinition )
{
   TString trOptions = "0";
   TString trName = "None";
   if (trafoDefinition.Contains("(")) {

      // contains transformation parameters
      Ssiz_t parStart = trafoDefinition.Index( "(" );
      Ssiz_t parLen   = trafoDefinition.Index( ")", parStart )-parStart+1;

      trName = trafoDefinition(0,parStart);
      trOptions = trafoDefinition(parStart,parLen);
      trOptions.Remove(parLen-1,1);
      trOptions.Remove(0,1);
   }
   else
      trName = trafoDefinition;

   VarTransformHandler* handler = new VarTransformHandler(this);
   // variance threshold variable transformation
   if (trName == "VT") {

      // find threshold value from given input
      Double_t threshold = 0.0;
      if (!trOptions.IsFloat()){
         Log() << kFATAL << " VT transformation must be passed a floating threshold value" << Endl;
         delete handler;
         return this;
      }
      else
         threshold = trOptions.Atof();
      TMVA::DataLoader *transformedLoader = handler->VarianceThreshold(threshold);
      delete handler;
      return transformedLoader;
   }
   else {
      Log() << kFATAL << "Incorrect transformation string provided, please check" << Endl;
   }
   Log() << kINFO << "No transformation applied, returning original loader" << Endl;
   return this;
}
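// Usage sketch for the variance-threshold ("VT") transformation handled above
// (the threshold value 0.1 and the loader variable are illustrative):
//
//    TMVA::DataLoader *vtLoader = loader->VarTransform("VT(0.1)");
//    // vtLoader keeps only the variables passing the variance threshold; the
//    // original loader is returned unchanged if the string cannot be parsed.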

////////////////////////////////////////////////////////////////////////////////
// the next functions are to assign events directly

////////////////////////////////////////////////////////////////////////////////
/// create the data assignment tree (for event-wise data assignment by user)

TTree* TMVA::DataLoader::CreateEventAssignTrees( const TString& name )
{
   TTree * assignTree = new TTree( name, name );
   assignTree->SetDirectory(0);
   assignTree->Branch( "type",   &fATreeType,   "ATreeType/I" );
   assignTree->Branch( "weight", &fATreeWeight, "ATreeWeight/F" );

   std::vector<VariableInfo>& vars = DefaultDataSetInfo().GetVariableInfos();
   std::vector<VariableInfo>& tgts = DefaultDataSetInfo().GetTargetInfos();
   std::vector<VariableInfo>& spec = DefaultDataSetInfo().GetSpectatorInfos();

   if (fATreeEvent.size()==0) fATreeEvent.resize(vars.size()+tgts.size()+spec.size());
   // add variables
   for (UInt_t ivar=0; ivar<vars.size(); ivar++) {
      TString vname = vars[ivar].GetExpression();
      assignTree->Branch( vname, &fATreeEvent[ivar], vname + "/F" );
   }
   // add targets
   for (UInt_t itgt=0; itgt<tgts.size(); itgt++) {
      TString vname = tgts[itgt].GetExpression();
      assignTree->Branch( vname, &fATreeEvent[vars.size()+itgt], vname + "/F" );
   }
   // add spectators
   for (UInt_t ispc=0; ispc<spec.size(); ispc++) {
      TString vname = spec[ispc].GetExpression();
      assignTree->Branch( vname, &fATreeEvent[vars.size()+tgts.size()+ispc], vname + "/F" );
   }
   return assignTree;
}

////////////////////////////////////////////////////////////////////////////////
/// add signal training event

void TMVA::DataLoader::AddSignalTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
{
   AddEvent( "Signal", Types::kTraining, event, weight );
}

////////////////////////////////////////////////////////////////////////////////
/// add signal test event

void TMVA::DataLoader::AddSignalTestEvent( const std::vector<Double_t>& event, Double_t weight )
{
   AddEvent( "Signal", Types::kTesting, event, weight );
}

////////////////////////////////////////////////////////////////////////////////
/// add background training event

void TMVA::DataLoader::AddBackgroundTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
{
   AddEvent( "Background", Types::kTraining, event, weight );
}

////////////////////////////////////////////////////////////////////////////////
/// add background test event

void TMVA::DataLoader::AddBackgroundTestEvent( const std::vector<Double_t>& event, Double_t weight )
{
   AddEvent( "Background", Types::kTesting, event, weight );
}

////////////////////////////////////////////////////////////////////////////////
/// add training event for the given class

void TMVA::DataLoader::AddTrainingEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
{
   AddEvent( className, Types::kTraining, event, weight );
}

////////////////////////////////////////////////////////////////////////////////
/// add test event for the given class

void TMVA::DataLoader::AddTestEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
{
   AddEvent( className, Types::kTesting, event, weight );
}

////////////////////////////////////////////////////////////////////////////////
/// add a single event to the given class and tree type;
/// the values in `event` must be ordered as: variables + targets + spectators

void TMVA::DataLoader::AddEvent( const TString& className, Types::ETreeType tt,
                                 const std::vector<Double_t>& event, Double_t weight )
{
   ClassInfo* theClass = DefaultDataSetInfo().AddClass(className); // returns class (creates it if necessary)
   UInt_t clIndex = theClass->GetNumber();

   // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
   if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
      fAnalysisType = Types::kMulticlass;

   if (clIndex>=fTrainAssignTree.size()) {
      fTrainAssignTree.resize(clIndex+1, 0);
      fTestAssignTree.resize(clIndex+1, 0);
   }

   if (fTrainAssignTree[clIndex]==0) { // does not exist yet
      fTrainAssignTree[clIndex] = CreateEventAssignTrees( Form("TrainAssignTree_%s", className.Data()) );
      fTestAssignTree[clIndex]  = CreateEventAssignTrees( Form("TestAssignTree_%s",  className.Data()) );
   }

   fATreeType   = clIndex;
   fATreeWeight = weight;
   for (UInt_t ivar=0; ivar<event.size(); ivar++) fATreeEvent[ivar] = event[ivar];

   if(tt==Types::kTraining) fTrainAssignTree[clIndex]->Fill();
   else                     fTestAssignTree[clIndex]->Fill();
}
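// Event-wise assignment sketch (assumes two variables were declared via
// AddVariable; the values and weights below are illustrative):
//
//    std::vector<Double_t> ev(2);
//    ev[0] = 1.2;  ev[1] = -0.4;               // order: variables + targets + spectators
//    loader->AddSignalTrainingEvent(ev, 1.0);
//    loader->AddBackgroundTestEvent(ev, 0.5);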

////////////////////////////////////////////////////////////////////////////////
/// returns kTRUE if events have already been assigned to the class with index
/// clIndex via the AddEvent interface

Bool_t TMVA::DataLoader::UserAssignEvents(UInt_t clIndex)
{
   return fTrainAssignTree[clIndex]!=0;
}

////////////////////////////////////////////////////////////////////////////////
/// assign event-wise local trees to data set

void TMVA::DataLoader::SetInputTreesFromEventAssignTrees()
{
   UInt_t size = fTrainAssignTree.size();
   for(UInt_t i=0; i<size; i++) {
      if(!UserAssignEvents(i)) continue;
      const TString& className = DefaultDataSetInfo().GetClassInfo(i)->GetName();
      SetWeightExpression( "weight", className );
      AddTree(fTrainAssignTree[i], className, 1.0, TCut(""), Types::kTraining );
      AddTree(fTestAssignTree[i],  className, 1.0, TCut(""), Types::kTesting );
   }
}

////////////////////////////////////////////////////////////////////////////////
/// add tree to the data input; the tree type is interpreted from the string
/// treetype ("Training", "Test", or "Training and Testing")

void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
                                const TCut& cut, const TString& treetype )
{
   Types::ETreeType tt = Types::kMaxTreeType;
   TString tmpTreeType = treetype; tmpTreeType.ToLower();
   if      (tmpTreeType.Contains( "train" ) && tmpTreeType.Contains( "test" )) tt = Types::kMaxTreeType;
   else if (tmpTreeType.Contains( "train" ))                                   tt = Types::kTraining;
   else if (tmpTreeType.Contains( "test" ))                                    tt = Types::kTesting;
   else {
      Log() << kFATAL << "<AddTree> cannot interpret tree type: \"" << treetype
            << "\" should be \"Training\" or \"Test\" or \"Training and Testing\"" << Endl;
   }
   AddTree( tree, className, weight, cut, tt );
}
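// Usage sketch for the string-based tree type (myTree is a placeholder TTree*
// and the cut is illustrative):
//
//    loader->AddTree(myTree, "Signal", 1.0, TCut("var1>0"), "Training and Testing");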

////////////////////////////////////////////////////////////////////////////////

void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
                                const TCut& cut, Types::ETreeType tt )
{
   if(!tree)
      Log() << kFATAL << "Tree does not exist (empty pointer)." << Endl;

   DefaultDataSetInfo().AddClass( className );

   // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
   if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
      fAnalysisType = Types::kMulticlass;

   Log() << kINFO << "Add Tree " << tree->GetName() << " of type " << className
         << " with " << tree->GetEntries() << " events" << Endl;
   DataInput().AddTree( tree, className, weight, cut, tt );
}

////////////////////////////////////////////////////////////////////////////////
/// add signal tree to the data input

void TMVA::DataLoader::AddSignalTree( TTree* signal, Double_t weight, Types::ETreeType treetype )
{
   AddTree( signal, "Signal", weight, TCut(""), treetype );
}

////////////////////////////////////////////////////////////////////////////////
/// add signal tree from text file

void TMVA::DataLoader::AddSignalTree( TString datFileS, Double_t weight, Types::ETreeType treetype )
{
   // create tree from the ascii file
   TTree* signalTree = new TTree( "TreeS", "Tree (S)" );
   signalTree->ReadFile( datFileS );

   Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Signal file    : \""
         << datFileS << Endl;

   // register the tree under the "Signal" class
   AddTree( signalTree, "Signal", weight, TCut(""), treetype );
}
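// Usage sketch for ASCII input (the file names are placeholders; each file must
// begin with a branch descriptor line understood by TTree::ReadFile):
//
//    loader->AddSignalTree(TString("signal.dat"), 1.0);
//    loader->AddBackgroundTree(TString("background.dat"), 1.0);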

////////////////////////////////////////////////////////////////////////////////

void TMVA::DataLoader::AddSignalTree( TTree* signal, Double_t weight, const TString& treetype )
{
   AddTree( signal, "Signal", weight, TCut(""), treetype );
}

////////////////////////////////////////////////////////////////////////////////
/// add background tree to the data input

void TMVA::DataLoader::AddBackgroundTree( TTree* signal, Double_t weight, Types::ETreeType treetype )
{
   AddTree( signal, "Background", weight, TCut(""), treetype );
}

////////////////////////////////////////////////////////////////////////////////
/// add background tree from text file

void TMVA::DataLoader::AddBackgroundTree( TString datFileB, Double_t weight, Types::ETreeType treetype )
{
   // create tree from the ascii file
   TTree* bkgTree = new TTree( "TreeB", "Tree (B)" );
   bkgTree->ReadFile( datFileB );

   Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Background file : \""
         << datFileB << Endl;

   // register the tree under the "Background" class
   AddTree( bkgTree, "Background", weight, TCut(""), treetype );
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::DataLoader::AddBackgroundTree( TTree* signal, Double_t weight, const TString& treetype )
{
   AddTree( signal, "Background", weight, TCut(""), treetype );
}

////////////////////////////////////////////////////////////////////////////////
/// set signal tree

void TMVA::DataLoader::SetSignalTree( TTree* tree, Double_t weight )
{
   AddTree( tree, "Signal", weight );
}

////////////////////////////////////////////////////////////////////////////////
/// set background tree

void TMVA::DataLoader::SetBackgroundTree( TTree* tree, Double_t weight )
{
   AddTree( tree, "Background", weight );
}

////////////////////////////////////////////////////////////////////////////////
/// set input tree for the given class

void TMVA::DataLoader::SetTree( TTree* tree, const TString& className, Double_t weight )
{
   AddTree( tree, className, weight, TCut(""), Types::kMaxTreeType );
}

////////////////////////////////////////////////////////////////////////////////
/// define the input trees for signal and background; no cuts are applied

void TMVA::DataLoader::SetInputTrees( TTree* signal, TTree* background,
                                      Double_t signalWeight, Double_t backgroundWeight )
{
   AddTree( signal,     "Signal",     signalWeight,     TCut(""), Types::kMaxTreeType );
   AddTree( background, "Background", backgroundWeight, TCut(""), Types::kMaxTreeType );
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::DataLoader::SetInputTrees( const TString& datFileS, const TString& datFileB,
                                      Double_t signalWeight, Double_t backgroundWeight )
{
   DataInput().AddTree( datFileS, "Signal",     signalWeight );
   DataInput().AddTree( datFileB, "Background", backgroundWeight );
}

////////////////////////////////////////////////////////////////////////////////
/// define the input trees for signal and background from a single input tree
/// containing both signal and background events, which are selected by the
/// cuts SigCut and BgCut

void TMVA::DataLoader::SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut )
{
   AddTree( inputTree, "Signal",     1.0, SigCut, Types::kMaxTreeType );
   AddTree( inputTree, "Background", 1.0, BgCut,  Types::kMaxTreeType );
}
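// Usage sketch for a single mixed input tree (inputTree and the "classID" branch
// are placeholders):
//
//    loader->SetInputTrees(inputTree, TCut("classID==0"), TCut("classID==1"));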

////////////////////////////////////////////////////////////////////////////////
/// user inserts discriminating variable in data set info

void TMVA::DataLoader::AddVariable( const TString& expression, const TString& title, const TString& unit,
                                    char type, Double_t min, Double_t max )
{
   DefaultDataSetInfo().AddVariable( expression, title, unit, min, max, type );
}

////////////////////////////////////////////////////////////////////////////////
/// user inserts discriminating variable in data set info

void TMVA::DataLoader::AddVariable( const TString& expression, char type,
                                    Double_t min, Double_t max )
{
   DefaultDataSetInfo().AddVariable( expression, "", "", min, max, type );
}

////////////////////////////////////////////////////////////////////////////////
/// user inserts discriminating array of variables in data set info
/// in case input tree provides an array of values

void TMVA::DataLoader::AddVariablesArray(const TString &expression, int size, char type,
                                         Double_t min, Double_t max)
{
   DefaultDataSetInfo().AddVariablesArray(expression, size, "", "", min, max, type);
}

////////////////////////////////////////////////////////////////////////////////
/// user inserts target in data set info

void TMVA::DataLoader::AddTarget( const TString& expression, const TString& title, const TString& unit,
                                  Double_t min, Double_t max )
{
   if( fAnalysisType == Types::kNoAnalysisType )
      fAnalysisType = Types::kRegression;

   DefaultDataSetInfo().AddTarget( expression, title, unit, min, max );
}

////////////////////////////////////////////////////////////////////////////////
/// user inserts spectator in data set info

void TMVA::DataLoader::AddSpectator( const TString& expression, const TString& title, const TString& unit,
                                     Double_t min, Double_t max )
{
   DefaultDataSetInfo().AddSpectator( expression, title, unit, min, max );
}
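// Declaration sketch (expressions, titles and units below are placeholders):
//
//    loader->AddVariable("var1", "Variable 1", "GeV", 'F');
//    loader->AddVariable("var2+var3", 'F');       // formula expressions are allowed
//    loader->AddTarget("trueEnergy");             // sets the analysis type to regression if unset
//    loader->AddSpectator("eventNumber", "Event number", "");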

////////////////////////////////////////////////////////////////////////////////
/// returns the default DataSetInfo object; it is created (under the name of
/// this DataLoader) if it does not exist yet

TMVA::DataSetInfo& TMVA::DataLoader::DefaultDataSetInfo()
{
   return AddDataSet( fName );
}

////////////////////////////////////////////////////////////////////////////////
/// fill input variables in data set

void TMVA::DataLoader::SetInputVariables( std::vector<TString>* theVariables )
{
   for (std::vector<TString>::iterator it=theVariables->begin();
        it!=theVariables->end(); ++it) AddVariable(*it);
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::DataLoader::SetSignalWeightExpression( const TString& variable )
{
   DefaultDataSetInfo().SetWeightExpression(variable, "Signal");
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::DataLoader::SetBackgroundWeightExpression( const TString& variable )
{
   DefaultDataSetInfo().SetWeightExpression(variable, "Background");
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::DataLoader::SetWeightExpression( const TString& variable, const TString& className )
{
   //Log() << kWarning << DefaultDataSetInfo().GetNClasses() /*fClasses.size()*/ << Endl;
   if (className=="") {
      SetSignalWeightExpression(variable);
      SetBackgroundWeightExpression(variable);
   }
   else DefaultDataSetInfo().SetWeightExpression( variable, className );
}
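// Weight-expression sketch ("evtWeight" is a placeholder branch or formula):
//
//    loader->SetSignalWeightExpression("evtWeight");
//    loader->SetBackgroundWeightExpression("evtWeight");
//    // or, for all classes at once (empty class name, as handled above):
//    loader->SetWeightExpression("evtWeight");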

////////////////////////////////////////////////////////////////////////////////

void TMVA::DataLoader::SetCut( const TString& cut, const TString& className ) {
   SetCut( TCut(cut), className );
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::DataLoader::SetCut( const TCut& cut, const TString& className )
{
   DefaultDataSetInfo().SetCut( cut, className );
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::DataLoader::AddCut( const TString& cut, const TString& className )
{
   AddCut( TCut(cut), className );
}

////////////////////////////////////////////////////////////////////////////////

void TMVA::DataLoader::AddCut( const TCut& cut, const TString& className )
{
   DefaultDataSetInfo().AddCut( cut, className );
}

////////////////////////////////////////////////////////////////////////////////
/// prepare the training and test trees

void TMVA::DataLoader::PrepareTrainingAndTestTree( const TCut& cut,
                                                   Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest,
                                                   const TString& otherOpt )
{
   SetInputTreesFromEventAssignTrees();

   AddCut( cut );

   DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:%s",
                                              NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.Data()) );
}

////////////////////////////////////////////////////////////////////////////////
/// prepare the training and test trees
/// kept for backward compatibility

void TMVA::DataLoader::PrepareTrainingAndTestTree( const TCut& cut, Int_t Ntrain, Int_t Ntest )
{
   SetInputTreesFromEventAssignTrees();

   AddCut( cut );

   DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:EqualTrainSample:!V",
                                              Ntrain, Ntrain, Ntest, Ntest) );
}

////////////////////////////////////////////////////////////////////////////////
/// prepare the training and test trees
/// -> same cuts for signal and background

void TMVA::DataLoader::PrepareTrainingAndTestTree( const TCut& cut, const TString& opt )
{
   SetInputTreesFromEventAssignTrees();

   DefaultDataSetInfo().PrintClasses();
   AddCut( cut );
   DefaultDataSetInfo().SetSplitOptions( opt );
}

////////////////////////////////////////////////////////////////////////////////
/// prepare the training and test trees

void TMVA::DataLoader::PrepareTrainingAndTestTree( TCut sigcut, TCut bkgcut, const TString& splitOpt )
{
   // if event-wise data assignment, add local trees to dataset first
   SetInputTreesFromEventAssignTrees();

   //Log() << kINFO <<"Preparing trees for training and testing..."<< Endl;
   AddCut( sigcut, "Signal" );
   AddCut( bkgcut, "Background" );

   DefaultDataSetInfo().SetSplitOptions( splitOpt );
}
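// Split-option sketch (the cuts and event counts are illustrative; the option
// string is forwarded to DataSetInfo::SetSplitOptions as above):
//
//    loader->PrepareTrainingAndTestTree(TCut("var1>0"), TCut("var1<=0"),
//       "nTrain_Signal=1000:nTrain_Background=1000:SplitMode=Random:NormMode=NumEvents:!V");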

////////////////////////////////////////////////////////////////////////////////
/// Splits the training and testing datasets into a number of folds, as
/// required by the CrossValidation and HyperParameterOptimisation classes.
/// The option to split the training dataset into a training set and a
/// validation set is implemented but not currently used.

void TMVA::DataLoader::MakeKFoldDataSet( CvSplit& s )
{
   s.MakeKFoldDataSet( DefaultDataSetInfo() );
}

////////////////////////////////////////////////////////////////////////////////
/// Assigns the events of the given fold to the testing or training set.

void TMVA::DataLoader::PrepareFoldDataSet( CvSplit& s, UInt_t foldNumber, Types::ETreeType tt )
{
   s.PrepareFoldDataSet( DefaultDataSetInfo(), foldNumber, tt );
}

////////////////////////////////////////////////////////////////////////////////
/// Recombines the dataset. The precise semantics depend on the actual split.
///
/// Roughly the inverse operation of `MakeKFoldDataSet`, although the result
/// may differ; see the documentation of each particular split for details.

void TMVA::DataLoader::RecombineKFoldDataSet( CvSplit& s, Types::ETreeType tt )
{
   s.RecombineKFoldDataSet( DefaultDataSetInfo(), tt );
}
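// k-fold sketch, assuming the concrete split class TMVA::CvSplitKFolds declared
// in TMVA/CvSplit.h (constructor arguments beyond the number of folds are left
// at their defaults):
//
//    TMVA::CvSplitKFolds split(5);
//    loader->MakeKFoldDataSet(split);
//    loader->PrepareFoldDataSet(split, 0, TMVA::Types::kTraining);  // select fold 0
//    loader->RecombineKFoldDataSet(split);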

////////////////////////////////////////////////////////////////////////////////
/// Copy method used in variable importance (VI) and cross validation (CV);
/// returns a new DataLoader with the given name and the same input trees.

TMVA::DataLoader* TMVA::DataLoader::MakeCopy(TString name)
{
   TMVA::DataLoader* des = new TMVA::DataLoader(name);
   DataLoaderCopy(des,this);
   return des;
}

////////////////////////////////////////////////////////////////////////////////
/// Copy the signal and background trees registered in the DataInputHandler of
/// src into des (used when cloning a DataLoader, e.g. for sub-seeds)

void TMVA::DataLoaderCopy(TMVA::DataLoader* des, TMVA::DataLoader* src)
{
   for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Sbegin();treeinfo!=src->DataInput().Send();++treeinfo)
   {
      des->AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
   }

   for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Bbegin();treeinfo!=src->DataInput().Bend();++treeinfo)
   {
      des->AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
   }
}

////////////////////////////////////////////////////////////////////////////////
/// returns the correlation matrix of the input variables for the given class

TH2* TMVA::DataLoader::GetCorrelationMatrix( const TString& className )
{
   const TMatrixD * m = DefaultDataSetInfo().CorrelationMatrix(className);
   return DefaultDataSetInfo().CreateCorrelationMatrixHist(m,
            "CorrelationMatrix"+className, "Correlation Matrix ("+className+")");
}
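// Correlation-matrix sketch (the class name must match one registered above,
// e.g. "Signal"):
//
//    TH2* corr = loader->GetCorrelationMatrix("Signal");
//    corr->Draw("COLZ");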