Logo ROOT  
Reference Guide
DataLoader.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Omar Zapata
3// Mentors: Lorenzo Moneta, Sergei Gleyzer
4//NOTE: Based on TMVA::Factory
5
6/**********************************************************************************
7 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
8 * Package: TMVA *
9 * Class : DataLoader *
10 * Web : http://tmva.sourceforge.net *
11 * *
12 * Description: *
13 * This is a class to load datasets into every booked method *
14 * *
15 * Authors (alphabetical): *
16 * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
17 * Omar Zapata <Omar.Zapata@cern.ch> - ITM/UdeA, Colombia *
18 * Sergei Gleyzer<sergei.gleyzer@cern.ch> - CERN, Switzerland *
19 * *
20 * Copyright (c) 2005-2015: *
21 * CERN, Switzerland *
22 * ITM/UdeA, Colombia *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29
30/*! \class TMVA::DataLoader
31\ingroup TMVA
32
33*/
34
35#include "TFile.h"
36#include "TTree.h"
37#include "TH2.h"
38#include "TMath.h"
39#include "TMatrixD.h"
40
41#include "TMVA/DataLoader.h"
42#include "TMVA/Config.h"
43#include "TMVA/CvSplit.h"
44#include "TMVA/Tools.h"
45#include "TMVA/IMethod.h"
46#include "TMVA/MethodBase.h"
48#include "TMVA/DataSetManager.h"
49#include "TMVA/DataSetInfo.h"
50#include "TMVA/MethodBoost.h"
51#include "TMVA/MethodCategory.h"
52
53#include "TMVA/VariableInfo.h"
60
62
63////////////////////////////////////////////////////////////////////////////////
64/*** Create a data loader
65 \param[in] thedlName name of DataLoader object. This name will be used as the
66 top directory name where the training results
67 (weights, i.e .XML and .C files) will be stored.
68 The results will be stored by default in the `theDlName/weights`
69 directory and relative to the current directory. If the directory is not existing,
70 a new one will be created automatically.
71 For using a different location (i.e. a different path to the current directory) one
72 can set an absolute path location in `TMVA::gConfig()::GetIONames().fWeightFileDirPrefix`
73 For example, by setting
74~~~~~~~~~~~~~~~{.cpp}
75 TMVA::gConfig()::GetIONames().fWeightFileDirPrefix = "/tmp";
76 TMVA::gConfig()::GetIONames().fWeightFileDir = "myTrainigResults";
77~~~~~~~~~~~~~~~
78 The training results will be stored in the `/tmp/thedlName/myTrainingResults`
79 directory.
80**/
81
83: Configurable( ),
84 fDataSetManager ( NULL ), //DSMTEST
85 fDataInputHandler ( new DataInputHandler ),
86 fTransformations ( "I" ),
87 fVerbose ( kFALSE ),
88 fDataAssignType ( kAssignEvents ),
89 fATreeEvent (0)
90{
92 SetName(thedlName.Data());
93 fLogger->SetSource("DataLoader");
94}
95
96////////////////////////////////////////////////////////////////////////////////
97
99{
100 // destructor
101
102 std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
103 for (;trfIt != fDefaultTrfs.end(); ++trfIt) delete (*trfIt);
104
105 delete fDataInputHandler;
106
107 // destroy singletons
108 // DataSetManager::DestroyInstance(); // DSMTEST replaced by following line
109 delete fDataSetManager; // DSMTEST
110
111 // problem with call of REGISTER_METHOD macro ...
112 // ClassifierDataLoader::DestroyInstance();
113 // Types::DestroyInstance();
114 //Tools::DestroyInstance();
115 //Config::DestroyInstance();
116}
117
118
119////////////////////////////////////////////////////////////////////////////////
120
122{
123 return fDataSetManager->AddDataSetInfo(dsi); // DSMTEST
124}
125
126////////////////////////////////////////////////////////////////////////////////
127
129{
130 DataSetInfo* dsi = fDataSetManager->GetDataSetInfo(dsiName); // DSMTEST
131
132 if (dsi!=0) return *dsi;
133
134 return fDataSetManager->AddDataSetInfo(*(new DataSetInfo(dsiName))); // DSMTEST
135}
136
137////////////////////////////////////////////////////////////////////////////////
138
140{
141 return DefaultDataSetInfo(); // DSMTEST
142}
143
144////////////////////////////////////////////////////////////////////////////////
145/// Transforms the variables and return a new DataLoader with the transformed
146/// variables
147
149{
150 TString trOptions = "0";
151 TString trName = "None";
152 if (trafoDefinition.Contains("(")) {
153
154 // contains transformation parameters
155 Ssiz_t parStart = trafoDefinition.Index( "(" );
156 Ssiz_t parLen = trafoDefinition.Index( ")", parStart )-parStart+1;
157
158 trName = trafoDefinition(0,parStart);
159 trOptions = trafoDefinition(parStart,parLen);
160 trOptions.Remove(parLen-1,1);
161 trOptions.Remove(0,1);
162 }
163 else
164 trName = trafoDefinition;
165
166 VarTransformHandler* handler = new VarTransformHandler(this);
167 // variance threshold variable transformation
168 if (trName == "VT") {
169
170 // find threshold value from given input
171 Double_t threshold = 0.0;
172 if (!trOptions.IsFloat()){
173 Log() << kFATAL << " VT transformation must be passed a floating threshold value" << Endl;
174 delete handler;
175 return this;
176 }
177 else
178 threshold = trOptions.Atof();
179 TMVA::DataLoader *transformedLoader = handler->VarianceThreshold(threshold);
180 delete handler;
181 return transformedLoader;
182 }
183 else {
184 Log() << kFATAL << "Incorrect transformation string provided, please check" << Endl;
185 }
186 Log() << kINFO << "No transformation applied, returning original loader" << Endl;
187 return this;
188}
189
190////////////////////////////////////////////////////////////////////////////////
191// the next functions are to assign events directly
192
193////////////////////////////////////////////////////////////////////////////////
194/// create the data assignment tree (for event-wise data assignment by user)
195
197{
198 TTree * assignTree = new TTree( name, name );
199 assignTree->SetDirectory(0);
200 assignTree->Branch( "type", &fATreeType, "ATreeType/I" );
201 assignTree->Branch( "weight", &fATreeWeight, "ATreeWeight/F" );
202
203 std::vector<VariableInfo>& vars = DefaultDataSetInfo().GetVariableInfos();
204 std::vector<VariableInfo>& tgts = DefaultDataSetInfo().GetTargetInfos();
205 std::vector<VariableInfo>& spec = DefaultDataSetInfo().GetSpectatorInfos();
206
207 if (fATreeEvent.size()==0) fATreeEvent.resize(vars.size()+tgts.size()+spec.size());
208 // add variables
209 for (UInt_t ivar=0; ivar<vars.size(); ivar++) {
210 TString vname = vars[ivar].GetExpression();
211 assignTree->Branch( vname, &fATreeEvent[ivar], vname + "/F" );
212 }
213 // add targets
214 for (UInt_t itgt=0; itgt<tgts.size(); itgt++) {
215 TString vname = tgts[itgt].GetExpression();
216 assignTree->Branch( vname, &fATreeEvent[vars.size()+itgt], vname + "/F" );
217 }
218 // add spectators
219 for (UInt_t ispc=0; ispc<spec.size(); ispc++) {
220 TString vname = spec[ispc].GetExpression();
221 assignTree->Branch( vname, &fATreeEvent[vars.size()+tgts.size()+ispc], vname + "/F" );
222 }
223 return assignTree;
224}
225
226////////////////////////////////////////////////////////////////////////////////
227/// add signal training event
228
229void TMVA::DataLoader::AddSignalTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
230{
231 AddEvent( "Signal", Types::kTraining, event, weight );
232}
233
234////////////////////////////////////////////////////////////////////////////////
235/// add signal testing event
236
237void TMVA::DataLoader::AddSignalTestEvent( const std::vector<Double_t>& event, Double_t weight )
238{
239 AddEvent( "Signal", Types::kTesting, event, weight );
240}
241
242////////////////////////////////////////////////////////////////////////////////
243/// add signal training event
244
245void TMVA::DataLoader::AddBackgroundTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
246{
247 AddEvent( "Background", Types::kTraining, event, weight );
248}
249
250////////////////////////////////////////////////////////////////////////////////
251/// add signal training event
252
253void TMVA::DataLoader::AddBackgroundTestEvent( const std::vector<Double_t>& event, Double_t weight )
254{
255 AddEvent( "Background", Types::kTesting, event, weight );
256}
257
258////////////////////////////////////////////////////////////////////////////////
259/// add signal training event
260
261void TMVA::DataLoader::AddTrainingEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
262{
263 AddEvent( className, Types::kTraining, event, weight );
264}
265
266////////////////////////////////////////////////////////////////////////////////
267/// add signal test event
268
269void TMVA::DataLoader::AddTestEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
270{
271 AddEvent( className, Types::kTesting, event, weight );
272}
273
274////////////////////////////////////////////////////////////////////////////////
275/// add event
276/// vector event : the order of values is: variables + targets + spectators
277
279 const std::vector<Double_t>& event, Double_t weight )
280{
281 ClassInfo* theClass = DefaultDataSetInfo().AddClass(className); // returns class (creates it if necessary)
282 UInt_t clIndex = theClass->GetNumber();
283
284
285 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
286 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
287 fAnalysisType = Types::kMulticlass;
288
289
290 if (clIndex>=fTrainAssignTree.size()) {
291 fTrainAssignTree.resize(clIndex+1, 0);
292 fTestAssignTree.resize(clIndex+1, 0);
293 }
294
295 if (fTrainAssignTree[clIndex]==0) { // does not exist yet
296 fTrainAssignTree[clIndex] = CreateEventAssignTrees( Form("TrainAssignTree_%s", className.Data()) );
297 fTestAssignTree[clIndex] = CreateEventAssignTrees( Form("TestAssignTree_%s", className.Data()) );
298 }
299
300 fATreeType = clIndex;
301 fATreeWeight = weight;
302 for (UInt_t ivar=0; ivar<event.size(); ivar++) fATreeEvent[ivar] = event[ivar];
303
304 if(tt==Types::kTraining) fTrainAssignTree[clIndex]->Fill();
305 else fTestAssignTree[clIndex]->Fill();
306
307}
308
309////////////////////////////////////////////////////////////////////////////////
310///
311
313{
314 return fTrainAssignTree[clIndex]!=0;
315}
316
317////////////////////////////////////////////////////////////////////////////////
318/// assign event-wise local trees to data set
319
321{
322 UInt_t size = fTrainAssignTree.size();
323 for(UInt_t i=0; i<size; i++) {
324 if(!UserAssignEvents(i)) continue;
325 const TString& className = DefaultDataSetInfo().GetClassInfo(i)->GetName();
326 SetWeightExpression( "weight", className );
327 AddTree(fTrainAssignTree[i], className, 1.0, TCut(""), Types::kTraining );
328 AddTree(fTestAssignTree[i], className, 1.0, TCut(""), Types::kTesting );
329 }
330}
331
332////////////////////////////////////////////////////////////////////////////////
333/// number of signal events (used to compute significance)
334
335void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
336 const TCut& cut, const TString& treetype )
337{
339 TString tmpTreeType = treetype; tmpTreeType.ToLower();
340 if (tmpTreeType.Contains( "train" ) && tmpTreeType.Contains( "test" )) tt = Types::kMaxTreeType;
341 else if (tmpTreeType.Contains( "train" )) tt = Types::kTraining;
342 else if (tmpTreeType.Contains( "test" )) tt = Types::kTesting;
343 else {
344 Log() << kFATAL << "<AddTree> cannot interpret tree type: \"" << treetype
345 << "\" should be \"Training\" or \"Test\" or \"Training and Testing\"" << Endl;
346 }
347 AddTree( tree, className, weight, cut, tt );
348}
349
350////////////////////////////////////////////////////////////////////////////////
351
352void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
353 const TCut& cut, Types::ETreeType tt )
354{
355 if(!tree)
356 Log() << kFATAL << "Tree does not exist (empty pointer)." << Endl;
357
358 DefaultDataSetInfo().AddClass( className );
359
360 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
361 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
362 fAnalysisType = Types::kMulticlass;
363
364 Log() << kINFO<< "Add Tree " << tree->GetName() << " of type " << className
365 << " with " << tree->GetEntries() << " events" << Endl;
366 DataInput().AddTree( tree, className, weight, cut, tt );
367}
368
369////////////////////////////////////////////////////////////////////////////////
370/// number of signal events (used to compute significance)
371
373{
374 AddTree( signal, "Signal", weight, TCut(""), treetype );
375}
376
377////////////////////////////////////////////////////////////////////////////////
378/// add signal tree from text file
379
381{
382 // create trees from these ascii files
383 TTree* signalTree = new TTree( "TreeS", "Tree (S)" );
384 signalTree->ReadFile( datFileS );
385
386 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Signal file : \""
387 << datFileS << Endl;
388
389 // number of signal events (used to compute significance)
390 AddTree( signalTree, "Signal", weight, TCut(""), treetype );
391}
392
393////////////////////////////////////////////////////////////////////////////////
394
395void TMVA::DataLoader::AddSignalTree( TTree* signal, Double_t weight, const TString& treetype )
396{
397 AddTree( signal, "Signal", weight, TCut(""), treetype );
398}
399
400////////////////////////////////////////////////////////////////////////////////
401/// number of signal events (used to compute significance)
402
404{
405 AddTree( signal, "Background", weight, TCut(""), treetype );
406}
407
408////////////////////////////////////////////////////////////////////////////////
409/// add background tree from text file
410
412{
413 // create trees from these ascii files
414 TTree* bkgTree = new TTree( "TreeB", "Tree (B)" );
415 bkgTree->ReadFile( datFileB );
416
417 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Background file : \""
418 << datFileB << Endl;
419
420 // number of signal events (used to compute significance)
421 AddTree( bkgTree, "Background", weight, TCut(""), treetype );
422}
423
424////////////////////////////////////////////////////////////////////////////////
425
426void TMVA::DataLoader::AddBackgroundTree( TTree* signal, Double_t weight, const TString& treetype )
427{
428 AddTree( signal, "Background", weight, TCut(""), treetype );
429}
430
431////////////////////////////////////////////////////////////////////////////////
432
434{
435 AddTree( tree, "Signal", weight );
436}
437
438////////////////////////////////////////////////////////////////////////////////
439
441{
442 AddTree( tree, "Background", weight );
443}
444
445////////////////////////////////////////////////////////////////////////////////
446/// set background tree
447
448void TMVA::DataLoader::SetTree( TTree* tree, const TString& className, Double_t weight )
449{
450 AddTree( tree, className, weight, TCut(""), Types::kMaxTreeType );
451}
452
453////////////////////////////////////////////////////////////////////////////////
454/// define the input trees for signal and background; no cuts are applied
455
456void TMVA::DataLoader::SetInputTrees( TTree* signal, TTree* background,
457 Double_t signalWeight, Double_t backgroundWeight )
458{
459 AddTree( signal, "Signal", signalWeight, TCut(""), Types::kMaxTreeType );
460 AddTree( background, "Background", backgroundWeight, TCut(""), Types::kMaxTreeType );
461}
462
463////////////////////////////////////////////////////////////////////////////////
464
465void TMVA::DataLoader::SetInputTrees( const TString& datFileS, const TString& datFileB,
466 Double_t signalWeight, Double_t backgroundWeight )
467{
468 DataInput().AddTree( datFileS, "Signal", signalWeight );
469 DataInput().AddTree( datFileB, "Background", backgroundWeight );
470}
471
472////////////////////////////////////////////////////////////////////////////////
473/// define the input trees for signal and background from single input tree,
474/// containing both signal and background events distinguished by the type
475/// identifiers: SigCut and BgCut
476
477void TMVA::DataLoader::SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut )
478{
479 AddTree( inputTree, "Signal", 1.0, SigCut, Types::kMaxTreeType );
480 AddTree( inputTree, "Background", 1.0, BgCut , Types::kMaxTreeType );
481}
482
483////////////////////////////////////////////////////////////////////////////////
484/// user inserts discriminating variable in data set info
485
486void TMVA::DataLoader::AddVariable( const TString& expression, const TString& title, const TString& unit,
487 char type, Double_t min, Double_t max )
488{
489 DefaultDataSetInfo().AddVariable( expression, title, unit, min, max, type );
490}
491
492////////////////////////////////////////////////////////////////////////////////
493/// user inserts discriminating variable in data set info
494
495void TMVA::DataLoader::AddVariable( const TString& expression, char type,
496 Double_t min, Double_t max )
497{
498 DefaultDataSetInfo().AddVariable( expression, "", "", min, max, type );
499}
500
501////////////////////////////////////////////////////////////////////////////////
502/// user inserts discriminating array of variables in data set info
503/// in case input tree provides an array of values
504
505void TMVA::DataLoader::AddVariablesArray(const TString &expression, int size, char type,
506 Double_t min, Double_t max)
507{
508 DefaultDataSetInfo().AddVariablesArray(expression, size, "", "", min, max, type);
509}
510////////////////////////////////////////////////////////////////////////////////
511/// user inserts target in data set info
512
513void TMVA::DataLoader::AddTarget( const TString& expression, const TString& title, const TString& unit,
514 Double_t min, Double_t max )
515{
516 if( fAnalysisType == Types::kNoAnalysisType )
517 fAnalysisType = Types::kRegression;
518
519 DefaultDataSetInfo().AddTarget( expression, title, unit, min, max );
520}
521
522////////////////////////////////////////////////////////////////////////////////
523/// user inserts target in data set info
524
525void TMVA::DataLoader::AddSpectator( const TString& expression, const TString& title, const TString& unit,
526 Double_t min, Double_t max )
527{
528 DefaultDataSetInfo().AddSpectator( expression, title, unit, min, max );
529}
530
531////////////////////////////////////////////////////////////////////////////////
532/// default creation
533
535{
536 return AddDataSet( fName );
537}
538
539////////////////////////////////////////////////////////////////////////////////
540/// fill input variables in data set
541
542void TMVA::DataLoader::SetInputVariables( std::vector<TString>* theVariables )
543{
544 for (std::vector<TString>::iterator it=theVariables->begin();
545 it!=theVariables->end(); ++it) AddVariable(*it);
546}
547
548////////////////////////////////////////////////////////////////////////////////
549
551{
552 DefaultDataSetInfo().SetWeightExpression(variable, "Signal");
553}
554
555////////////////////////////////////////////////////////////////////////////////
556
558{
559 DefaultDataSetInfo().SetWeightExpression(variable, "Background");
560}
561
562////////////////////////////////////////////////////////////////////////////////
563
564void TMVA::DataLoader::SetWeightExpression( const TString& variable, const TString& className )
565{
566 //Log() << kWarning << DefaultDataSetInfo().GetNClasses() /*fClasses.size()*/ << Endl;
567 if (className=="") {
568 SetSignalWeightExpression(variable);
569 SetBackgroundWeightExpression(variable);
570 }
571 else DefaultDataSetInfo().SetWeightExpression( variable, className );
572}
573
574////////////////////////////////////////////////////////////////////////////////
575
576void TMVA::DataLoader::SetCut( const TString& cut, const TString& className ) {
577 SetCut( TCut(cut), className );
578}
579
580////////////////////////////////////////////////////////////////////////////////
581
582void TMVA::DataLoader::SetCut( const TCut& cut, const TString& className )
583{
584 DefaultDataSetInfo().SetCut( cut, className );
585}
586
587////////////////////////////////////////////////////////////////////////////////
588
589void TMVA::DataLoader::AddCut( const TString& cut, const TString& className )
590{
591 AddCut( TCut(cut), className );
592}
593
594////////////////////////////////////////////////////////////////////////////////
595void TMVA::DataLoader::AddCut( const TCut& cut, const TString& className )
596{
597 DefaultDataSetInfo().AddCut( cut, className );
598}
599
600////////////////////////////////////////////////////////////////////////////////
601/// prepare the training and test trees
602
604 Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest,
605 const TString& otherOpt )
606{
607 SetInputTreesFromEventAssignTrees();
608
609 AddCut( cut );
610
611 DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:%s",
612 NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.Data()) );
613}
614
615////////////////////////////////////////////////////////////////////////////////
616/// prepare the training and test trees
617/// kept for backward compatibility
618
620{
621 SetInputTreesFromEventAssignTrees();
622
623 AddCut( cut );
624
625 DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:EqualTrainSample:!V",
626 Ntrain, Ntrain, Ntest, Ntest) );
627}
628
629////////////////////////////////////////////////////////////////////////////////
630/// prepare the training and test trees
631/// -> same cuts for signal and background
632
634{
635 SetInputTreesFromEventAssignTrees();
636
637 DefaultDataSetInfo().PrintClasses();
638 AddCut( cut );
639 DefaultDataSetInfo().SetSplitOptions( opt );
640}
641
642////////////////////////////////////////////////////////////////////////////////
643/// prepare the training and test trees
644
645void TMVA::DataLoader::PrepareTrainingAndTestTree( TCut sigcut, TCut bkgcut, const TString& splitOpt )
646{
647 // if event-wise data assignment, add local trees to dataset first
648 SetInputTreesFromEventAssignTrees();
649
650 //Log() << kINFO <<"Preparing trees for training and testing..."<< Endl;
651 AddCut( sigcut, "Signal" );
652 AddCut( bkgcut, "Background" );
653
654 DefaultDataSetInfo().SetSplitOptions( splitOpt );
655}
656
657////////////////////////////////////////////////////////////////////////////////
658/// Function required to split the training and testing datasets into a
659/// number of folds. Required by the CrossValidation and HyperParameterOptimisation
660/// classes. The option to split the training dataset into a training set and
661/// a validation set is implemented but not currently used.
662
664{
665 s.MakeKFoldDataSet( DefaultDataSetInfo() );
666}
667
668////////////////////////////////////////////////////////////////////////////////
669/// Function for assigning the correct folds to the testing or training set.
670
672{
673 s.PrepareFoldDataSet( DefaultDataSetInfo(), foldNumber, tt );
674}
675
676
677////////////////////////////////////////////////////////////////////////////////
678/// Recombines the dataset. The precise semantics depend on the actual split.
679///
680/// Similar to the inverse operation of `MakeKFoldDataSet` but _will_ differ.
681/// See documentation for each particular split for more information.
682///
683
685{
686 s.RecombineKFoldDataSet( DefaultDataSetInfo(), tt );
687}
688
689////////////////////////////////////////////////////////////////////////////////
690/// Copy method use in VI and CV
691
693{
695 DataLoaderCopy(des,this);
696 return des;
697}
698
699////////////////////////////////////////////////////////////////////////////////
700///Loading Dataset from DataInputHandler for subseed
701
703{
704 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Sbegin();treeinfo!=src->DataInput().Send();++treeinfo)
705 {
706 des->AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
707 }
708
709 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Bbegin();treeinfo!=src->DataInput().Bend();++treeinfo)
710 {
711 des->AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
712 }
713}
714
715////////////////////////////////////////////////////////////////////////////////
716/// returns the correlation matrix of datasets
717
719{
720 const TMatrixD * m = DefaultDataSetInfo().CorrelationMatrix(className);
721 return DefaultDataSetInfo().CreateCorrelationMatrixHist(m,
722 "CorrelationMatrix"+className, "Correlation Matrix ("+className+")");
723}
const Bool_t kFALSE
Definition: RtypesCore.h:90
double Double_t
Definition: RtypesCore.h:57
#define ClassImp(name)
Definition: Rtypes.h:361
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
char * Form(const char *fmt,...)
A specialized string object used for TTree selections.
Definition: TCut.h:25
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
Class that contains all the information of a class.
Definition: ClassInfo.h:49
UInt_t GetNumber() const
Definition: ClassInfo.h:65
MsgLogger * fLogger
Definition: Configurable.h:128
Class that contains all the data information.
std::vector< TreeInfo >::const_iterator Send() const
std::vector< TreeInfo >::const_iterator Sbegin() const
std::vector< TreeInfo >::const_iterator Bbegin() const
std::vector< TreeInfo >::const_iterator Bend() const
DataInputHandler * fDataInputHandler
Definition: DataLoader.h:190
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
Definition: DataLoader.cxx:196
void AddVariablesArray(const TString &expression, int size, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating array of variables in data set info in case input tree provides an array ...
Definition: DataLoader.cxx:505
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
Definition: DataLoader.cxx:440
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: DataLoader.cxx:372
DataSetInfo & AddDataSet(DataSetInfo &)
Definition: DataLoader.cxx:121
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:525
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
Definition: DataLoader.cxx:320
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal training event
Definition: DataLoader.cxx:261
void SetTree(TTree *tree, const TString &className, Double_t weight)
set background tree
Definition: DataLoader.cxx:448
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
Definition: DataLoader.cxx:237
DataSetInfo & DefaultDataSetInfo()
default creation
Definition: DataLoader.cxx:534
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:253
DataSetManager * fDataSetManager
Definition: DataLoader.h:187
DataLoader * MakeCopy(TString name)
Copy method use in VI and CV.
Definition: DataLoader.cxx:692
void SetSignalWeightExpression(const TString &variable)
Definition: DataLoader.cxx:550
void MakeKFoldDataSet(CvSplit &s)
Function required to split the training and testing datasets into a number of folds.
Definition: DataLoader.cxx:663
void SetWeightExpression(const TString &variable, const TString &className="")
Definition: DataLoader.cxx:564
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:245
void RecombineKFoldDataSet(CvSplit &s, Types::ETreeType tt=Types::kTraining)
Recombines the dataset.
Definition: DataLoader.cxx:684
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
Definition: DataLoader.cxx:148
void SetBackgroundWeightExpression(const TString &variable)
Definition: DataLoader.cxx:557
void AddCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:589
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add event vector event : the order of values is: variables + targets + spectators
Definition: DataLoader.cxx:278
DataLoader(TString thedlName="default")
Definition: DataLoader.cxx:82
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:633
DataInputHandler & DataInput()
Definition: DataLoader.h:173
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: DataLoader.cxx:403
DataSetInfo & GetDataSetInfo()
Definition: DataLoader.cxx:139
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:513
TH2 * GetCorrelationMatrix(const TString &className)
returns the correlation matrix of datasets
Definition: DataLoader.cxx:718
Bool_t UserAssignEvents(UInt_t clIndex)
Definition: DataLoader.cxx:312
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:229
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal test event
Definition: DataLoader.cxx:269
void SetSignalTree(TTree *signal, Double_t weight=1.0)
Definition: DataLoader.cxx:433
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
Definition: DataLoader.cxx:465
virtual ~DataLoader()
Definition: DataLoader.cxx:98
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
Definition: DataLoader.cxx:352
void SetInputVariables(std::vector< TString > *theVariables)
fill input variables in data set
Definition: DataLoader.cxx:542
void SetCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:576
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:486
void PrepareFoldDataSet(CvSplit &s, UInt_t foldNumber, Types::ETreeType tt=Types::kTraining)
Function for assigning the correct folds to the testing or training set.
Definition: DataLoader.cxx:671
Class that contains all the data information.
Definition: DataSetInfo.h:60
Class that contains all the data information.
void SetSource(const std::string &source)
Definition: MsgLogger.h:70
@ kMulticlass
Definition: Types.h:130
@ kNoAnalysisType
Definition: Types.h:131
@ kRegression
Definition: Types.h:129
@ kMaxTreeType
Definition: Types.h:146
@ kTraining
Definition: Types.h:144
@ kTesting
Definition: Types.h:145
TMVA::DataLoader * VarianceThreshold(Double_t threshold)
Computes variance of all the variables and returns a new DataLoader with the selected variables whose...
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition: TNamed.cxx:140
Basic string class.
Definition: TString.h:131
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1125
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:1987
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1791
const char * Data() const
Definition: TString.h:364
TString & Remove(Ssiz_t pos)
Definition: TString.h:668
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:634
A TTree represents a columnar dataset.
Definition: TTree.h:78
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
Definition: TTree.cxx:8813
TBranch * Branch(const char *name, T *obj, Int_t bufsize=32000, Int_t splitlevel=99)
Add a new branch, and infer the data type from the type of obj being passed.
Definition: TTree.h:348
virtual Long64_t ReadFile(const char *filename, const char *branchDescriptor="", char delimiter=' ')
Create or simply read branches from filename.
Definition: TTree.cxx:7467
static constexpr double s
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Log(Double_t x)
Definition: TMath.h:750
Definition: tree.py:1
auto * m
Definition: textangle.C:8
auto * tt
Definition: textangle.C:16