Logo ROOT   6.16/01
Reference Guide
DataLoader.cxx
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Omar Zapata
3// Mentors: Lorenzo Moneta, Sergei Gleyzer
4//NOTE: Based on TMVA::Factory
5
6/**********************************************************************************
7 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
8 * Package: TMVA *
9 * Class : DataLoader *
10 * Web : http://tmva.sourceforge.net *
11 * *
12 * Description: *
13 * This is a class to load datasets into every booked method *
14 * *
15 * Authors (alphabetical): *
16 * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
17 * Omar Zapata <Omar.Zapata@cern.ch> - ITM/UdeA, Colombia *
18 * Sergei Gleyzer<sergei.gleyzer@cern.ch> - CERN, Switzerland *
19 * *
20 * Copyright (c) 2005-2015: *
21 * CERN, Switzerland *
22 * ITM/UdeA, Colombia *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29
30/*! \class TMVA::DataLoader
31\ingroup TMVA
32
33*/
34
35#include "TROOT.h"
36#include "TFile.h"
37#include "TTree.h"
38#include "TLeaf.h"
39#include "TEventList.h"
40#include "TH2.h"
41#include "TText.h"
42#include "TStyle.h"
43#include "TMatrixF.h"
44#include "TMatrixDSym.h"
45#include "TPaletteAxis.h"
46#include "TPrincipal.h"
47#include "TMath.h"
48#include "TObjString.h"
49#include "TRandom3.h"
50
51#include <string.h>
52
53#include "TMVA/Configurable.h"
54#include "TMVA/DataLoader.h"
55#include "TMVA/Config.h"
56#include "TMVA/CvSplit.h"
57#include "TMVA/Tools.h"
58#include "TMVA/Ranking.h"
59#include "TMVA/DataSet.h"
60#include "TMVA/IMethod.h"
61#include "TMVA/MethodBase.h"
63#include "TMVA/DataSetManager.h"
64#include "TMVA/DataSetInfo.h"
65#include "TMVA/MethodBoost.h"
66#include "TMVA/MethodCategory.h"
67
68#include "TMVA/VariableInfo.h"
75
76
80#include "TMVA/Types.h"
81
83
84
85////////////////////////////////////////////////////////////////////////////////
86
88: Configurable( ),
89 fDataSetManager ( NULL ), //DSMTEST
90 fDataInputHandler ( new DataInputHandler ),
91 fTransformations ( "I" ),
92 fVerbose ( kFALSE ),
93 fDataAssignType ( kAssignEvents ),
94 fATreeEvent (0)
95{
97 SetName(thedlName.Data());
98 fLogger->SetSource("DataLoader");
99}
100
101////////////////////////////////////////////////////////////////////////////////
102
104{
105 // destructor
106
107 std::vector<TMVA::VariableTransformBase*>::iterator trfIt = fDefaultTrfs.begin();
108 for (;trfIt != fDefaultTrfs.end(); ++trfIt) delete (*trfIt);
109
110 delete fDataInputHandler;
111
112 // destroy singletons
113 // DataSetManager::DestroyInstance(); // DSMTEST replaced by following line
114 delete fDataSetManager; // DSMTEST
115
116 // problem with call of REGISTER_METHOD macro ...
117 // ClassifierDataLoader::DestroyInstance();
118 // Types::DestroyInstance();
119 //Tools::DestroyInstance();
120 //Config::DestroyInstance();
121}
122
123
124////////////////////////////////////////////////////////////////////////////////
125
127{
128 return fDataSetManager->AddDataSetInfo(dsi); // DSMTEST
129}
130
131////////////////////////////////////////////////////////////////////////////////
132
134{
135 DataSetInfo* dsi = fDataSetManager->GetDataSetInfo(dsiName); // DSMTEST
136
137 if (dsi!=0) return *dsi;
138
139 return fDataSetManager->AddDataSetInfo(*(new DataSetInfo(dsiName))); // DSMTEST
140}
141
142////////////////////////////////////////////////////////////////////////////////
143
145{
146 return DefaultDataSetInfo(); // DSMTEST
147}
148
149////////////////////////////////////////////////////////////////////////////////
150/// Transforms the variables and return a new DataLoader with the transformed
151/// variables
152
154{
155 TString trOptions = "0";
156 TString trName = "None";
157 if (trafoDefinition.Contains("(")) {
158
159 // contains transformation parameters
160 Ssiz_t parStart = trafoDefinition.Index( "(" );
161 Ssiz_t parLen = trafoDefinition.Index( ")", parStart )-parStart+1;
162
163 trName = trafoDefinition(0,parStart);
164 trOptions = trafoDefinition(parStart,parLen);
165 trOptions.Remove(parLen-1,1);
166 trOptions.Remove(0,1);
167 }
168 else
169 trName = trafoDefinition;
170
171 VarTransformHandler* handler = new VarTransformHandler(this);
172 // variance threshold variable transformation
173 if (trName == "VT") {
174
175 // find threshold value from given input
176 Double_t threshold = 0.0;
177 if (!trOptions.IsFloat()){
178 Log() << kFATAL << " VT transformation must be passed a floating threshold value" << Endl;
179 delete handler;
180 return this;
181 }
182 else
183 threshold = trOptions.Atof();
184 TMVA::DataLoader *transformedLoader = handler->VarianceThreshold(threshold);
185 delete handler;
186 return transformedLoader;
187 }
188 else {
189 Log() << kFATAL << "Incorrect transformation string provided, please check" << Endl;
190 }
191 Log() << kINFO << "No transformation applied, returning original loader" << Endl;
192 return this;
193}
194
195////////////////////////////////////////////////////////////////////////////////
196// the next functions are to assign events directly
197
198////////////////////////////////////////////////////////////////////////////////
199/// create the data assignment tree (for event-wise data assignment by user)
200
202{
203 TTree * assignTree = new TTree( name, name );
204 assignTree->SetDirectory(0);
205 assignTree->Branch( "type", &fATreeType, "ATreeType/I" );
206 assignTree->Branch( "weight", &fATreeWeight, "ATreeWeight/F" );
207
208 std::vector<VariableInfo>& vars = DefaultDataSetInfo().GetVariableInfos();
209 std::vector<VariableInfo>& tgts = DefaultDataSetInfo().GetTargetInfos();
210 std::vector<VariableInfo>& spec = DefaultDataSetInfo().GetSpectatorInfos();
211
212 if (fATreeEvent.size()==0) fATreeEvent.resize(vars.size()+tgts.size()+spec.size());
213 // add variables
214 for (UInt_t ivar=0; ivar<vars.size(); ivar++) {
215 TString vname = vars[ivar].GetExpression();
216 assignTree->Branch( vname, &fATreeEvent[ivar], vname + "/F" );
217 }
218 // add targets
219 for (UInt_t itgt=0; itgt<tgts.size(); itgt++) {
220 TString vname = tgts[itgt].GetExpression();
221 assignTree->Branch( vname, &fATreeEvent[vars.size()+itgt], vname + "/F" );
222 }
223 // add spectators
224 for (UInt_t ispc=0; ispc<spec.size(); ispc++) {
225 TString vname = spec[ispc].GetExpression();
226 assignTree->Branch( vname, &fATreeEvent[vars.size()+tgts.size()+ispc], vname + "/F" );
227 }
228 return assignTree;
229}
230
231////////////////////////////////////////////////////////////////////////////////
232/// add signal training event
233
234void TMVA::DataLoader::AddSignalTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
235{
236 AddEvent( "Signal", Types::kTraining, event, weight );
237}
238
239////////////////////////////////////////////////////////////////////////////////
240/// add signal testing event
241
242void TMVA::DataLoader::AddSignalTestEvent( const std::vector<Double_t>& event, Double_t weight )
243{
244 AddEvent( "Signal", Types::kTesting, event, weight );
245}
246
247////////////////////////////////////////////////////////////////////////////////
248/// add signal training event
249
250void TMVA::DataLoader::AddBackgroundTrainingEvent( const std::vector<Double_t>& event, Double_t weight )
251{
252 AddEvent( "Background", Types::kTraining, event, weight );
253}
254
255////////////////////////////////////////////////////////////////////////////////
256/// add signal training event
257
258void TMVA::DataLoader::AddBackgroundTestEvent( const std::vector<Double_t>& event, Double_t weight )
259{
260 AddEvent( "Background", Types::kTesting, event, weight );
261}
262
263////////////////////////////////////////////////////////////////////////////////
264/// add signal training event
265
266void TMVA::DataLoader::AddTrainingEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
267{
268 AddEvent( className, Types::kTraining, event, weight );
269}
270
271////////////////////////////////////////////////////////////////////////////////
272/// add signal test event
273
274void TMVA::DataLoader::AddTestEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight )
275{
276 AddEvent( className, Types::kTesting, event, weight );
277}
278
279////////////////////////////////////////////////////////////////////////////////
280/// add event
281/// vector event : the order of values is: variables + targets + spectators
282
284 const std::vector<Double_t>& event, Double_t weight )
285{
286 ClassInfo* theClass = DefaultDataSetInfo().AddClass(className); // returns class (creates it if necessary)
287 UInt_t clIndex = theClass->GetNumber();
288
289
290 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
291 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
292 fAnalysisType = Types::kMulticlass;
293
294
295 if (clIndex>=fTrainAssignTree.size()) {
296 fTrainAssignTree.resize(clIndex+1, 0);
297 fTestAssignTree.resize(clIndex+1, 0);
298 }
299
300 if (fTrainAssignTree[clIndex]==0) { // does not exist yet
301 fTrainAssignTree[clIndex] = CreateEventAssignTrees( Form("TrainAssignTree_%s", className.Data()) );
302 fTestAssignTree[clIndex] = CreateEventAssignTrees( Form("TestAssignTree_%s", className.Data()) );
303 }
304
305 fATreeType = clIndex;
306 fATreeWeight = weight;
307 for (UInt_t ivar=0; ivar<event.size(); ivar++) fATreeEvent[ivar] = event[ivar];
308
309 if(tt==Types::kTraining) fTrainAssignTree[clIndex]->Fill();
310 else fTestAssignTree[clIndex]->Fill();
311
312}
313
314////////////////////////////////////////////////////////////////////////////////
315///
316
318{
319 return fTrainAssignTree[clIndex]!=0;
320}
321
322////////////////////////////////////////////////////////////////////////////////
323/// assign event-wise local trees to data set
324
326{
327 UInt_t size = fTrainAssignTree.size();
328 for(UInt_t i=0; i<size; i++) {
329 if(!UserAssignEvents(i)) continue;
330 const TString& className = DefaultDataSetInfo().GetClassInfo(i)->GetName();
331 SetWeightExpression( "weight", className );
332 AddTree(fTrainAssignTree[i], className, 1.0, TCut(""), Types::kTraining );
333 AddTree(fTestAssignTree[i], className, 1.0, TCut(""), Types::kTesting );
334 }
335}
336
337////////////////////////////////////////////////////////////////////////////////
338/// number of signal events (used to compute significance)
339
340void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
341 const TCut& cut, const TString& treetype )
342{
344 TString tmpTreeType = treetype; tmpTreeType.ToLower();
345 if (tmpTreeType.Contains( "train" ) && tmpTreeType.Contains( "test" )) tt = Types::kMaxTreeType;
346 else if (tmpTreeType.Contains( "train" )) tt = Types::kTraining;
347 else if (tmpTreeType.Contains( "test" )) tt = Types::kTesting;
348 else {
349 Log() << kFATAL << "<AddTree> cannot interpret tree type: \"" << treetype
350 << "\" should be \"Training\" or \"Test\" or \"Training and Testing\"" << Endl;
351 }
352 AddTree( tree, className, weight, cut, tt );
353}
354
355////////////////////////////////////////////////////////////////////////////////
356
357void TMVA::DataLoader::AddTree( TTree* tree, const TString& className, Double_t weight,
358 const TCut& cut, Types::ETreeType tt )
359{
360 if(!tree)
361 Log() << kFATAL << "Tree does not exist (empty pointer)." << Endl;
362
363 DefaultDataSetInfo().AddClass( className );
364
365 // set analysistype to "kMulticlass" if more than two classes and analysistype == kNoAnalysisType
366 if( fAnalysisType == Types::kNoAnalysisType && DefaultDataSetInfo().GetNClasses() > 2 )
367 fAnalysisType = Types::kMulticlass;
368
369 Log() << kINFO<< "Add Tree " << tree->GetName() << " of type " << className
370 << " with " << tree->GetEntries() << " events" << Endl;
371 DataInput().AddTree( tree, className, weight, cut, tt );
372}
373
374////////////////////////////////////////////////////////////////////////////////
375/// number of signal events (used to compute significance)
376
378{
379 AddTree( signal, "Signal", weight, TCut(""), treetype );
380}
381
382////////////////////////////////////////////////////////////////////////////////
383/// add signal tree from text file
384
386{
387 // create trees from these ascii files
388 TTree* signalTree = new TTree( "TreeS", "Tree (S)" );
389 signalTree->ReadFile( datFileS );
390
391 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Signal file : \""
392 << datFileS << Endl;
393
394 // number of signal events (used to compute significance)
395 AddTree( signalTree, "Signal", weight, TCut(""), treetype );
396}
397
398////////////////////////////////////////////////////////////////////////////////
399
401{
402 AddTree( signal, "Signal", weight, TCut(""), treetype );
403}
404
405////////////////////////////////////////////////////////////////////////////////
406/// number of signal events (used to compute significance)
407
409{
410 AddTree( signal, "Background", weight, TCut(""), treetype );
411}
412
413////////////////////////////////////////////////////////////////////////////////
414/// add background tree from text file
415
417{
418 // create trees from these ascii files
419 TTree* bkgTree = new TTree( "TreeB", "Tree (B)" );
420 bkgTree->ReadFile( datFileB );
421
422 Log() << kINFO << "Create TTree objects from ASCII input files ... \n- Background file : \""
423 << datFileB << Endl;
424
425 // number of signal events (used to compute significance)
426 AddTree( bkgTree, "Background", weight, TCut(""), treetype );
427}
428
429////////////////////////////////////////////////////////////////////////////////
430
432{
433 AddTree( signal, "Background", weight, TCut(""), treetype );
434}
435
436////////////////////////////////////////////////////////////////////////////////
437
439{
440 AddTree( tree, "Signal", weight );
441}
442
443////////////////////////////////////////////////////////////////////////////////
444
446{
447 AddTree( tree, "Background", weight );
448}
449
450////////////////////////////////////////////////////////////////////////////////
451/// set background tree
452
453void TMVA::DataLoader::SetTree( TTree* tree, const TString& className, Double_t weight )
454{
455 AddTree( tree, className, weight, TCut(""), Types::kMaxTreeType );
456}
457
458////////////////////////////////////////////////////////////////////////////////
459/// define the input trees for signal and background; no cuts are applied
460
462 Double_t signalWeight, Double_t backgroundWeight )
463{
464 AddTree( signal, "Signal", signalWeight, TCut(""), Types::kMaxTreeType );
465 AddTree( background, "Background", backgroundWeight, TCut(""), Types::kMaxTreeType );
466}
467
468////////////////////////////////////////////////////////////////////////////////
469
470void TMVA::DataLoader::SetInputTrees( const TString& datFileS, const TString& datFileB,
471 Double_t signalWeight, Double_t backgroundWeight )
472{
473 DataInput().AddTree( datFileS, "Signal", signalWeight );
474 DataInput().AddTree( datFileB, "Background", backgroundWeight );
475}
476
477////////////////////////////////////////////////////////////////////////////////
478/// define the input trees for signal and background from single input tree,
479/// containing both signal and background events distinguished by the type
480/// identifiers: SigCut and BgCut
481
482void TMVA::DataLoader::SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut )
483{
484 AddTree( inputTree, "Signal", 1.0, SigCut, Types::kMaxTreeType );
485 AddTree( inputTree, "Background", 1.0, BgCut , Types::kMaxTreeType );
486}
487
488////////////////////////////////////////////////////////////////////////////////
489/// user inserts discriminating variable in data set info
490
491void TMVA::DataLoader::AddVariable( const TString& expression, const TString& title, const TString& unit,
492 char type, Double_t min, Double_t max )
493{
494 DefaultDataSetInfo().AddVariable( expression, title, unit, min, max, type );
495}
496
497////////////////////////////////////////////////////////////////////////////////
498/// user inserts discriminating variable in data set info
499
500void TMVA::DataLoader::AddVariable( const TString& expression, char type,
501 Double_t min, Double_t max )
502{
503 DefaultDataSetInfo().AddVariable( expression, "", "", min, max, type );
504}
505
506////////////////////////////////////////////////////////////////////////////////
507/// user inserts target in data set info
508
509void TMVA::DataLoader::AddTarget( const TString& expression, const TString& title, const TString& unit,
510 Double_t min, Double_t max )
511{
512 if( fAnalysisType == Types::kNoAnalysisType )
513 fAnalysisType = Types::kRegression;
514
515 DefaultDataSetInfo().AddTarget( expression, title, unit, min, max );
516}
517
518////////////////////////////////////////////////////////////////////////////////
519/// user inserts target in data set info
520
521void TMVA::DataLoader::AddSpectator( const TString& expression, const TString& title, const TString& unit,
522 Double_t min, Double_t max )
523{
524 DefaultDataSetInfo().AddSpectator( expression, title, unit, min, max );
525}
526
527////////////////////////////////////////////////////////////////////////////////
528/// default creation
529
531{
532 return AddDataSet( fName );
533}
534
535////////////////////////////////////////////////////////////////////////////////
536/// fill input variables in data set
537
538void TMVA::DataLoader::SetInputVariables( std::vector<TString>* theVariables )
539{
540 for (std::vector<TString>::iterator it=theVariables->begin();
541 it!=theVariables->end(); ++it) AddVariable(*it);
542}
543
544////////////////////////////////////////////////////////////////////////////////
545
547{
548 DefaultDataSetInfo().SetWeightExpression(variable, "Signal");
549}
550
551////////////////////////////////////////////////////////////////////////////////
552
554{
555 DefaultDataSetInfo().SetWeightExpression(variable, "Background");
556}
557
558////////////////////////////////////////////////////////////////////////////////
559
560void TMVA::DataLoader::SetWeightExpression( const TString& variable, const TString& className )
561{
562 //Log() << kWarning << DefaultDataSetInfo().GetNClasses() /*fClasses.size()*/ << Endl;
563 if (className=="") {
564 SetSignalWeightExpression(variable);
565 SetBackgroundWeightExpression(variable);
566 }
567 else DefaultDataSetInfo().SetWeightExpression( variable, className );
568}
569
570////////////////////////////////////////////////////////////////////////////////
571
572void TMVA::DataLoader::SetCut( const TString& cut, const TString& className ) {
573 SetCut( TCut(cut), className );
574}
575
576////////////////////////////////////////////////////////////////////////////////
577
578void TMVA::DataLoader::SetCut( const TCut& cut, const TString& className )
579{
580 DefaultDataSetInfo().SetCut( cut, className );
581}
582
583////////////////////////////////////////////////////////////////////////////////
584
585void TMVA::DataLoader::AddCut( const TString& cut, const TString& className )
586{
587 AddCut( TCut(cut), className );
588}
589
590////////////////////////////////////////////////////////////////////////////////
591void TMVA::DataLoader::AddCut( const TCut& cut, const TString& className )
592{
593 DefaultDataSetInfo().AddCut( cut, className );
594}
595
596////////////////////////////////////////////////////////////////////////////////
597/// prepare the training and test trees
598
600 Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest,
601 const TString& otherOpt )
602{
603 SetInputTreesFromEventAssignTrees();
604
605 AddCut( cut );
606
607 DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:%s",
608 NsigTrain, NbkgTrain, NsigTest, NbkgTest, otherOpt.Data()) );
609}
610
611////////////////////////////////////////////////////////////////////////////////
612/// prepare the training and test trees
613/// kept for backward compatibility
614
616{
617 SetInputTreesFromEventAssignTrees();
618
619 AddCut( cut );
620
621 DefaultDataSetInfo().SetSplitOptions( Form("nTrain_Signal=%i:nTrain_Background=%i:nTest_Signal=%i:nTest_Background=%i:SplitMode=Random:EqualTrainSample:!V",
622 Ntrain, Ntrain, Ntest, Ntest) );
623}
624
625////////////////////////////////////////////////////////////////////////////////
626/// prepare the training and test trees
627/// -> same cuts for signal and background
628
630{
631 SetInputTreesFromEventAssignTrees();
632
633 DefaultDataSetInfo().PrintClasses();
634 AddCut( cut );
635 DefaultDataSetInfo().SetSplitOptions( opt );
636}
637
638////////////////////////////////////////////////////////////////////////////////
639/// prepare the training and test trees
640
641void TMVA::DataLoader::PrepareTrainingAndTestTree( TCut sigcut, TCut bkgcut, const TString& splitOpt )
642{
643 // if event-wise data assignment, add local trees to dataset first
644 SetInputTreesFromEventAssignTrees();
645
646 //Log() << kINFO <<"Preparing trees for training and testing..."<< Endl;
647 AddCut( sigcut, "Signal" );
648 AddCut( bkgcut, "Background" );
649
650 DefaultDataSetInfo().SetSplitOptions( splitOpt );
651}
652
653////////////////////////////////////////////////////////////////////////////////
654/// Function required to split the training and testing datasets into a
655/// number of folds. Required by the CrossValidation and HyperParameterOptimisation
656/// classes. The option to split the training dataset into a training set and
657/// a validation set is implemented but not currently used.
658
660{
661 s.MakeKFoldDataSet( DefaultDataSetInfo() );
662}
663
664////////////////////////////////////////////////////////////////////////////////
665/// Function for assigning the correct folds to the testing or training set.
666
668{
669 s.PrepareFoldDataSet( DefaultDataSetInfo(), foldNumber, tt );
670}
671
672
673////////////////////////////////////////////////////////////////////////////////
674/// Recombines the dataset. The precise semantics depend on the actual split.
675///
676/// Similar to the inverse operation of `MakeKFoldDataSet` but _will_ differ.
677/// See documentation for each particular split for more information.
678///
679
681{
682 s.RecombineKFoldDataSet( DefaultDataSetInfo(), tt );
683}
684
685////////////////////////////////////////////////////////////////////////////////
686/// Copy method use in VI and CV
687
689{
691 DataLoaderCopy(des,this);
692 return des;
693}
694
695////////////////////////////////////////////////////////////////////////////////
696///Loading Dataset from DataInputHandler for subseed
697
699{
700 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Sbegin();treeinfo!=src->DataInput().Send();++treeinfo)
701 {
702 des->AddSignalTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
703 }
704
705 for( std::vector<TreeInfo>::const_iterator treeinfo=src->DataInput().Bbegin();treeinfo!=src->DataInput().Bend();++treeinfo)
706 {
707 des->AddBackgroundTree( (*treeinfo).GetTree(), (*treeinfo).GetWeight(),(*treeinfo).GetTreeType());
708 }
709}
710
711////////////////////////////////////////////////////////////////////////////////
712/// returns the correlation matrix of datasets
713
715{
716 const TMatrixD * m = DefaultDataSetInfo().CorrelationMatrix(className);
717 return DefaultDataSetInfo().CreateCorrelationMatrixHist(m,
718 "CorrelationMatrix"+className, "Correlation Matrix ("+className+")");
719}
int Int_t
Definition: RtypesCore.h:41
int Ssiz_t
Definition: RtypesCore.h:63
unsigned int UInt_t
Definition: RtypesCore.h:42
const Bool_t kFALSE
Definition: RtypesCore.h:88
bool Bool_t
Definition: RtypesCore.h:59
double Double_t
Definition: RtypesCore.h:55
#define ClassImp(name)
Definition: Rtypes.h:363
int type
Definition: TGX11.cxx:120
char * Form(const char *fmt,...)
A specialized string object used for TTree selections.
Definition: TCut.h:25
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
Class that contains all the information of a class.
Definition: ClassInfo.h:49
UInt_t GetNumber() const
Definition: ClassInfo.h:65
MsgLogger * fLogger
Definition: Configurable.h:128
Class that contains all the data information.
std::vector< TreeInfo >::const_iterator Send() const
std::vector< TreeInfo >::const_iterator Sbegin() const
std::vector< TreeInfo >::const_iterator Bbegin() const
std::vector< TreeInfo >::const_iterator Bend() const
DataInputHandler * fDataInputHandler
Definition: DataLoader.h:192
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
Definition: DataLoader.cxx:201
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
Definition: DataLoader.cxx:445
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: DataLoader.cxx:377
DataSetInfo & AddDataSet(DataSetInfo &)
Definition: DataLoader.cxx:126
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:521
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
Definition: DataLoader.cxx:325
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal training event
Definition: DataLoader.cxx:266
void SetTree(TTree *tree, const TString &className, Double_t weight)
set background tree
Definition: DataLoader.cxx:453
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
Definition: DataLoader.cxx:242
DataSetInfo & DefaultDataSetInfo()
default creation
Definition: DataLoader.cxx:530
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:258
DataSetManager * fDataSetManager
Definition: DataLoader.h:189
DataLoader * MakeCopy(TString name)
Copy method use in VI and CV.
Definition: DataLoader.cxx:688
void SetSignalWeightExpression(const TString &variable)
Definition: DataLoader.cxx:546
void MakeKFoldDataSet(CvSplit &s)
Function required to split the training and testing datasets into a number of folds.
Definition: DataLoader.cxx:659
void SetWeightExpression(const TString &variable, const TString &className="")
Definition: DataLoader.cxx:560
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:250
void RecombineKFoldDataSet(CvSplit &s, Types::ETreeType tt=Types::kTraining)
Recombines the dataset.
Definition: DataLoader.cxx:680
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
Definition: DataLoader.cxx:153
void SetBackgroundWeightExpression(const TString &variable)
Definition: DataLoader.cxx:553
void AddCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:585
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add event vector event : the order of values is: variables + targets + spectators
Definition: DataLoader.cxx:283
DataLoader(TString thedlName="default")
Definition: DataLoader.cxx:87
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:629
DataInputHandler & DataInput()
Definition: DataLoader.h:175
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
number of signal events (used to compute significance)
Definition: DataLoader.cxx:408
DataSetInfo & GetDataSetInfo()
Definition: DataLoader.cxx:144
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:509
TH2 * GetCorrelationMatrix(const TString &className)
returns the correlation matrix of datasets
Definition: DataLoader.cxx:714
Bool_t UserAssignEvents(UInt_t clIndex)
Definition: DataLoader.cxx:317
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:234
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add signal test event
Definition: DataLoader.cxx:274
void SetSignalTree(TTree *signal, Double_t weight=1.0)
Definition: DataLoader.cxx:438
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
Definition: DataLoader.cxx:470
virtual ~DataLoader()
Definition: DataLoader.cxx:103
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
Definition: DataLoader.cxx:357
void SetInputVariables(std::vector< TString > *theVariables)
fill input variables in data set
Definition: DataLoader.cxx:538
void SetCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:572
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:491
void PrepareFoldDataSet(CvSplit &s, UInt_t foldNumber, Types::ETreeType tt=Types::kTraining)
Function for assigning the correct folds to the testing or training set.
Definition: DataLoader.cxx:667
Class that contains all the data information.
Definition: DataSetInfo.h:60
Class that contains all the data information.
void SetSource(const std::string &source)
Definition: MsgLogger.h:70
@ kMulticlass
Definition: Types.h:130
@ kNoAnalysisType
Definition: Types.h:131
@ kRegression
Definition: Types.h:129
@ kMaxTreeType
Definition: Types.h:146
@ kTraining
Definition: Types.h:144
@ kTesting
Definition: Types.h:145
TMVA::DataLoader * VarianceThreshold(Double_t threshold)
Computes variance of all the variables and returns a new DataLoader with the selected variables whose...
virtual void SetName(const char *name)
Set the name of the TNamed.
Definition: TNamed.cxx:140
Basic string class.
Definition: TString.h:131
void ToLower()
Change string to lower-case.
Definition: TString.cxx:1100
Double_t Atof() const
Return floating-point value contained in string.
Definition: TString.cxx:1962
Bool_t IsFloat() const
Returns kTRUE if string contains a floating point or integer number.
Definition: TString.cxx:1766
const char * Data() const
Definition: TString.h:364
TString & Remove(Ssiz_t pos)
Definition: TString.h:668
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:619
Ssiz_t Index(const char *pat, Ssiz_t i=0, ECaseCompare cmp=kExact) const
Definition: TString.h:634
A TTree object has a header with a name and a title.
Definition: TTree.h:71
virtual void SetDirectory(TDirectory *dir)
Change the tree's directory.
Definition: TTree.cxx:8590
virtual Long64_t ReadFile(const char *filename, const char *branchDescriptor="", char delimiter=' ')
Create or simply read branches from filename.
Definition: TTree.cxx:7274
virtual Int_t Branch(TCollection *list, Int_t bufsize=32000, Int_t splitlevel=99, const char *name="")
Create one branch for each element in the collection.
Definition: TTree.cxx:1722
static constexpr double s
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:158
Double_t Log(Double_t x)
Definition: TMath.h:748
Definition: tree.py:1
auto * m
Definition: textangle.C:8
auto * tt
Definition: textangle.C:16