Logo ROOT  
Reference Guide
DataLoader.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag, Omar Zapata, Lorenzo Moneta, Sergei Gleyzer
3//NOTE: Based on TMVA::Factory
4
5/**********************************************************************************
6 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
7 * Package: TMVA *
8 * Class : DataLoader *
9 * Web : http://tmva.sourceforge.net *
10 * *
11 * Description: *
12 * This is a class to load datasets into every booked method *
13 * *
14 * Authors (alphabetical): *
15 * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
16 * Omar Zapata <andresete.chaos@gmail.com> - ITM/UdeA, Colombia *
17 * Sergei Gleyzer<sergei.gleyzer@cern.ch> - CERN, Switzerland *
18 * *
19 * Copyright (c) 2005-2011: *
20 * CERN, Switzerland *
21 * ITM/UdeA, Colombia *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (http://tmva.sourceforge.net/LICENSE) *
26 **********************************************************************************/
27
28#ifndef ROOT_TMVA_DataLoader
29#define ROOT_TMVA_DataLoader
30
31#include <string>
32#include <vector>
33#include "TCut.h"
34
35#include "TMVA/Configurable.h"
36#include "TMVA/Types.h"
37#include "TMVA/DataSet.h"
38
39class TFile;
40class TTree;
41class TH2;
42
43namespace TMVA {
44
45 class CvSplit;
46 class DataInputHandler;
47 class DataSetInfo;
48 class DataSetManager;
49 class VariableTransformBase;
50
// DataLoader loads datasets into every booked method; it derives from
// Configurable and is based on TMVA::Factory.
51 class DataLoader : public Configurable {
52 public:
53
// constructor; thedlName defaults to "default"
54 DataLoader(TString thedlName="default");
55
56 // virtual destructor (defined out of line in DataLoader.cxx)
57 virtual ~DataLoader();
58
59
60 // add events to training and testing trees
// (for AddEvent the order of values in the event vector is: variables + targets + spectators)
61 void AddSignalTrainingEvent ( const std::vector<Double_t>& event, Double_t weight = 1.0 );
62 void AddBackgroundTrainingEvent( const std::vector<Double_t>& event, Double_t weight = 1.0 );
63 void AddSignalTestEvent ( const std::vector<Double_t>& event, Double_t weight = 1.0 );
64 void AddBackgroundTestEvent ( const std::vector<Double_t>& event, Double_t weight = 1.0 );
65 void AddTrainingEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight );
66 void AddTestEvent ( const TString& className, const std::vector<Double_t>& event, Double_t weight );
67 void AddEvent ( const TString& className, Types::ETreeType tt, const std::vector<Double_t>& event, Double_t weight );
70
// transforms the variables and returns a new DataLoader with the transformed variables
74 DataLoader* VarTransform(TString trafoDefinition);
75
76 // special case: signal/background
77
78 // Data input related
79 void SetInputTrees( const TString& signalFileName, const TString& backgroundFileName,
80 Double_t signalWeight=1.0, Double_t backgroundWeight=1.0 );
81 void SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut );
82 // Set input trees at once
83 void SetInputTrees( TTree* signal, TTree* background,
84 Double_t signalWeight=1.0, Double_t backgroundWeight=1.0) ;
85
86 void AddSignalTree( TTree* signal, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType );
87 void AddSignalTree( TString datFileS, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType );
88 void AddSignalTree( TTree* signal, Double_t weight, const TString& treetype );
89
90 // ... deprecated, kept for backwards compatibility
91 void SetSignalTree( TTree* signal, Double_t weight=1.0);
92
93 void AddBackgroundTree( TTree* background, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType );
94 void AddBackgroundTree( TString datFileB, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType );
95 void AddBackgroundTree( TTree* background, Double_t weight, const TString & treetype );
96
97 // ... deprecated, kept for backwards compatibility
98 void SetBackgroundTree( TTree* background, Double_t weight=1.0 );
99
100 void SetSignalWeightExpression( const TString& variable );
101 void SetBackgroundWeightExpression( const TString& variable );
102
103 // special case: regression
// inline forwarder: registers the tree under the class name "Regression" via AddTree
// NOTE(review): listing line 105 (the treetype parameter and the opening brace) is missing
// from this extract; the cross-reference gives the full signature as
// AddRegressionTree(TTree *tree, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
104 void AddRegressionTree( TTree* tree, Double_t weight = 1.0,
106 AddTree( tree, "Regression", weight, "", treetype );
107 }
108
109 // general
110
111 // Data input related
112 void SetTree( TTree* tree, const TString& className, Double_t weight ); // deprecated
// NOTE(review): listing line 115 (the last parameter of this overload) is missing from this
// extract; the cross-reference gives it as Types::ETreeType tt=Types::kMaxTreeType
113 void AddTree( TTree* tree, const TString& className, Double_t weight=1.0,
114 const TCut& cut = "",
116 void AddTree( TTree* tree, const TString& className, Double_t weight, const TCut& cut, const TString& treeType );
117
118 // set input variable
119 void SetInputVariables ( std::vector<TString>* theVariables ); // deprecated
120
121 void AddVariable ( const TString& expression, const TString& title, const TString& unit,
122 char type='F', Double_t min = 0, Double_t max = 0 );
123 void AddVariable ( const TString& expression, char type='F',
124 Double_t min = 0, Double_t max = 0 );
125
126 // NEW: add an array of variables (e.g. for image data) with the provided size
127 void AddVariablesArray(const TString &expression, int size, char type = 'F',
128 Double_t min = 0, Double_t max = 0);
129
130
131 void AddTarget ( const TString& expression, const TString& title = "", const TString& unit = "",
132 Double_t min = 0, Double_t max = 0 );
// inline forwarder: identical to AddTarget with the same arguments
133 void AddRegressionTarget( const TString& expression, const TString& title = "", const TString& unit = "",
134 Double_t min = 0, Double_t max = 0 )
135 {
136 AddTarget( expression, title, unit, min, max );
137 }
138 void AddSpectator ( const TString& expression, const TString& title = "", const TString& unit = "",
139 Double_t min = 0, Double_t max = 0 );
140
141 // set weight for class
142 void SetWeightExpression( const TString& variable, const TString& className = "" );
143
144 // set cut for class
145 void SetCut( const TString& cut, const TString& className = "" );
146 void SetCut( const TCut& cut, const TString& className = "" );
147 void AddCut( const TString& cut, const TString& className = "" );
148 void AddCut( const TCut& cut, const TString& className = "" );
149
150
151 // prepare input tree for training
// (the single-cut overload applies the same cut to signal and background)
152 void PrepareTrainingAndTestTree( const TCut& cut, const TString& splitOpt );
153 void PrepareTrainingAndTestTree( TCut sigcut, TCut bkgcut, const TString& splitOpt );
154
155 // ... deprecated, kept for backwards compatibility
156 void PrepareTrainingAndTestTree( const TCut& cut, Int_t Ntrain, Int_t Ntest = -1 );
157
158 void PrepareTrainingAndTestTree( const TCut& cut, Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest,
159 const TString& otherOpt="SplitMode=Random:!V" );
160
161 // Cross validation
// MakeKFoldDataSet: splits the training and testing datasets into a number of folds
162 void MakeKFoldDataSet(CvSplit & s);
165
167
// returns the correlation matrix of datasets
168 TH2* GetCorrelationMatrix(const TString& className);
169
170 // Copy method used in VI and CV. DEPRECATED: you can just call Clone, e.g. DataLoader *dl2=(DataLoader *)dl1->Clone("dl2")
// NOTE(review): the declaration this comment refers to (presumably MakeCopy) is not present in this extract
174
175 private:
176
177
180
181
182 private:
183
184 // data members
185
186
188
189
191
192 std::vector<TMVA::VariableTransformBase*> fDefaultTrfs; // list of transformations on default DataSet
193
194 // cd to local directory
195 TString fOptions; // option string given by construction (presently only "V")
196 TString fTransformations; // List of transformations to test
197 Bool_t fVerbose; // verbose mode
198
199 // flag determining the way training and test data are assigned to DataLoader
// NOTE(review): the DataAssignType definition (listing lines 200-202) is not present in this extract
203 DataAssignType fDataAssignType; // flags for data assigning
204 std::vector<TTree*> fTrainAssignTree; // for each class: tmp tree if user wants to assign the events directly
205 std::vector<TTree*> fTestAssignTree; // for each class: tmp tree if user wants to assign the events directly
206
207 Int_t fATreeType = 0; // type of event (=classIndex)
208 Float_t fATreeWeight = 0.0; // weight of the event
209 std::vector<Float_t> fATreeEvent; // event variables
210
212
213 protected:
214
216 };
218} // namespace TMVA
219
220#endif
221
unsigned int UInt_t
Definition: RtypesCore.h:44
double Double_t
Definition: RtypesCore.h:57
float Float_t
Definition: RtypesCore.h:55
#define ClassDef(name, id)
Definition: Rtypes.h:322
char name[80]
Definition: TGX11.cxx:109
int type
Definition: TGX11.cxx:120
A specialized string object used for TTree selections.
Definition: TCut.h:25
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:53
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
Class that contains all the data information.
DataInputHandler * fDataInputHandler
Definition: DataLoader.h:190
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
Definition: DataLoader.cxx:196
void AddVariablesArray(const TString &expression, int size, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating array of variables in data set info in case input tree provides an array ...
Definition: DataLoader.cxx:505
Float_t fATreeWeight
Definition: DataLoader.h:208
std::vector< TTree * > fTrainAssignTree
Definition: DataLoader.h:204
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
Definition: DataLoader.cxx:440
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add a signal tree (with optional weight and tree type)
Definition: DataLoader.cxx:372
DataSetInfo & AddDataSet(DataSetInfo &)
Definition: DataLoader.cxx:121
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts spectator in data set info
Definition: DataLoader.cxx:525
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
Definition: DataLoader.cxx:320
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add training event for the given class
Definition: DataLoader.cxx:261
void AddRegressionTree(TTree *tree, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Definition: DataLoader.h:104
std::vector< TMVA::VariableTransformBase * > fDefaultTrfs
Definition: DataLoader.h:192
DataAssignType fDataAssignType
Definition: DataLoader.h:203
void SetTree(TTree *tree, const TString &className, Double_t weight)
set the tree for the given class
Definition: DataLoader.cxx:448
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
Definition: DataLoader.cxx:237
std::vector< Float_t > fATreeEvent
Definition: DataLoader.h:209
DataSetInfo & DefaultDataSetInfo()
default creation
Definition: DataLoader.cxx:534
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background test event
Definition: DataLoader.cxx:253
DataSetManager * fDataSetManager
Definition: DataLoader.h:187
DataLoader * MakeCopy(TString name)
Copy method used in VI and CV.
Definition: DataLoader.cxx:692
void SetSignalWeightExpression(const TString &variable)
Definition: DataLoader.cxx:550
void MakeKFoldDataSet(CvSplit &s)
Function required to split the training and testing datasets into a number of folds.
Definition: DataLoader.cxx:663
void SetWeightExpression(const TString &variable, const TString &className="")
Definition: DataLoader.cxx:564
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background training event
Definition: DataLoader.cxx:245
void RecombineKFoldDataSet(CvSplit &s, Types::ETreeType tt=Types::kTraining)
Recombines the dataset.
Definition: DataLoader.cxx:684
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and returns a new DataLoader with the transformed variables.
Definition: DataLoader.cxx:148
void SetBackgroundWeightExpression(const TString &variable)
Definition: DataLoader.cxx:557
void AddCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:589
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add an event; the order of values in the event vector is: variables + targets + spectators
Definition: DataLoader.cxx:278
DataLoader(TString thedlName="default")
Definition: DataLoader.cxx:82
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:633
DataInputHandler & DataInput()
Definition: DataLoader.h:173
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add a background tree (with optional weight and tree type)
Definition: DataLoader.cxx:403
DataSetInfo & GetDataSetInfo()
Definition: DataLoader.cxx:139
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:513
TH2 * GetCorrelationMatrix(const TString &className)
returns the correlation matrix of datasets
Definition: DataLoader.cxx:718
friend void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
Bool_t UserAssignEvents(UInt_t clIndex)
Definition: DataLoader.cxx:312
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:229
void AddRegressionTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
Definition: DataLoader.h:133
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add test event for the given class
Definition: DataLoader.cxx:269
void SetSignalTree(TTree *signal, Double_t weight=1.0)
Definition: DataLoader.cxx:433
TString fTransformations
Definition: DataLoader.h:196
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
Definition: DataLoader.cxx:465
virtual ~DataLoader()
Definition: DataLoader.cxx:98
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
Definition: DataLoader.cxx:352
const DataSetInfo & GetDefaultDataSetInfo()
Definition: DataLoader.h:166
TString fOptions
Definition: DataLoader.h:195
void SetInputVariables(std::vector< TString > *theVariables)
fill input variables in data set
Definition: DataLoader.cxx:542
std::vector< TTree * > fTestAssignTree
Definition: DataLoader.h:205
Types::EAnalysisType fAnalysisType
Definition: DataLoader.h:211
void SetCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:576
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:486
void PrepareFoldDataSet(CvSplit &s, UInt_t foldNumber, Types::ETreeType tt=Types::kTraining)
Function for assigning the correct folds to the testing or training set.
Definition: DataLoader.cxx:671
Class that contains all the data information.
Definition: DataSetInfo.h:60
Class that contains all the data information.
EAnalysisType
Definition: Types.h:127
@ kMaxTreeType
Definition: Types.h:146
@ kTraining
Definition: Types.h:144
Basic string class.
Definition: TString.h:131
A TTree represents a columnar dataset.
Definition: TTree.h:78
static constexpr double s
create variable transformations
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
Definition: tree.py:1
auto * tt
Definition: textangle.C:16