Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
DataLoader.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag, Omar Zapata, Lorenzo Moneta, Sergei Gleyzer
3//NOTE: Based on TMVA::Factory
4
5/**********************************************************************************
6 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
7 * Package: TMVA *
8 * Class : DataLoader *
9 * *
10 * *
11 * Description: *
12 * This is a class to load datasets into every booked method *
13 * *
14 * Authors (alphabetical): *
15 * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
16 * Omar Zapata <andresete.chaos@gmail.com> - ITM/UdeA, Colombia *
17 * Sergei Gleyzer<sergei.gleyzer@cern.ch> - CERN, Switzerland *
18 * *
19 * Copyright (c) 2005-2011: *
20 * CERN, Switzerland *
21 * ITM/UdeA, Colombia *
22 * *
23 * Redistribution and use in source and binary forms, with or without *
24 * modification, are permitted according to the terms listed in LICENSE *
25 * (see tmva/doc/LICENSE) *
26 **********************************************************************************/
27
28#ifndef ROOT_TMVA_DataLoader
29#define ROOT_TMVA_DataLoader
30
31#include <vector>
32#include "TCut.h"
33
34#include "TMVA/Configurable.h"
35#include "TMVA/Types.h"
36#include "TMVA/DataSet.h"
37
38class TFile;
39class TTree;
40class TH2;
41
42namespace TMVA {
43
44 class CvSplit;
45 class DataInputHandler;
46 class DataSetInfo;
47 class DataSetManager;
48 class VariableTransformBase;
49
   // DataLoader: a Configurable that loads datasets into every booked method
   // (as stated in the header description of this file).
   //
   // NOTE(review): this is a numbered source listing and the embedded line numbers
   // skip in several places (e.g. 66->69, 103->105, 113->115, 155->158), so some
   // declarations below are incomplete in this view. The gaps are flagged where
   // they cut a declaration in half.
50 class DataLoader : public Configurable {
51 public:
52
   // Construct a loader; thedlName defaults to "default".
53 DataLoader(TString thedlName="default");
54
55 // default destructor
56 virtual ~DataLoader();
57
58
59 // add events to training and testing trees
   // The first four are the signal/background shortcuts for training and test events;
   // the remaining three take the class name explicitly.
60 void AddSignalTrainingEvent ( const std::vector<Double_t>& event, Double_t weight = 1.0 );
61 void AddBackgroundTrainingEvent( const std::vector<Double_t>& event, Double_t weight = 1.0 );
62 void AddSignalTestEvent ( const std::vector<Double_t>& event, Double_t weight = 1.0 );
63 void AddBackgroundTestEvent ( const std::vector<Double_t>& event, Double_t weight = 1.0 );
64 void AddTrainingEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight );
65 void AddTestEvent ( const TString& className, const std::vector<Double_t>& event, Double_t weight );
   // Add an event vector; the order of values is: variables + targets + spectators.
66 void AddEvent ( const TString& className, Types::ETreeType tt, const std::vector<Double_t>& event, Double_t weight );
69
74
75 // special case: signal/background
76
77 // Data input related
   // NOTE(review): several declarations of this group (listing lines 78-79, 82-83,
   // 85-86, 92-94) are not present in this view.
80 void SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut );
81 // Set input trees at once
84
87 void AddSignalTree( TTree* signal, Double_t weight, const TString& treetype );
88
89 // ... deprecated, kept for backwards compatibility
90 void SetSignalTree( TTree* signal, Double_t weight=1.0);
91
95
96 // ... deprecated, kept for backwards compatibility
97 void SetBackgroundTree( TTree* background, Double_t weight=1.0 );
98
   // Per-class weight expressions for the signal and background classes.
99 void SetSignalWeightExpression( const TString& variable );
100 void SetBackgroundWeightExpression( const TString& variable );
101
102 // special case: regression
   // Inline convenience wrapper: forwards to AddTree using the class name Regression
   // and an empty cut.
   // NOTE(review): listing line 104 (rest of the parameter list, presumably the
   // treetype parameter, and the opening brace) is missing from this view.
103 void AddRegressionTree( TTree* tree, Double_t weight = 1.0,
105 AddTree( tree, "Regression", weight, "", treetype );
106 }
107
108 // general
109
110 // Data input related
111 void SetTree( TTree* tree, const TString& className, Double_t weight ); ///< deprecated
   // NOTE(review): listing line 114 (the last parameter of this overload) is missing
   // from this view, so the declaration below is not terminated here.
112 void AddTree( TTree* tree, const TString& className, Double_t weight=1.0,
113 const TCut& cut = "",
   // Overload taking the tree type as a string.
115 void AddTree( TTree* tree, const TString& className, Double_t weight, const TCut& cut, const TString& treeType );
116
117 // set input variable
118 void SetInputVariables ( std::vector<TString>* theVariables ); ///< deprecated
119
   // User inserts a discriminating variable in the data set info.
120 void AddVariable ( const TString& expression, const TString& title, const TString& unit,
121 char type='F', Double_t min = 0, Double_t max = 0 );
122 void AddVariable ( const TString& expression, char type='F',
123 Double_t min = 0, Double_t max = 0 );
124
125 // NEW: add an array of variables (e.g. for image data) with the provided size
126 void AddVariablesArray(const TString &expression, int size, char type = 'F',
127 Double_t min = 0, Double_t max = 0);
128
129
   // User inserts a target in the data set info.
130 void AddTarget ( const TString& expression, const TString& title = "", const TString& unit = "",
131 Double_t min = 0, Double_t max = 0 );
   // Inline alias: forwards all arguments unchanged to AddTarget.
132 void AddRegressionTarget( const TString& expression, const TString& title = "", const TString& unit = "",
133 Double_t min = 0, Double_t max = 0 )
134 {
135 AddTarget( expression, title, unit, min, max );
136 }
   // Same parameter list as AddTarget, but registers a spectator.
137 void AddSpectator ( const TString& expression, const TString& title = "", const TString& unit = "",
138 Double_t min = 0, Double_t max = 0 );
139
140 // set weight for class
141 void SetWeightExpression( const TString& variable, const TString& className = "" );
142
143 // set cut for class
   // Each of SetCut/AddCut accepts the cut either as a TString or as a TCut.
144 void SetCut( const TString& cut, const TString& className = "" );
145 void SetCut( const TCut& cut, const TString& className = "" );
146 void AddCut( const TString& cut, const TString& className = "" );
147 void AddCut( const TCut& cut, const TString& className = "" );
148
149
150 // prepare input tree for training
   // Prepare the training and test trees with the same cut for signal and background.
151 void PrepareTrainingAndTestTree( const TCut& cut, const TString& splitOpt );
153
154 // ... deprecated, kept for backwards compatibility
155 void PrepareTrainingAndTestTree( const TCut& cut, Int_t Ntrain, Int_t Ntest = -1 );
156
   // NOTE(review): the line below is the tail of a declaration whose first line
   // (listing line 157) is missing from this view.
158 const TString& otherOpt="SplitMode=Random:!V" );
159
160 // Cross validation
   // Splits the training and testing datasets into a number of folds.
161 void MakeKFoldDataSet(CvSplit & s);
164
166
   // Returns the correlation matrix of the dataset for the given class.
167 TH2* GetCorrelationMatrix(const TString& className);
168
169 // Copy method used in VI and CV. DEPRECATED: you can just call Clone: DataLoader *dl2=(DataLoader *)dl1->Clone("dl2")
173
174 private:
175
176
179
180
181 private:
182
183 // data members
   // NOTE(review): some data member declarations (listing lines 186 and 189) are
   // not present in this view.
184
185
187
188
190
191 std::vector<TMVA::VariableTransformBase*> fDefaultTrfs; ///< list of transformations on default DataSet
192
193 // cd to local directory
194 TString fOptions; ///< option string given by construction (presently only "V")
195 TString fTransformations; ///< List of transformations to test
196 Bool_t fVerbose; ///< verbose mode
197
198 // flag determining the way training and test data are assigned to DataLoader
   // NOTE(review): the definition of DataAssignType (listing lines 199-201) is not
   // present in this view.
202 DataAssignType fDataAssignType; ///< flags for data assigning
203 std::vector<TTree*> fTrainAssignTree; ///< for each class: tmp tree if user wants to assign the events directly
204 std::vector<TTree*> fTestAssignTree; ///< for each class: tmp tree if user wants to assign the events directly
205
206 Int_t fATreeType = 0; ///< type of event (=classIndex)
207 Float_t fATreeWeight = 0.0; ///< weight of the event
208 std::vector<Float_t> fATreeEvent; ///< event variables
209
210 Types::EAnalysisType fAnalysisType; ///< the training type
211
212 protected:
213
215 };
217} // namespace TMVA
218
219#endif
220
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
unsigned int UInt_t
Definition RtypesCore.h:46
float Float_t
Definition RtypesCore.h:57
double Double_t
Definition RtypesCore.h:59
#define ClassDef(name, id)
Definition Rtypes.h:342
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t src
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
A specialized string object used for TTree selections.
Definition TCut.h:25
A ROOT file is an on-disk file, usually with extension .root, that stores objects in a file-system-li...
Definition TFile.h:53
Service class for 2-D histogram classes.
Definition TH2.h:30
Class that contains all the data information.
DataInputHandler * fDataInputHandler
->
Definition DataLoader.h:189
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
void AddVariablesArray(const TString &expression, int size, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating array of variables in data set info in case input tree provides an array ...
Float_t fATreeWeight
weight of the event
Definition DataLoader.h:207
std::vector< TTree * > fTrainAssignTree
for each class: tmp tree if user wants to assign the events directly
Definition DataLoader.h:203
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add a signal tree with the given event weight and tree type
DataSetInfo & AddDataSet(DataSetInfo &)
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts spectator in data set info
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add training event for the class given by className
void AddRegressionTree(TTree *tree, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Definition DataLoader.h:103
std::vector< TMVA::VariableTransformBase * > fDefaultTrfs
list of transformations on default DataSet
Definition DataLoader.h:191
DataAssignType fDataAssignType
flags for data assigning
Definition DataLoader.h:202
void SetTree(TTree *tree, const TString &className, Double_t weight)
deprecated
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
std::vector< Float_t > fATreeEvent
event variables
Definition DataLoader.h:208
DataSetInfo & DefaultDataSetInfo()
default creation
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background test event
DataSetManager * fDataSetManager
Definition DataLoader.h:186
DataLoader * MakeCopy(TString name)
Copy method used in VI and CV.
void SetSignalWeightExpression(const TString &variable)
void MakeKFoldDataSet(CvSplit &s)
Function required to split the training and testing datasets into a number of folds.
void SetWeightExpression(const TString &variable, const TString &className="")
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background training event
void RecombineKFoldDataSet(CvSplit &s, Types::ETreeType tt=Types::kTraining)
Recombines the dataset.
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
void SetBackgroundWeightExpression(const TString &variable)
void AddCut(const TString &cut, const TString &className="")
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add an event; the order of values in the event vector is: variables + targets + spectators
DataLoader(TString thedlName="default")
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
DataInputHandler & DataInput()
Definition DataLoader.h:172
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add a background tree with the given event weight and tree type
DataSetInfo & GetDataSetInfo()
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
TH2 * GetCorrelationMatrix(const TString &className)
returns the correlation matrix of datasets
friend void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
Bool_t UserAssignEvents(UInt_t clIndex)
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Bool_t fVerbose
verbose mode
Definition DataLoader.h:196
void AddRegressionTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
Definition DataLoader.h:132
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add test event for the class given by className
void SetSignalTree(TTree *signal, Double_t weight=1.0)
TString fTransformations
List of transformations to test.
Definition DataLoader.h:195
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
virtual ~DataLoader()
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
const DataSetInfo & GetDefaultDataSetInfo()
Definition DataLoader.h:165
TString fOptions
option string given by construction (presently only "V")
Definition DataLoader.h:194
void SetInputVariables(std::vector< TString > *theVariables)
deprecated
Int_t fATreeType
type of event (=classIndex)
Definition DataLoader.h:206
std::vector< TTree * > fTestAssignTree
for each class: tmp tree if user wants to assign the events directly
Definition DataLoader.h:204
Types::EAnalysisType fAnalysisType
the training type
Definition DataLoader.h:210
void SetCut(const TString &cut, const TString &className="")
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
void PrepareFoldDataSet(CvSplit &s, UInt_t foldNumber, Types::ETreeType tt=Types::kTraining)
Function for assigning the correct folds to the testing or training set.
Class that contains all the data information.
Definition DataSetInfo.h:62
Class that contains all the data information.
@ kMaxTreeType
also used as temporary storage for trees not yet assigned for testing;training...
Definition Types.h:145
@ kTraining
Definition Types.h:143
Basic string class.
Definition TString.h:139
A TTree represents a columnar dataset.
Definition TTree.h:79
create variable transformations
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
auto * tt
Definition textangle.C:16