Logo ROOT  
Reference Guide
DataLoader.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss, Kai Voss, Eckhard von Toerne, Jan Therhaag, Omar Zapata, Lorenzo Moneta, Sergei Gleyzer
3 //NOTE: Based on TMVA::Factory
4 
5 /**********************************************************************************
6  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
7  * Package: TMVA *
8  * Class : DataLoader *
9  * Web : http://tmva.sourceforge.net *
10  * *
11  * Description: *
12  * This is a class to load datasets into every booked method *
13  * *
14  * Authors (alphabetical): *
15  * Lorenzo Moneta <Lorenzo.Moneta@cern.ch> - CERN, Switzerland *
16  * Omar Zapata <andresete.chaos@gmail.com> - ITM/UdeA, Colombia *
17  * Sergei Gleyzer<sergei.gleyzer@cern.ch> - CERN, Switzerland *
18  * *
19  * Copyright (c) 2005-2011: *
20  * CERN, Switzerland *
21  * ITM/UdeA, Colombia *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 #ifndef ROOT_TMVA_DataLoader
29 #define ROOT_TMVA_DataLoader
30 
31 #include <vector>
32 #include "TCut.h"
33 
34 #include "TMVA/Configurable.h"
35 #include "TMVA/Types.h"
36 #include "TMVA/DataSet.h"
37 
38 class TFile;
39 class TTree;
40 class TH2;
41 
42 namespace TMVA {
43 
44  class CvSplit;
45  class DataInputHandler;
46  class DataSetInfo;
47  class DataSetManager;
48  class VariableTransformBase;
49 
50  class DataLoader : public Configurable {
51  public:
52 
53  DataLoader(TString thedlName="default");
54 
55  // default destructor
56  virtual ~DataLoader();
57 
58 
59  // add events to training and testing trees
60  void AddSignalTrainingEvent ( const std::vector<Double_t>& event, Double_t weight = 1.0 );
61  void AddBackgroundTrainingEvent( const std::vector<Double_t>& event, Double_t weight = 1.0 );
62  void AddSignalTestEvent ( const std::vector<Double_t>& event, Double_t weight = 1.0 );
63  void AddBackgroundTestEvent ( const std::vector<Double_t>& event, Double_t weight = 1.0 );
64  void AddTrainingEvent( const TString& className, const std::vector<Double_t>& event, Double_t weight );
65  void AddTestEvent ( const TString& className, const std::vector<Double_t>& event, Double_t weight );
66  void AddEvent ( const TString& className, Types::ETreeType tt, const std::vector<Double_t>& event, Double_t weight );
69 
71  DataSetInfo& AddDataSet( const TString& );
73  DataLoader* VarTransform(TString trafoDefinition);
74 
75  // special case: signal/background
76 
77  // Data input related
78  void SetInputTrees( const TString& signalFileName, const TString& backgroundFileName,
79  Double_t signalWeight=1.0, Double_t backgroundWeight=1.0 );
80  void SetInputTrees( TTree* inputTree, const TCut& SigCut, const TCut& BgCut );
81  // Set input trees at once
82  void SetInputTrees( TTree* signal, TTree* background,
83  Double_t signalWeight=1.0, Double_t backgroundWeight=1.0) ;
84 
85  void AddSignalTree( TTree* signal, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType );
86  void AddSignalTree( TString datFileS, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType );
87  void AddSignalTree( TTree* signal, Double_t weight, const TString& treetype );
88 
89 // ... deprecated, kept for backwards compatibility
90  void SetSignalTree( TTree* signal, Double_t weight=1.0);
91 
92  void AddBackgroundTree( TTree* background, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType );
93  void AddBackgroundTree( TString datFileB, Double_t weight=1.0, Types::ETreeType treetype = Types::kMaxTreeType );
94  void AddBackgroundTree( TTree* background, Double_t weight, const TString & treetype );
95 
96 // ... deprecated, kept for backwards compatibility
97  void SetBackgroundTree( TTree* background, Double_t weight=1.0 );
98 
99  void SetSignalWeightExpression( const TString& variable );
100  void SetBackgroundWeightExpression( const TString& variable );
101 
102  // special case: regression
103 void AddRegressionTree( TTree* tree, Double_t weight = 1.0, // inline convenience wrapper for registering a regression input tree
104 Types::ETreeType treetype = Types::kMaxTreeType ) {
105 AddTree( tree, "Regression", weight, "", treetype ); // forwards to AddTree with the fixed class name "Regression" and an empty cut
106 }
107 
108  // general
109 
110  // Data input related
111 void SetTree( TTree* tree, const TString& className, Double_t weight ); // deprecated
112  void AddTree( TTree* tree, const TString& className, Double_t weight=1.0,
113  const TCut& cut = "",
115  void AddTree( TTree* tree, const TString& className, Double_t weight, const TCut& cut, const TString& treeType );
116 
117  // set input variable
118  void SetInputVariables ( std::vector<TString>* theVariables ); // deprecated
119 
120  void AddVariable ( const TString& expression, const TString& title, const TString& unit,
121  char type='F', Double_t min = 0, Double_t max = 0 );
122  void AddVariable ( const TString& expression, char type='F',
123  Double_t min = 0, Double_t max = 0 );
124 
125  // NEW: add an array of variables (e.g. for image data) with the provided size
126  void AddVariablesArray(const TString &expression, int size, char type = 'F',
127  Double_t min = 0, Double_t max = 0);
128 
129 
130  void AddTarget ( const TString& expression, const TString& title = "", const TString& unit = "",
131  Double_t min = 0, Double_t max = 0 );
132 void AddRegressionTarget( const TString& expression, const TString& title = "", const TString& unit = "", // inline alias for AddTarget
133 Double_t min = 0, Double_t max = 0 )
134 {
135 AddTarget( expression, title, unit, min, max ); // passes every argument through to AddTarget unchanged
136 }
137  void AddSpectator ( const TString& expression, const TString& title = "", const TString& unit = "",
138  Double_t min = 0, Double_t max = 0 );
139 
140  // set weight for class
141  void SetWeightExpression( const TString& variable, const TString& className = "" );
142 
143  // set cut for class
144  void SetCut( const TString& cut, const TString& className = "" );
145  void SetCut( const TCut& cut, const TString& className = "" );
146  void AddCut( const TString& cut, const TString& className = "" );
147  void AddCut( const TCut& cut, const TString& className = "" );
148 
149 
150  // prepare input tree for training
151  void PrepareTrainingAndTestTree( const TCut& cut, const TString& splitOpt );
152  void PrepareTrainingAndTestTree( TCut sigcut, TCut bkgcut, const TString& splitOpt );
153 
154  // ... deprecated, kept for backwards compatibility
155  void PrepareTrainingAndTestTree( const TCut& cut, Int_t Ntrain, Int_t Ntest = -1 );
156 
157  void PrepareTrainingAndTestTree( const TCut& cut, Int_t NsigTrain, Int_t NbkgTrain, Int_t NsigTest, Int_t NbkgTest,
158  const TString& otherOpt="SplitMode=Random:!V" );
159 
160  // Cross validation
161  void MakeKFoldDataSet(CvSplit & s);
164 
166 
167  TH2* GetCorrelationMatrix(const TString& className);
168 
169 // Copy method used in VI and CV. DEPRECATED: you can just call Clone instead, e.g. DataLoader *dl2=(DataLoader *)dl1->Clone("dl2")
173 
174  private:
175 
176 
179 
180 
181  private:
182 
183  // data members
184 
185 
187 
188 
190 
191  std::vector<TMVA::VariableTransformBase*> fDefaultTrfs; // list of transformations on default DataSet
192 
193  // cd to local directory
194  TString fOptions; // option string given by construction (presently only "V")
195  TString fTransformations; // List of transformations to test
196  Bool_t fVerbose; // verbose mode
197 
198  // flag determining the way training and test data are assigned to DataLoader
202  DataAssignType fDataAssignType; // flags for data assigning
203  std::vector<TTree*> fTrainAssignTree; // for each class: tmp tree if user wants to assign the events directly
204  std::vector<TTree*> fTestAssignTree; // for each class: tmp tree if user wants to assign the events directly
205 
206  Int_t fATreeType = 0; // type of event (=classIndex)
207  Float_t fATreeWeight = 0.0; // weight of the event
208  std::vector<Float_t> fATreeEvent; // event variables
209 
210  Types::EAnalysisType fAnalysisType; // the training type
211 
212  protected:
213 
214  ClassDef(DataLoader,4);
215  };
217 } // namespace TMVA
218 
219 #endif
220 
TMVA::DataLoader::SetBackgroundTree
void SetBackgroundTree(TTree *background, Double_t weight=1.0)
Definition: DataLoader.cxx:438
TMVA::DataLoader::fOptions
TString fOptions
Definition: DataLoader.h:194
TMVA::DataLoader::AddVariablesArray
void AddVariablesArray(const TString &expression, int size, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating array of variables in data set info in case input tree provides an array ...
Definition: DataLoader.cxx:503
TMVA::DataLoader::fDataInputHandler
DataInputHandler * fDataInputHandler
Definition: DataLoader.h:189
TMVA::DataLoader::AddBackgroundTrainingEvent
void AddBackgroundTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background training event
Definition: DataLoader.cxx:243
TCut
A specialized string object used for TTree selections.
Definition: TCut.h:25
TMVA::DataLoader::AddEvent
void AddEvent(const TString &className, Types::ETreeType tt, const std::vector< Double_t > &event, Double_t weight)
add event; the order of values in the vector "event" is: variables + targets + spectators
Definition: DataLoader.cxx:276
TMVA::DataLoader::DataLoader
DataLoader(TString thedlName="default")
Definition: DataLoader.cxx:80
TMVA::DataLoader::SetCut
void SetCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:574
tt
auto * tt
Definition: textangle.C:16
TMVA::Configurable
Definition: Configurable.h:45
TMVA::DataLoader::PrepareTrainingAndTestTree
void PrepareTrainingAndTestTree(const TCut &cut, const TString &splitOpt)
prepare the training and test trees -> same cuts for signal and background
Definition: DataLoader.cxx:631
TMVA::DataLoader::fATreeWeight
Float_t fATreeWeight
Definition: DataLoader.h:207
TMVA::DataLoader::fTestAssignTree
std::vector< TTree * > fTestAssignTree
Definition: DataLoader.h:204
TMVA::DataLoader::AddSignalTestEvent
void AddSignalTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal testing event
Definition: DataLoader.cxx:235
TMVA::DataInputHandler
Class that contains all the data information.
Definition: DataInputHandler.h:78
TMVA::DataLoader::MakeCopy
DataLoader * MakeCopy(TString name)
Copy method used in VI and CV.
Definition: DataLoader.cxx:690
TMVA::DataLoader::fAnalysisType
Types::EAnalysisType fAnalysisType
Definition: DataLoader.h:210
tree
Definition: tree.py:1
TMVA::DataLoaderCopy
void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
TMVA::DataLoader::SetSignalWeightExpression
void SetSignalWeightExpression(const TString &variable)
Definition: DataLoader.cxx:548
TMVA::DataLoader::AddTree
void AddTree(TTree *tree, const TString &className, Double_t weight=1.0, const TCut &cut="", Types::ETreeType tt=Types::kMaxTreeType)
Definition: DataLoader.cxx:350
TTree
A TTree represents a columnar dataset.
Definition: TTree.h:79
Float_t
float Float_t
Definition: RtypesCore.h:57
TGeant4Unit::s
static constexpr double s
Definition: TGeant4SystemOfUnits.h:162
TMVA::DataLoader::fTrainAssignTree
std::vector< TTree * > fTrainAssignTree
Definition: DataLoader.h:203
TMVA::DataLoader::SetTree
void SetTree(TTree *tree, const TString &className, Double_t weight)
set tree for the given class
Definition: DataLoader.cxx:446
TMVA::DataLoader::SetWeightExpression
void SetWeightExpression(const TString &variable, const TString &className="")
Definition: DataLoader.cxx:562
TMVA::DataLoader::kAssignEvents
@ kAssignEvents
Definition: DataLoader.h:201
TMVA::DataLoader::AddSignalTree
void AddSignalTree(TTree *signal, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add signal tree with the given weight and tree type
Definition: DataLoader.cxx:370
TString
Basic string class.
Definition: TString.h:136
TMVA::DataLoader::fTransformations
TString fTransformations
Definition: DataLoader.h:195
bool
TMVA::DataLoader::AddRegressionTarget
void AddRegressionTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
Definition: DataLoader.h:132
TMVA::DataLoader::AddSpectator
void AddSpectator(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts spectator in data set info
Definition: DataLoader.cxx:523
TMVA::DataLoader::AddTarget
void AddTarget(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0)
user inserts target in data set info
Definition: DataLoader.cxx:511
TMVA::DataLoader::SetInputTreesFromEventAssignTrees
void SetInputTreesFromEventAssignTrees()
assign event-wise local trees to data set
Definition: DataLoader.cxx:318
TMVA::DataLoader::UserAssignEvents
Bool_t UserAssignEvents(UInt_t clIndex)
Definition: DataLoader.cxx:310
TMVA::DataLoader::kUndefined
@ kUndefined
Definition: DataLoader.h:199
TMVA::DataSetInfo
Class that contains all the data information.
Definition: DataSetInfo.h:62
TMVA::DataLoader::SetBackgroundWeightExpression
void SetBackgroundWeightExpression(const TString &variable)
Definition: DataLoader.cxx:555
TMVA::Types::EAnalysisType
EAnalysisType
Definition: Types.h:128
TMVA::Types::ETreeType
ETreeType
Definition: Types.h:144
TMVA::DataLoader::fATreeType
Int_t fATreeType
Definition: DataLoader.h:206
TMVA::DataLoader::AddBackgroundTestEvent
void AddBackgroundTestEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add background test event
Definition: DataLoader.cxx:251
TH2
Service class for 2-Dim histogram classes.
Definition: TH2.h:30
TCut.h
UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:46
TMVA::CvSplit
Definition: CvSplit.h:37
TMVA::DataLoader::SetInputTrees
void SetInputTrees(const TString &signalFileName, const TString &backgroundFileName, Double_t signalWeight=1.0, Double_t backgroundWeight=1.0)
Definition: DataLoader.cxx:463
TMVA::DataLoader::GetCorrelationMatrix
TH2 * GetCorrelationMatrix(const TString &className)
returns the correlation matrix of datasets
Definition: DataLoader.cxx:716
Types.h
Configurable.h
TFile
A ROOT file is a suite of consecutive data records (TKey instances) with a well defined format.
Definition: TFile.h:54
TMVA::DataLoader::fDefaultTrfs
std::vector< TMVA::VariableTransformBase * > fDefaultTrfs
Definition: DataLoader.h:191
TMVA::DataLoader::PrepareFoldDataSet
void PrepareFoldDataSet(CvSplit &s, UInt_t foldNumber, Types::ETreeType tt=Types::kTraining)
Function for assigning the correct folds to the testing or training set.
Definition: DataLoader.cxx:669
TMVA::Types::kMaxTreeType
@ kMaxTreeType
Definition: Types.h:147
unsigned int
TMVA::DataLoader::AddCut
void AddCut(const TString &cut, const TString &className="")
Definition: DataLoader.cxx:587
TMVA::DataLoader::CreateEventAssignTrees
TTree * CreateEventAssignTrees(const TString &name)
create the data assignment tree (for event-wise data assignment by user)
Definition: DataLoader.cxx:194
TMVA::DataLoader::fDataAssignType
DataAssignType fDataAssignType
Definition: DataLoader.h:202
TMVA::Types::kTraining
@ kTraining
Definition: Types.h:145
TMVA::DataSetManager
Class that contains all the data information.
Definition: DataSetManager.h:51
TMVA::DataLoader::SetSignalTree
void SetSignalTree(TTree *signal, Double_t weight=1.0)
Definition: DataLoader.cxx:431
Double_t
double Double_t
Definition: RtypesCore.h:59
TMVA::DataLoader::DataAssignType
DataAssignType
Definition: DataLoader.h:199
TMVA::DataLoader::fDataSetManager
DataSetManager * fDataSetManager
Definition: DataLoader.h:186
TMVA::DataLoader::GetDefaultDataSetInfo
const DataSetInfo & GetDefaultDataSetInfo()
Definition: DataLoader.h:165
TMVA::DataLoader::DataInput
DataInputHandler & DataInput()
Definition: DataLoader.h:172
TMVA::DataLoader::AddRegressionTree
void AddRegressionTree(TTree *tree, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
Definition: DataLoader.h:103
ClassDef
#define ClassDef(name, id)
Definition: Rtypes.h:325
name
char name[80]
Definition: TGX11.cxx:110
TMVA::DataLoader::RecombineKFoldDataSet
void RecombineKFoldDataSet(CvSplit &s, Types::ETreeType tt=Types::kTraining)
Recombines the dataset.
Definition: DataLoader.cxx:682
TMVA::DataLoader::fVerbose
Bool_t fVerbose
Definition: DataLoader.h:196
TMVA::DataLoader::fATreeEvent
std::vector< Float_t > fATreeEvent
Definition: DataLoader.h:208
TMVA::DataLoader::kAssignTrees
@ kAssignTrees
Definition: DataLoader.h:200
TMVA::DataLoader::AddBackgroundTree
void AddBackgroundTree(TTree *background, Double_t weight=1.0, Types::ETreeType treetype=Types::kMaxTreeType)
add background tree with the given weight and tree type
Definition: DataLoader.cxx:401
TMVA::DataLoader::AddDataSet
DataSetInfo & AddDataSet(DataSetInfo &)
Definition: DataLoader.cxx:119
type
int type
Definition: TGX11.cxx:121
TMVA::DataLoader::AddVariable
void AddVariable(const TString &expression, const TString &title, const TString &unit, char type='F', Double_t min=0, Double_t max=0)
user inserts discriminating variable in data set info
Definition: DataLoader.cxx:484
TMVA::DataLoader::DataLoaderCopy
friend void DataLoaderCopy(TMVA::DataLoader *des, TMVA::DataLoader *src)
TMVA::DataLoader::MakeKFoldDataSet
void MakeKFoldDataSet(CvSplit &s)
Function required to split the training and testing datasets into a number of folds.
Definition: DataLoader.cxx:661
TMVA::DataLoader::SetInputVariables
void SetInputVariables(std::vector< TString > *theVariables)
fill input variables in data set
Definition: DataLoader.cxx:540
TMVA::DataLoader::GetDataSetInfo
DataSetInfo & GetDataSetInfo()
Definition: DataLoader.cxx:137
DataSet.h
TMVA::DataLoader::~DataLoader
virtual ~DataLoader()
Definition: DataLoader.cxx:96
TMVA::DataLoader::AddTestEvent
void AddTestEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add test event for the given class
Definition: DataLoader.cxx:267
TMVA::DataLoader::VarTransform
DataLoader * VarTransform(TString trafoDefinition)
Transforms the variables and return a new DataLoader with the transformed variables.
Definition: DataLoader.cxx:146
TMVA
create variable transformations
Definition: GeneticMinimizer.h:22
int
TMVA::DataLoader::AddSignalTrainingEvent
void AddSignalTrainingEvent(const std::vector< Double_t > &event, Double_t weight=1.0)
add signal training event
Definition: DataLoader.cxx:227
TMVA::DataLoader
Definition: DataLoader.h:50
TMVA::DataLoader::DefaultDataSetInfo
DataSetInfo & DefaultDataSetInfo()
default creation
Definition: DataLoader.cxx:532
TMVA::DataLoader::AddTrainingEvent
void AddTrainingEvent(const TString &className, const std::vector< Double_t > &event, Double_t weight)
add training event for the given class
Definition: DataLoader.cxx:259