Logo ROOT  
Reference Guide
DataSetFactory.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : DataSetFactory *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Contains all the data information *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17 * Eckhard von Toerne <evt@physik.uni-bonn.de> - U. of Bonn, Germany *
18 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19 * *
20 * Copyright (c) 2006: *
21 * CERN, Switzerland *
22 * MPI-K Heidelberg, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (http://tmva.sourceforge.net/LICENSE) *
27 **********************************************************************************/
28
29#ifndef ROOT_TMVA_DataSetFactory
30#define ROOT_TMVA_DataSetFactory
31
32//////////////////////////////////////////////////////////////////////////
33// //
34// DataSetFactory //
35// //
36// Class that contains all the data information //
37// //
38//////////////////////////////////////////////////////////////////////////
39
40#include <vector>
41
42#include "TString.h"
43#include "TTree.h"
44#include "TCut.h"
45#include "TTreeFormula.h"
46#include "TMatrixDfwd.h"
47#include "TPrincipal.h"
48#include "TRandom3.h"
49
50#include "TMVA/Types.h"
51#include "TMVA/VariableInfo.h"
52#include "TMVA/Event.h"
53
54namespace TMVA {
55
56 class DataSet;
57 class DataSetInfo;
58 class DataInputHandler;
59 class TreeInfo;
60 class MsgLogger;
61
62 // =============== maybe move these elsewhere (e.g. into the tools)
63
64 // =============== functors =======================
65
66 // delete-functor (to be used in e.g. for_each algorithm)
67 template<class T>
69 {
71 delete p;
72 return *this;
73 }
74 };
75
76 template<class T>
78 {
80 }
81
82
83 template< typename T >
84 class Increment {
86 public:
87 Increment( T start ) : value( start ){ }
89 return value++;
90 }
91 };
92
93
94
95 template <typename F>
96 class null_t
97 {
98 private:
99 // returns argF
100 public:
102 F operator()(const F& argF) const
103 {
104 return argF;
105 }
106 };
107
108 template <typename F>
109 inline null_t<F> null() {
110 return null_t<F>();
111 }
112
113
114
// Binary composition adaptor: invoking the object with (argG, argH)
// evaluates f(g(argG), h(argH)).
//
// Requirements on the template arguments:
//   F - callable with the results of G and H; must expose `result_type`
//   G - unary callable; must expose `argument_type`
//   H - unary callable; must expose `argument_type`
//
// The adaptor publishes first_argument_type / second_argument_type /
// result_type itself rather than inheriting them from std::binary_function,
// which was deprecated in C++11 and removed in C++17.
//
// The functors are stored by value (as the standard library adaptors do), so
// the adaptor stays valid after temporaries passed to the constructor have
// been destroyed, and it remains copy-assignable.
template <typename F, typename G, typename H>
class compose_binary_t
{
public:
   typedef typename G::argument_type first_argument_type;
   typedef typename H::argument_type second_argument_type;
   typedef typename F::result_type   result_type;

   // Copies the three functors into the adaptor.
   compose_binary_t(const F& _f, const G& _g, const H& _h) : f(_f), g(_g), h(_h)
   {
   }

   // Returns f(g(argG), h(argH)).
   result_type operator()(const first_argument_type& argG,
                          const second_argument_type& argH) const
   {
      return f(g(argG), h(argH));
   }

private:
   F f; // outer functor, combines the two inner results
   G g; // applied to the first argument
   H h; // applied to the second argument
};
135
136 template <typename F, typename G, typename H>
137 inline compose_binary_t<F,G,H> compose_binary(const F& _f, const G& _g, const H& _h) {
138 return compose_binary_t<F,G,H>(_f,_g,_h);
139 }
140
141
142
143
// Unary composition adaptor: invoking the object with argG evaluates
// f(g(argG)).
//
// Requirements on the template arguments:
//   F - callable with the result of G; must expose `result_type`
//   G - unary callable; must expose `argument_type`
//
// The adaptor publishes argument_type / result_type itself rather than
// inheriting them from std::unary_function, which was deprecated in C++11
// and removed in C++17.
//
// The functors are stored by value (as the standard library adaptors do), so
// the adaptor stays valid after temporaries passed to the constructor have
// been destroyed, and it remains copy-assignable.
template <typename F, typename G>
class compose_unary_t
{
public:
   typedef typename G::argument_type argument_type;
   typedef typename F::result_type   result_type;

   // Copies both functors into the adaptor.
   compose_unary_t(const F& _f, const G& _g) : f(_f), g(_g)
   {
   }

   // Returns f(g(argG)).
   result_type operator()(const argument_type& argG) const
   {
      return f(g(argG));
   }

private:
   F f; // outer functor, applied to the result of g
   G g; // inner functor, applied to the argument
};
161
162 template <typename F, typename G>
163 inline compose_unary_t<F,G> compose_unary(const F& _f, const G& _g) {
164 return compose_unary_t<F,G>(_f,_g);
165 }
166
167 // =============== functors =======================
168
169
170 // =========================================================
171
172
173 class DataSetFactory:public TObject {
174
175 typedef std::vector<Event* > EventVector;
176 typedef std::vector< EventVector > EventVectorOfClasses;
177 typedef std::map<Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType;
178 typedef std::map<Types::ETreeType, EventVector > EventVectorOfTreeType;
179
180 typedef std::vector< Double_t > ValuePerClass;
181 typedef std::map<Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType;
182
184 public:
200 nEvBeforeCut(0),
201 nEvAfterCut(0),
203 nWeEvAfterCut(0),
204 nNegWeights(0),
205 varAvLength(0)
206 {}
207 ~EventStats() { delete[] varAvLength; }
209 };
210
211 typedef std::vector< int > NumberPerClass;
212 typedef std::vector< EventStats > EvtStatsPerClass;
213
214 public:
215
217
219
221 protected:
222
223
226
227 // ---------- new versions
228 void BuildEventVector ( DataSetInfo& dsi,
229 DataInputHandler& dataInput,
231 EvtStatsPerClass& eventCounts);
232
235 EvtStatsPerClass& eventCounts,
236 const TString& splitMode,
237 const TString& mixMode,
238 const TString& normMode,
239 UInt_t splitSeed);
240
241 void RenormEvents ( DataSetInfo& dsi,
243 const EvtStatsPerClass& eventCounts,
244 const TString& normMode );
245
246 void InitOptions ( DataSetInfo& dsi,
247 EvtStatsPerClass& eventsmap,
248 TString& normMode, UInt_t& splitSeed,
249 TString& splitMode, TString& mixMode);
250
251
252 // ------------------------
253
254 // auxiliary functions to compute correlations
255 TMatrixD* CalcCorrelationMatrix( DataSet*, const UInt_t classNumber );
256 TMatrixD* CalcCovarianceMatrix ( DataSet*, const UInt_t classNumber );
257 void CalcMinMax ( DataSet*, DataSetInfo& dsi );
258
259 // resets branch addresses to current event
262 void ChangeToNewTree( TreeInfo&, const DataSetInfo & );
263 Bool_t CheckTTreeFormula( TTreeFormula* ttf, const TString& expression, Bool_t& hasDollar );
264
265 // verbosity
266 Bool_t Verbose() { return fVerbose; }
267
268 // data members
269
270 // verbosity
271 Bool_t fVerbose; // Verbosity
272 TString fVerboseLevel; // VerboseLevel
273
274 // Printing
275 Bool_t fCorrelations = kFALSE; // Whether to print correlations or not
276 Bool_t fComputeCorrelations = kFALSE; // Whether to force computation of correlations or not
277
278 Bool_t fScaleWithPreselEff; // how to deal with requested #events in connection with preselection cuts
279
280 // the event
281 TTree* fCurrentTree; // the tree, events are currently read from
282 UInt_t fCurrentEvtIdx; // the current event (to avoid reading of the same event)
283
284 // the formulas for reading the original tree
285 std::vector<TTreeFormula*> fInputFormulas; // input variables
286 std::vector<std::pair<TTreeFormula*, Int_t>> fInputTableFormulas; //! input variables expression for arrays
287 std::vector<TTreeFormula *> fTargetFormulas; // targets
288 std::vector<TTreeFormula*> fCutFormulas; // cuts
289 std::vector<TTreeFormula*> fWeightFormula; // weights
290 std::vector<TTreeFormula*> fSpectatorFormulas; // spectators
291
292 MsgLogger* fLogger; //! message logger
293 MsgLogger& Log() const { return *fLogger; }
294 public:
296 };
297}
298
299#endif
const Bool_t kFALSE
Definition: RtypesCore.h:90
bool Bool_t
Definition: RtypesCore.h:61
double Double_t
Definition: RtypesCore.h:57
float Float_t
Definition: RtypesCore.h:55
#define ClassDef(name, id)
Definition: Rtypes.h:322
Class that contains all the data information.
Class that contains all the data information.
std::vector< TTreeFormula * > fSpectatorFormulas
std::vector< TTreeFormula * > fWeightFormula
MsgLogger & Log() const
message logger
DataSet * BuildInitialDataSet(DataSetInfo &, TMVA::DataInputHandler &)
if no entries, then create a DataSet with one Event which uses dynamic variables (pointers to variabl...
DataSetFactory()
constructor
std::map< Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType
void ChangeToNewTree(TreeInfo &, const DataSetInfo &)
While the data gets copied into the local training and testing trees, the input tree can change (for ...
std::map< Types::ETreeType, EventVector > EventVectorOfTreeType
std::vector< std::pair< TTreeFormula *, Int_t > > fInputTableFormulas
void BuildEventVector(DataSetInfo &dsi, DataInputHandler &dataInput, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts)
builds empty event vectors and distributes events between kTraining/kTesting/kMaxTreeType
DataSet * CreateDataSet(DataSetInfo &, DataInputHandler &)
steering the creation of a new dataset
DataSet * MixEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts, const TString &splitMode, const TString &mixMode, const TString &normMode, UInt_t splitSeed)
Select and distribute unassigned events to kTraining and kTesting.
std::vector< int > NumberPerClass
std::vector< TTreeFormula * > fInputFormulas
std::vector< EventVector > EventVectorOfClasses
void InitOptions(DataSetInfo &dsi, EvtStatsPerClass &eventsmap, TString &normMode, UInt_t &splitSeed, TString &splitMode, TString &mixMode)
the dataset splitting
void CalcMinMax(DataSet *, DataSetInfo &dsi)
compute covariance matrix
std::vector< Double_t > ValuePerClass
DataSet * BuildDynamicDataSet(DataSetInfo &)
std::vector< EventStats > EvtStatsPerClass
std::vector< TTreeFormula * > fTargetFormulas
input variables expression for arrays
Bool_t CheckTTreeFormula(TTreeFormula *ttf, const TString &expression, Bool_t &hasDollar)
checks a TTreeFormula for problems
std::vector< TTreeFormula * > fCutFormulas
void ResetBranchAndEventAddresses(TTree *)
std::map< Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType
void RenormEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, const EvtStatsPerClass &eventCounts, const TString &normMode)
renormalisation of the TRAINING event weights
TMatrixD * CalcCorrelationMatrix(DataSet *, const UInt_t classNumber)
computes correlation matrix for variables "theVars" in tree; "theType" defines the required event "ty...
TMatrixD * CalcCovarianceMatrix(DataSet *, const UInt_t classNumber)
compute covariance matrix
std::vector< Event * > EventVector
Class that contains all the data information.
Definition: DataSetInfo.h:60
Class that contains all the data information.
Definition: DataSet.h:69
Increment(T start)
ostringstream derivative to redirect and format output
Definition: MsgLogger.h:59
compose_binary_t(const F &_f, const G &_g, const H &_h)
F::result_type operator()(const typename G::argument_type &argG, const typename H::argument_type &argH) const
F::result_type operator()(const typename G::argument_type &argG) const
compose_unary_t(const F &_f, const G &_g)
F operator()(const F &argF) const
Mother of all ROOT objects.
Definition: TObject.h:37
Basic string class.
Definition: TString.h:131
Used to pass a selection expression to the Tree drawing routine.
Definition: TTreeFormula.h:58
A TTree represents a columnar dataset.
Definition: TTree.h:78
#define F(x, y, z)
#define G(x, y, z)
#define H(x, y, z)
double T(double x)
Definition: ChebyshevPol.h:34
create variable transformations
compose_unary_t< F, G > compose_unary(const F &_f, const G &_g)
DeleteFunctor_t< const T > DeleteFunctor()
null_t< F > null()
compose_binary_t< F, G, H > compose_binary(const F &_f, const G &_g, const H &_h)
DeleteFunctor_t & operator()(const T *p)