Logo ROOT  
Reference Guide
DataSetFactory.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : DataSetFactory *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Contains all the data information *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17  * Eckhard von Toerne <evt@physik.uni-bonn.de> - U. of Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * *
20  * Copyright (c) 2006: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef ROOT_TMVA_DataSetFactory
30 #define ROOT_TMVA_DataSetFactory
31 
32 //////////////////////////////////////////////////////////////////////////
33 // //
34 // DataSetFactory //
35 // //
36 // Class that contains all the data information //
37 // //
38 //////////////////////////////////////////////////////////////////////////
39 
40 #include <vector>
41 #include <map>
42 
43 #include "TString.h"
44 #include "TTree.h"
45 #include "TCut.h"
46 #include "TTreeFormula.h"
47 #include "TMatrixDfwd.h"
48 #include "TPrincipal.h"
49 #include "TRandom3.h"
50 
51 #include "TMVA/Types.h"
52 #include "TMVA/VariableInfo.h"
53 #include "TMVA/Event.h"
54 
55 namespace TMVA {
56 
57  class DataSet;
58  class DataSetInfo;
59  class DataInputHandler;
60  class TreeInfo;
61  class MsgLogger;
62 
63  // =============== maybe move these elsewhere (e.g. into the tools)
64 
65  // =============== functors =======================
66 
67  // delete-functor (to be used in e.g. for_each algorithm)
// Function object that destroys the heap object behind the pointer it is
// called with, so it can be handed to algorithms such as std::for_each that
// walk a range of pointers. The call operator returns the functor itself.
template <class T>
struct DeleteFunctor_t
{
   DeleteFunctor_t& operator()(const T* p)
   {
      delete p;
      DeleteFunctor_t& self = *this;
      return self;
   }
};
76 
77  template<class T>
78  DeleteFunctor_t<const T> DeleteFunctor()
79  {
80  return DeleteFunctor_t<const T>();
81  }
82 
83 
// Generator functor producing a running sequence: every call hands back the
// value currently stored and then advances it with post-increment.
template <typename T>
class Increment {
public:
   Increment( T start ) : value( start ) {}

   T operator()()
   {
      T current = value++;
      return current;
   }

private:
   T value; // value handed out by the next call
};
93 
94 
95 
// Identity functor: the call operator returns a copy of its argument.
// argument_type names the accepted type; the compose_* adaptors in this file
// read that typedef from the functors they wrap.
template <typename F>
class null_t
{
public:
   typedef F argument_type;

   F operator()(const F& argF) const { return argF; }
};
108 
109  template <typename F>
110  inline null_t<F> null() {
111  return null_t<F>();
112  }
113 
114 
115 
// Binary composition functor: operator()(x, y) evaluates f(g(x), h(y)).
//
// Requirements: F exposes result_type; G and H expose argument_type; all three
// are copy-constructible and callable through a const object.
//
// The nested typedefs first_argument_type / second_argument_type / result_type
// used to be inherited from std::binary_function. That base class was removed
// from the standard library in C++17, so they are declared here directly.
//
// The functors are stored by value (not by reference), so a composite remains
// usable after the objects passed to the constructor have been destroyed.
template <typename F, typename G, typename H>
class compose_binary_t
{
public:
   using first_argument_type  = typename G::argument_type;
   using second_argument_type = typename H::argument_type;
   using result_type          = typename F::result_type;

   // Copies the three functors into the composite.
   compose_binary_t(const F& _f, const G& _g, const H& _h) : f(_f), g(_g), h(_h)
   {
   }

   // Returns f(g(argG), h(argH)).
   result_type operator()(const first_argument_type& argG,
                          const second_argument_type& argH) const
   {
      return f(g(argG), h(argH));
   }

private:
   F f; // outer functor, combines the two inner results
   G g; // applied to the first argument
   H h; // applied to the second argument
};
136 
137  template <typename F, typename G, typename H>
138  inline compose_binary_t<F,G,H> compose_binary(const F& _f, const G& _g, const H& _h) {
139  return compose_binary_t<F,G,H>(_f,_g,_h);
140  }
141 
142 
143 
144 
// Unary composition functor: operator()(x) evaluates f(g(x)).
//
// Requirements: F exposes result_type; G exposes argument_type; both are
// copy-constructible and callable through a const object.
//
// The nested typedefs argument_type / result_type used to be inherited from
// std::unary_function. That base class was removed from the standard library
// in C++17, so they are declared here directly.
//
// The functors are stored by value (not by reference), so a composite remains
// usable after the objects passed to the constructor have been destroyed.
template <typename F, typename G>
class compose_unary_t
{
public:
   using argument_type = typename G::argument_type;
   using result_type   = typename F::result_type;

   // Copies both functors into the composite.
   compose_unary_t(const F& _f, const G& _g) : f(_f), g(_g)
   {
   }

   // Returns f(g(argG)).
   result_type operator()(const argument_type& argG) const
   {
      return f(g(argG));
   }

private:
   F f; // outer functor, applied to the result of g
   G g; // inner functor, applied to the argument
};
162 
163  template <typename F, typename G>
164  inline compose_unary_t<F,G> compose_unary(const F& _f, const G& _g) {
165  return compose_unary_t<F,G>(_f,_g);
166  }
167 
168  // =============== end of functors ================
169 
170 
171  // =========================================================
172 
173 
     // Declaration of DataSetFactory, a TObject-derived class.
     // NOTE(review): the numbering of this listing has gaps (e.g. 185 -> 195,
     // 196 -> 203, 230 -> 232), so some members, initialisers and parameters
     // of the class are not visible here; consult the real header before editing.
174  class DataSetFactory:public TObject {
175 
     // containers of Event pointers, grouped per class and per tree type
176  typedef std::vector<Event* > EventVector;
177  typedef std::vector< EventVector > EventVectorOfClasses;
178  typedef std::map<Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType;
179  typedef std::map<Types::ETreeType, EventVector > EventVectorOfTreeType;
180 
     // one Double_t per class, optionally keyed by tree type
181  typedef std::vector< Double_t > ValuePerClass;
182  typedef std::map<Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType;
183 
     // Per-class event bookkeeping (most members are missing from this listing).
184  class EventStats {
185  public:
195  Float_t* varAvLength;//->
196  EventStats():
203  nWeEvBeforeCut(0),
206  varAvLength(0)
207  {}
     // Releases the varAvLength array.
     // NOTE(review): no copy constructor / copy assignment is visible in this
     // listing although EventStats is held by value in EvtStatsPerClass
     // (std::vector< EventStats >) -- confirm a copy cannot lead to a double delete[].
208  ~EventStats() { delete[] varAvLength; }
210  };
211 
212  typedef std::vector< int > NumberPerClass;
213  typedef std::vector< EventStats > EvtStatsPerClass;
214 
215  public:
216 
218 
220 
222  protected:
223 
224 
227 
228  // ---------- new versions
     // NOTE(review): the declarations below are shown with parameter lines
     // missing (see the gaps in the listing numbers).
229  void BuildEventVector ( DataSetInfo& dsi,
230  DataInputHandler& dataInput,
232  EvtStatsPerClass& eventCounts);
233 
234  DataSet* MixEvents ( DataSetInfo& dsi,
236  EvtStatsPerClass& eventCounts,
237  const TString& splitMode,
238  const TString& mixMode,
239  const TString& normMode,
240  UInt_t splitSeed);
241 
242  void RenormEvents ( DataSetInfo& dsi,
244  const EvtStatsPerClass& eventCounts,
245  const TString& normMode );
246 
247  void InitOptions ( DataSetInfo& dsi,
248  EvtStatsPerClass& eventsmap,
249  TString& normMode, UInt_t& splitSeed,
250  TString& splitMode, TString& mixMode);
251 
252 
253  // ------------------------
254 
255  // auxiliary functions to compute correlations
256  TMatrixD* CalcCorrelationMatrix( DataSet*, const UInt_t classNumber );
257  TMatrixD* CalcCovarianceMatrix ( DataSet*, const UInt_t classNumber );
258  void CalcMinMax ( DataSet*, DataSetInfo& dsi );
259 
260  // resets branch addresses to current event
     // ResetCurrentTree only sets fCurrentTree to 0; it does not touch the tree itself.
262  void ResetCurrentTree() { fCurrentTree = 0; }
263  void ChangeToNewTree( TreeInfo&, const DataSetInfo & );
264  Bool_t CheckTTreeFormula( TTreeFormula* ttf, const TString& expression, Bool_t& hasDollar );
265 
266  // verbosity
     // returns the fVerbose flag (not declared const)
267  Bool_t Verbose() { return fVerbose; }
268 
269  // data members
270 
271  // verbosity
272  Bool_t fVerbose; // Verbosity
273  TString fVerboseLevel; // VerboseLevel
274 
275  // Printing
276  Bool_t fCorrelations = kFALSE; // Whether to print correlations or not
277  Bool_t fComputeCorrelations = kFALSE; // Whether to force computation of correlations or not
278 
279  Bool_t fScaleWithPreselEff; // how to deal with requested #events in connection with preselection cuts
280 
281  // the event
282  TTree* fCurrentTree; // the tree, events are currently read from
283  UInt_t fCurrentEvtIdx; // the current event (to avoid reading of the same event)
284 
285  // the formulas for reading the original tree
286  std::vector<TTreeFormula*> fInputFormulas; // input variables
287  std::vector<std::pair<TTreeFormula*, Int_t>> fInputTableFormulas; //! input variables expression for arrays
288  std::vector<TTreeFormula *> fTargetFormulas; // targets
289  std::vector<TTreeFormula*> fCutFormulas; // cuts
290  std::vector<TTreeFormula*> fWeightFormula; // weights
291  std::vector<TTreeFormula*> fSpectatorFormulas; // spectators
292 
293  MsgLogger* fLogger; //! message logger
     // dereferences fLogger without a null check
294  MsgLogger& Log() const { return *fLogger; }
295  public:
297  };
298 }
299 
300 #endif
TMatrixDfwd.h
TMVA::DataSetFactory::EventStats::EventStats
EventStats()
Definition: DataSetFactory.h:219
TMVA::DataSetFactory::fSpectatorFormulas
std::vector< TTreeFormula * > fSpectatorFormulas
Definition: DataSetFactory.h:314
TMVA::DataSetFactory::ChangeToNewTree
void ChangeToNewTree(TreeInfo &, const DataSetInfo &)
While the data gets copied into the local training and testing trees, the input tree can change (for ...
Definition: DataSetFactory.cxx:288
TPrincipal.h
TMVA::DataSetFactory::CalcMinMax
void CalcMinMax(DataSet *, DataSetInfo &dsi)
compute covariance matrix
Definition: DataSetFactory.cxx:474
TMVA::DeleteFunctor
DeleteFunctor_t< const T > DeleteFunctor()
Definition: DataSetFactory.h:101
TMVA::compose_binary_t::f
const F & f
Definition: DataSetFactory.h:145
TMVA::DataSetFactory::fCorrelations
Bool_t fCorrelations
Definition: DataSetFactory.h:299
TMVA::null_t
Definition: DataSetFactory.h:120
TMVA::DataSetFactory::fComputeCorrelations
Bool_t fComputeCorrelations
Definition: DataSetFactory.h:300
TMVA::DataSetFactory::BuildEventVector
void BuildEventVector(DataSetInfo &dsi, DataInputHandler &dataInput, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts)
build empty event vectors; distributes events between kTraining/kTesting/kMaxTreeType
Definition: DataSetFactory.cxx:723
H
#define H(x, y, z)
TMVA::DataSetFactory::fCutFormulas
std::vector< TTreeFormula * > fCutFormulas
Definition: DataSetFactory.h:312
TMVA::DataSetFactory::BuildDynamicDataSet
DataSet * BuildDynamicDataSet(DataSetInfo &)
Definition: DataSetFactory.cxx:149
TMVA::DataSetFactory::EvtStatsPerClass
std::vector< EventStats > EvtStatsPerClass
Definition: DataSetFactory.h:236
TMVA::DataSetFactory::EventStats::nWeEvBeforeCut
Float_t nWeEvBeforeCut
Definition: DataSetFactory.h:215
TMVA::DataSetFactory::ValuePerClass
std::vector< Double_t > ValuePerClass
Definition: DataSetFactory.h:204
TMVA::DataSetFactory::Log
MsgLogger & Log() const
message logger
Definition: DataSetFactory.h:317
TMVA::DataSetFactory::EventVectorOfClasses
std::vector< EventVector > EventVectorOfClasses
Definition: DataSetFactory.h:200
TMVA::DataSetFactory::InitOptions
void InitOptions(DataSetInfo &dsi, EvtStatsPerClass &eventsmap, TString &normMode, UInt_t &splitSeed, TString &splitMode, TString &mixMode)
the dataset splitting
Definition: DataSetFactory.cxx:633
TMVA::DataInputHandler
Definition: DataInputHandler.h:100
F
#define F(x, y, z)
TMVA::DataSetFactory::fVerbose
Bool_t fVerbose
Definition: DataSetFactory.h:295
TMVA::compose_binary_t::h
const H & h
Definition: DataSetFactory.h:147
TMVA::DataSetFactory::ResetCurrentTree
void ResetCurrentTree()
Definition: DataSetFactory.h:285
TMVA::compose_unary_t::f
const F & f
Definition: DataSetFactory.h:173
TTree
Definition: TTree.h:79
TMVA::DataSetFactory::CalcCovarianceMatrix
TMatrixD * CalcCovarianceMatrix(DataSet *, const UInt_t classNumber)
compute covariance matrix
Definition: DataSetFactory.cxx:574
TMVA::null_t::operator()
F operator()(const F &argF) const
Definition: DataSetFactory.h:126
TTreeFormula.h
TMVA::DataSetFactory::fCurrentEvtIdx
UInt_t fCurrentEvtIdx
Definition: DataSetFactory.h:306
Float_t
float Float_t
Definition: RtypesCore.h:57
VariableInfo.h
TMVA::Increment::value
T value
Definition: DataSetFactory.h:109
TMVA::DataSetFactory::EventStats::nTestingEventsRequested
Int_t nTestingEventsRequested
Definition: DataSetFactory.h:210
TMVA::DataSetFactory::EventStats::varAvLength
Float_t * varAvLength
Definition: DataSetFactory.h:218
TMVA::DataSetFactory::EventStats::nInitialEvents
Int_t nInitialEvents
Definition: DataSetFactory.h:212
TMVA::compose_binary_t
Definition: DataSetFactory.h:140
TMVA::DataSetFactory
Definition: DataSetFactory.h:197
TMVA::Increment::Increment
Increment(T start)
Definition: DataSetFactory.h:111
TMVA::DataSetFactory::EventVectorOfClassesOfTreeType
std::map< Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType
Definition: DataSetFactory.h:201
TMVA::compose_binary_t::g
const G & g
Definition: DataSetFactory.h:146
TTree.h
TMVA::DataSetFactory::BuildInitialDataSet
DataSet * BuildInitialDataSet(DataSetInfo &, TMVA::DataInputHandler &)
if no entries, then create a DataSet with one Event which uses dynamic variables (pointers to variabl...
Definition: DataSetFactory.cxx:197
TMVA::DataSetFactory::fInputFormulas
std::vector< TTreeFormula * > fInputFormulas
Definition: DataSetFactory.h:309
TString
Definition: TString.h:136
TMVA::compose_unary_t::operator()
F::result_type operator()(const typename G::argument_type &argG) const
Definition: DataSetFactory.h:180
TMatrixT
Definition: TMatrixDfwd.h:22
Bool_t
bool Bool_t
Definition: RtypesCore.h:63
TString.h
TMVA::DataSetFactory::EventStats::~EventStats
~EventStats()
Definition: DataSetFactory.h:231
TMVA::DataSetFactory::fScaleWithPreselEff
Bool_t fScaleWithPreselEff
Definition: DataSetFactory.h:302
G
#define G(x, y, z)
TMVA::compose_binary_t::operator()
F::result_type operator()(const typename G::argument_type &argG, const typename H::argument_type &argH) const
Definition: DataSetFactory.h:153
TMVA::DataSetFactory::fWeightFormula
std::vector< TTreeFormula * > fWeightFormula
Definition: DataSetFactory.h:313
TMVA::DataSetFactory::EventStats::nNegWeights
Double_t nNegWeights
Definition: DataSetFactory.h:217
TMVA::DataSetInfo
Definition: DataSetInfo.h:62
TMVA::compose_binary
compose_binary_t< F, G, H > compose_binary(const F &_f, const G &_g, const H &_h)
Definition: DataSetFactory.h:161
TMVA::DataSetFactory::NumberPerClass
std::vector< int > NumberPerClass
Definition: DataSetFactory.h:235
TMVA::TreeInfo
Definition: DataInputHandler.h:74
TMVA::DataSetFactory::CheckTTreeFormula
Bool_t CheckTTreeFormula(TTreeFormula *ttf, const TString &expression, Bool_t &hasDollar)
checks a TTreeFormula for problems
Definition: DataSetFactory.cxx:246
TMVA::DataSetFactory::~DataSetFactory
~DataSetFactory()
destructor
Definition: DataSetFactory.cxx:107
TMVA::compose_unary_t
Definition: DataSetFactory.h:169
TMVA::DataSetFactory::CreateDataSet
DataSet * CreateDataSet(DataSetInfo &, DataInputHandler &)
steering the creation of a new dataset
Definition: DataSetFactory.cxx:123
TMVA::DataSetFactory::EventStats::TrainTestSplitRequested
Float_t TrainTestSplitRequested
Definition: DataSetFactory.h:211
TMVA::compose_unary_t::compose_unary_t
compose_unary_t(const F &_f, const G &_g)
Definition: DataSetFactory.h:176
kFALSE
const Bool_t kFALSE
Definition: RtypesCore.h:92
TMVA::DataSetFactory::RenormEvents
void RenormEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, const EvtStatsPerClass &eventCounts, const TString &normMode)
renormalisation of the TRAINING event weights
Definition: DataSetFactory.cxx:1459
TMVA::DataSetFactory::MixEvents
DataSet * MixEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts, const TString &splitMode, const TString &mixMode, const TString &normMode, UInt_t splitSeed)
Select and distribute unassigned events to kTraining and kTesting.
Definition: DataSetFactory.cxx:1035
TMVA::DataSet
Definition: DataSet.h:81
TMVA::Increment::operator()
T operator()()
Definition: DataSetFactory.h:112
Event.h
TTreeFormula
Definition: TTreeFormula.h:58
TCut.h
TRandom3.h
TMVA::DataSetFactory::fCurrentTree
TTree * fCurrentTree
Definition: DataSetFactory.h:305
TMVA::DataSetFactory::fLogger
MsgLogger * fLogger
Definition: DataSetFactory.h:316
UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:46
Types.h
TMVA::DataSetFactory::fVerboseLevel
TString fVerboseLevel
Definition: DataSetFactory.h:296
TMVA::DataSetFactory::ValuePerClassOfTreeType
std::map< Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType
Definition: DataSetFactory.h:205
unsigned int
TMVA::DataSetFactory::EventStats::cutScaling
Float_t cutScaling() const
Definition: DataSetFactory.h:232
TMVA::DataSetFactory::DataSetFactory
DataSetFactory()
constructor
Definition: DataSetFactory.cxx:93
Double_t
double Double_t
Definition: RtypesCore.h:59
TMVA::DataSetFactory::CalcCorrelationMatrix
TMatrixD * CalcCorrelationMatrix(DataSet *, const UInt_t classNumber)
computes correlation matrix for variables "theVars" in tree; "theType" defines the required event "ty...
Definition: DataSetFactory.cxx:543
TMVA::MsgLogger
Definition: MsgLogger.h:83
TMVA::DataSetFactory::EventStats::nWeEvAfterCut
Float_t nWeEvAfterCut
Definition: DataSetFactory.h:216
TMVA::DeleteFunctor_t::operator()
DeleteFunctor_t & operator()(const T *p)
Definition: DataSetFactory.h:94
TMVA::DataSetFactory::fInputTableFormulas
std::vector< std::pair< TTreeFormula *, Int_t > > fInputTableFormulas
Definition: DataSetFactory.h:310
TMVA::DataSetFactory::EventStats::nTrainingEventsRequested
Int_t nTrainingEventsRequested
Definition: DataSetFactory.h:209
TMVA::DataSetFactory::Verbose
Bool_t Verbose()
Definition: DataSetFactory.h:290
TObject
Definition: TObject.h:37
ClassDef
#define ClassDef(name, id)
Definition: Rtypes.h:325
TMVA::DataSetFactory::ResetBranchAndEventAddresses
void ResetBranchAndEventAddresses(TTree *)
TMVA::DataSetFactory::EventStats::nEvBeforeCut
Int_t nEvBeforeCut
Definition: DataSetFactory.h:213
TMVA::DataSetFactory::EventStats::nEvAfterCut
Int_t nEvAfterCut
Definition: DataSetFactory.h:214
TMVA::null_t::argument_type
F argument_type
Definition: DataSetFactory.h:125
ROOT::Math::Chebyshev::T
double T(double x)
Definition: ChebyshevPol.h:52
TMVA::DataSetFactory::EventVectorOfTreeType
std::map< Types::ETreeType, EventVector > EventVectorOfTreeType
Definition: DataSetFactory.h:202
TMVA::compose_unary_t::g
const G & g
Definition: DataSetFactory.h:174
TMVA::DataSetFactory::fTargetFormulas
std::vector< TTreeFormula * > fTargetFormulas
targets
Definition: DataSetFactory.h:311
TMVA::compose_unary
compose_unary_t< F, G > compose_unary(const F &_f, const G &_g)
Definition: DataSetFactory.h:187
TMVA::compose_binary_t::compose_binary_t
compose_binary_t(const F &_f, const G &_g, const H &_h)
Definition: DataSetFactory.h:149
TMVA
create variable transformations
Definition: GeneticMinimizer.h:22
int
TMVA::DataSetFactory::EventVector
std::vector< Event * > EventVector
Definition: DataSetFactory.h:199