ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
DataSetFactory.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Eckhard von Toerne, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : DataSetFactory *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Contains all the data information *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17  * Eckhard von Toerne <evt@physik.uni-bonn.de> - U. of Bonn, Germany *
18  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
19  * *
20  * Copyright (c) 2006: *
21  * CERN, Switzerland *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef ROOT_TMVA_DataSetFactory
30 #define ROOT_TMVA_DataSetFactory
31 
32 //////////////////////////////////////////////////////////////////////////
33 // //
34 // DataSetFactory //
35 // //
36 // Class that contains all the data information //
37 // //
38 //////////////////////////////////////////////////////////////////////////
39 
40 #include <vector>
41 #include <stdlib.h>
42 
43 #ifndef ROOT_TString
44 #include "TString.h"
45 #endif
46 #ifndef ROOT_TTree
47 #include "TTree.h"
48 #endif
49 #ifndef ROOT_TCut
50 #include "TCut.h"
51 #endif
52 #ifndef ROOT_TTreeFormula
53 #include "TTreeFormula.h"
54 #endif
55 #ifndef ROOT_TMatrixDfwd
56 #include "TMatrixDfwd.h"
57 #endif
58 #ifndef ROOT_TPrincipal
59 #include "TPrincipal.h"
60 #endif
61 #ifndef ROOT_TRandom3
62 #include "TRandom3.h"
63 #endif
64 
65 #ifndef ROOT_TMVA_Types
66 #include "TMVA/Types.h"
67 #endif
68 #ifndef ROOT_TMVA_VariableInfo
69 #include "TMVA/VariableInfo.h"
70 #endif
71 #ifndef ROOT_TMVA_Event
72 #include "TMVA/Event.h"
73 #endif
74 
75 namespace TMVA {
76 
77  class DataSet;
78  class DataSetInfo;
79  class DataInputHandler;
80  class TreeInfo;
81  class MsgLogger;
82 
83  // =============== maybe move these elswhere (e.g. into the tools )
84 
85  // =============== functors =======================
86 
87 
89  public:
91  fRandom.SetSeed( seed );
92  }
94  return fRandom.Integer(n);
95  }
96  private:
97  TRandom3 fRandom; // random generator
98  };
99 
100 
101  // delete-functor (to be used in e.g. for_each algorithm)
102  template<class T>
104  {
106  delete p;
107  return *this;
108  }
109  };
110 
111  template<class T>
113  {
114  return DeleteFunctor_t<const T>();
115  }
116 
117 
118  template< typename T >
119  class Increment {
121  public:
122  Increment( T start ) : value( start ){ }
124  return value++;
125  }
126  };
127 
128 
129 
130  template <typename F>
131  class null_t
132  {
133  private:
134  // returns argF
135  public:
136  typedef F argument_type;
137  F operator()(const F& argF) const
138  {
139  return argF;
140  }
141  };
142 
143  template <typename F>
144  inline null_t<F> null() {
145  return null_t<F>();
146  }
147 
148 
149 
150  template <typename F, typename G, typename H>
151  class compose_binary_t : public std::binary_function<typename G::argument_type,
152  typename H::argument_type,
153  typename F::result_type>
154  {
155  private:
156  const F& f; // f(g(argG),h(argH))
157  const G& g;
158  const H& h;
159  public:
160  compose_binary_t(const F& _f, const G& _g, const H& _h) : f(_f), g(_g), h(_h)
161  {
162  }
163 
164  typename F::result_type operator()(const typename G::argument_type& argG,
165  const typename H::argument_type& argH) const
166  {
167  return f(g(argG),h(argH));
168  }
169  };
170 
171  template <typename F, typename G, typename H>
172  inline compose_binary_t<F,G,H> compose_binary(const F& _f, const G& _g, const H& _h) {
173  return compose_binary_t<F,G,H>(_f,_g,_h);
174  }
175 
176 
177 
178 
179  template <typename F, typename G>
180  class compose_unary_t : public std::unary_function<typename G::argument_type,
181  typename F::result_type>
182  {
183  private:
184  const F& f; // f(g(argG))
185  const G& g;
186  public:
187  compose_unary_t(const F& _f, const G& _g) : f(_f), g(_g)
188  {
189  }
190 
191  typename F::result_type operator()(const typename G::argument_type& argG) const
192  {
193  return f(g(argG));
194  }
195  };
196 
197  template <typename F, typename G>
198  inline compose_unary_t<F,G> compose_unary(const F& _f, const G& _g) {
199  return compose_unary_t<F,G>(_f,_g);
200  }
201 
202  // =============== functors =======================
203 
204 
205  // =========================================================
206 
207 
209 
210  typedef std::vector<Event* > EventVector;
211  typedef std::vector< EventVector > EventVectorOfClasses;
212  typedef std::map<Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType;
213  typedef std::map<Types::ETreeType, EventVector > EventVectorOfTreeType;
214 
215  typedef std::vector< Double_t > ValuePerClass;
216  typedef std::map<Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType;
217 
218  class EventStats {
219  public:
232  nInitialEvents(0),
233  nEvBeforeCut(0),
234  nEvAfterCut(0),
235  nWeEvBeforeCut(0),
236  nWeEvAfterCut(0),
237  nNegWeights(0),
238  varAvLength(0)
239  {}
240  ~EventStats() { delete[] varAvLength; }
242  };
243 
244  typedef std::vector< int > NumberPerClass;
245  typedef std::vector< EventStats > EvtStatsPerClass;
246 
247  public:
248 
249 
250 
251  // singleton class
253  static void destroyInstance() { if (fgInstance) { delete fgInstance; fgInstance=0; } }
254 
256 
257  static DataSetFactory* NewInstance() { return new DataSetFactory(); }
258  static void destroyNewInstance(DataSetFactory* iOther) { delete iOther;}
259  protected:
260 
261  ~DataSetFactory();
262 
263  DataSetFactory();
265 
268 
269  // ---------- new versions
270  void BuildEventVector ( DataSetInfo& dsi,
271  DataInputHandler& dataInput,
273  EvtStatsPerClass& eventCounts);
274 
275  DataSet* MixEvents ( DataSetInfo& dsi,
277  EvtStatsPerClass& eventCounts,
278  const TString& splitMode,
279  const TString& mixMode,
280  const TString& normMode,
281  UInt_t splitSeed);
282 
283  void RenormEvents ( DataSetInfo& dsi,
285  const EvtStatsPerClass& eventCounts,
286  const TString& normMode );
287 
288  void InitOptions ( DataSetInfo& dsi,
289  EvtStatsPerClass& eventsmap,
290  TString& normMode, UInt_t& splitSeed,
291  TString& splitMode, TString& mixMode );
292 
293 
294  // ------------------------
295 
296  // auxiliary functions to compute correlations
297  TMatrixD* CalcCorrelationMatrix( DataSet*, const UInt_t classNumber );
298  TMatrixD* CalcCovarianceMatrix ( DataSet*, const UInt_t classNumber );
299  void CalcMinMax ( DataSet*, DataSetInfo& dsi );
300 
301  // resets branch addresses to current event
304  void ChangeToNewTree( TreeInfo&, const DataSetInfo & );
305  Bool_t CheckTTreeFormula( TTreeFormula* ttf, const TString& expression, Bool_t& hasDollar );
306 
307  // verbosity
308  Bool_t Verbose() { return fVerbose; }
309 
310  // data members
311 
312  // verbosity
313  Bool_t fVerbose; //! Verbosity
314  TString fVerboseLevel; //! VerboseLevel
315 
316  Bool_t fScaleWithPreselEff; //! how to deal with requested #events in connection with preselection cuts
317 
318  // the event
319  TTree* fCurrentTree; //! the tree, events are currently read from
320  UInt_t fCurrentEvtIdx; //! the current event (to avoid reading of the same event)
321 
322  // the formulas for reading the original tree
323  std::vector<TTreeFormula*> fInputFormulas; //! input variables
324  std::vector<TTreeFormula*> fTargetFormulas; //! targets
325  std::vector<TTreeFormula*> fCutFormulas; //! cuts
326  std::vector<TTreeFormula*> fWeightFormula; //! weights
327  std::vector<TTreeFormula*> fSpectatorFormulas; //! spectators
328 
329  MsgLogger* fLogger; //! message logger
330  MsgLogger& Log() const { return *fLogger; }
331  };
332 }
333 
334 #endif
void ResetBranchAndEventAddresses(TTree *)
std::vector< EventVector > EventVectorOfClasses
Random number generator class based on M.
Definition: TRandom3.h:29
static void destroyInstance()
UInt_t fCurrentEvtIdx
the tree, events are currently read from
std::vector< TTreeFormula * > fInputFormulas
the current event (to avoid reading of the same event)
float Float_t
Definition: RtypesCore.h:53
std::vector< TTreeFormula * > fCutFormulas
targets
std::vector< Double_t > ValuePerClass
F operator()(const F &argF) const
#define H(x, y, z)
compose_unary_t< F, G > compose_unary(const F &_f, const G &_g)
F::result_type operator()(const typename G::argument_type &argG, const typename H::argument_type &argH) const
Basic string class.
Definition: TString.h:137
void BuildEventVector(DataSetInfo &dsi, DataInputHandler &dataInput, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts)
build empty event vectors distributes events between kTraining/kTesting/kMaxTreeType ...
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
compose_unary_t(const F &_f, const G &_g)
TTree * fCurrentTree
how to deal with requested #events in connection with preselection cuts
void CalcMinMax(DataSet *, DataSetInfo &dsi)
compute covariance matrix
#define G(x, y, z)
DeleteFunctor_t & operator()(const T *p)
compose_binary_t(const F &_f, const G &_g, const H &_h)
std::vector< int > NumberPerClass
TTree * T
std::map< Types::ETreeType, EventVectorOfClasses > EventVectorOfClassesOfTreeType
null_t< F > null()
void InitOptions(DataSetInfo &dsi, EvtStatsPerClass &eventsmap, TString &normMode, UInt_t &splitSeed, TString &splitMode, TString &mixMode)
the dataset splitting
DataSet * BuildDynamicDataSet(DataSetInfo &)
std::vector< TTreeFormula * > fWeightFormula
cuts
F::result_type operator()(const typename G::argument_type &argG) const
TString fVerboseLevel
Verbosity.
virtual UInt_t Integer(UInt_t imax)
Returns a random integer on [ 0, imax-1 ].
Definition: TRandom.cxx:320
MsgLogger & Log() const
message logger
void RenormEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, const EvtStatsPerClass &eventCounts, const TString &normMode)
renormalisation of the TRAINING event weights -none (kind of obvious) .
DeleteFunctor_t< const T > DeleteFunctor()
Used to pass a selection expression to the Tree drawing routine.
Definition: TTreeFormula.h:64
void ChangeToNewTree(TreeInfo &, const DataSetInfo &)
While the data gets copied into the local training and testing trees, the input tree can change (for ...
#define F(x, y, z)
DataSetFactory()
constructor
TMatrixD * CalcCorrelationMatrix(DataSet *, const UInt_t classNumber)
computes correlation matrix for variables "theVars" in tree; "theType" defines the required event "ty...
Bool_t fScaleWithPreselEff
VerboseLevel.
virtual void SetSeed(UInt_t seed=0)
Set the random generator sequence if seed is 0 (default value) a TUUID is generated and used to fill ...
Definition: TRandom3.cxx:201
std::vector< TTreeFormula * > fSpectatorFormulas
weights
DataSet * CreateDataSet(DataSetInfo &, DataInputHandler &)
steering the creation of a new dataset
unsigned int UInt_t
Definition: RtypesCore.h:42
UInt_t operator()(UInt_t n)
std::vector< TTreeFormula * > fTargetFormulas
input variables
std::vector< Event * > EventVector
static DataSetFactory * fgInstance
DataSet * BuildInitialDataSet(DataSetInfo &, TMVA::DataInputHandler &)
if no entries, than create a DataSet with one Event which uses dynamic variables (pointers to variabl...
~DataSetFactory()
destructor
RandomGenerator(UInt_t seed)
double Double_t
Definition: RtypesCore.h:55
MsgLogger * fLogger
spectators
compose_binary_t< F, G, H > compose_binary(const F &_f, const G &_g, const H &_h)
std::map< Types::ETreeType, EventVector > EventVectorOfTreeType
static DataSetFactory * NewInstance()
std::vector< EventStats > EvtStatsPerClass
static DataSetFactory & Instance()
Bool_t CheckTTreeFormula(TTreeFormula *ttf, const TString &expression, Bool_t &hasDollar)
checks a TTreeFormula for problems
A TTree object has a header with a name and a title.
Definition: TTree.h:98
DataSet * MixEvents(DataSetInfo &dsi, EventVectorOfClassesOfTreeType &eventsmap, EvtStatsPerClass &eventCounts, const TString &splitMode, const TString &mixMode, const TString &normMode, UInt_t splitSeed)
Select and distribute unassigned events to kTraining and kTesting Bool_t emptyUndefined = kTRUE;...
std::map< Types::ETreeType, ValuePerClass > ValuePerClassOfTreeType
const Int_t n
Definition: legend1.C:16
TMatrixD * CalcCovarianceMatrix(DataSet *, const UInt_t classNumber)
compute covariance matrix
static void destroyNewInstance(DataSetFactory *iOther)