Logo ROOT  
Reference Guide
DataSet.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : DataSet *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Contains all the data information *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16  * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17  * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18  * *
19  * Copyright (c) 2006: *
20  * CERN, Switzerland *
21  * U. of Victoria, Canada *
22  * MPI-K Heidelberg, Germany *
23  * *
24  * Redistribution and use in source and binary forms, with or without *
25  * modification, are permitted according to the terms listed in LICENSE *
26  * (http://tmva.sourceforge.net/LICENSE) *
27  **********************************************************************************/
28 
29 #ifndef ROOT_TMVA_DataSet
30 #define ROOT_TMVA_DataSet
31 
32 //////////////////////////////////////////////////////////////////////////
33 // //
34 // DataSet //
35 // //
36 // Class that contains all the data information //
37 // //
38 //////////////////////////////////////////////////////////////////////////
39 
40 #include <vector>
41 #include <map>
42 
43 #include "TNamed.h"
44 #include "TString.h"
45 #include "TTree.h"
46 #include "TRandom3.h"
47 
48 #include "TMVA/Types.h"
49 #include "TMVA/VariableInfo.h"
50 
51 namespace TMVA {
52 
53  class Event;
54  class DataSetInfo;
55  class MsgLogger;
56  class Results;
57 
58  class DataSet :public TNamed {
59 
60  public:
61  DataSet();
62  DataSet(const DataSetInfo&);
63  virtual ~DataSet();
64 
65  void AddEvent( Event *, Types::ETreeType );
66 
70 
71  // const getters
72  const Event* GetEvent() const; // returns event without transformations
73  const Event* GetEvent ( Long64_t ievt ) const { fCurrentEventIdx = ievt; return GetEvent(); } // returns event without transformations
74  const Event* GetTrainingEvent( Long64_t ievt ) const { return GetEvent(ievt, Types::kTraining); }
75  const Event* GetTestEvent ( Long64_t ievt ) const { return GetEvent(ievt, Types::kTesting); }
76  const Event* GetEvent ( Long64_t ievt, Types::ETreeType type ) const
77  {
79  }
80 
81 
82 
83 
84  UInt_t GetNVariables() const;
85  UInt_t GetNTargets() const;
86  UInt_t GetNSpectators() const;
87 
88  void SetCurrentEvent( Long64_t ievt ) const { fCurrentEventIdx = ievt; }
91 
92  void SetEventCollection( std::vector<Event*>*, Types::ETreeType, Bool_t deleteEvents = true );
93  const std::vector<Event*>& GetEventCollection( Types::ETreeType type = Types::kMaxTreeType ) const;
95 
100 
102 
103  Results* GetResults ( const TString &,
105  Types::EAnalysisType analysistype );
106  void DeleteResults ( const TString &,
108  Types::EAnalysisType analysistype );
110  Types::EAnalysisType analysistype);
111 
112  void SetVerbose( Bool_t ) {}
113 
114  // sets the number of blocks into which the training set is divided,
115  // some of which can be given to the Validation sample. By default they all belong to the Training set.
116  void DivideTrainingSet( UInt_t blockNum );
117 
118  // sets a certain block from the original training set to belong to either the Training or the Validation set
119  void MoveTrainingBlock( Int_t blockInd,Types::ETreeType dest, Bool_t applyChanges = kTRUE );
120 
121  void IncrementNClassEvents( Int_t type, UInt_t classNumber );
122  Long64_t GetNClassEvents ( Int_t type, UInt_t classNumber );
123  void ClearNClassEvents ( Int_t type );
124 
126 
127  // accessors for random and importance sampling
128  void InitSampling( Float_t fraction, Float_t weight, UInt_t seed = 0 );
129  void EventResult( Bool_t successful, Long64_t evtNumber = -1 );
130  void CreateSampling() const;
131 
133 
134  private:
135 
136  // data members
137  void DestroyCollection( Types::ETreeType type, Bool_t deleteEvents );
138 
139  const DataSetInfo *fdsi; //-> datasetinfo that created this dataset
140 
141  std::vector< std::vector<Event*> > fEventCollection; // list of events for training/testing/...
142 
143  std::vector< std::map< TString, Results* > > fResults; //! [train/test/...][method-identifier]
144 
145  mutable UInt_t fCurrentTreeIdx;
146  mutable Long64_t fCurrentEventIdx;
147 
148  // event sampling
149  std::vector<Char_t> fSampling; // random or importance sampling (not all events are taken) !! Bool_t are stored ( no std::vector<bool> taken for speed (performance) issues )
150  std::vector<Int_t> fSamplingNEvents; // number of events which should be sampled
151  std::vector<Float_t> fSamplingWeight; // weight change factor [weight is indicating if sampling is random (1.0) or importance (<1.0)]
152  mutable std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingEventList; // weights and indices for sampling
153  mutable std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingSelected; // selected events
154  TRandom3 *fSamplingRandom; //-> random generator for sampling
155 
156 
157  // further things
158  std::vector< std::vector<Long64_t> > fClassEvents; // number of events of class 0,1,2,... in training[0]
159  // and testing[1] (+validation, trainingoriginal)
160 
161  Bool_t fHasNegativeEventWeights; // true if at least one signal or bkg event has negative weight
162 
163  mutable MsgLogger* fLogger; //! message logger
164  MsgLogger& Log() const { return *fLogger; }
165  std::vector<Char_t> fBlockBelongToTraining; // when dividing the dataset to blocks, sets whether
166  // the certain block is in the Training set or else
167  // in the validation set
168  // booleans are stored; std::vector<Char_t> is used for performance reasons (instead of std::vector<Bool_t>)
169  Long64_t fTrainingBlockSize; // block size into which the training dataset is divided
170 
173  public:
174 
176  };
177 }
178 
179 
180 //_______________________________________________________________________
182 {
183  switch (type) {
185  case Types::kTraining : return 0;
186  case Types::kTesting : return 1;
187  case Types::kValidation : return 2;
188  case Types::kTrainingOriginal : return 3;
189  default : return fCurrentTreeIdx;
190  }
191 }
192 
193 //_______________________________________________________________________
195 {
196  switch (fCurrentTreeIdx) {
197  case 0: return Types::kTraining;
198  case 1: return Types::kTesting;
199  case 2: return Types::kValidation;
200  case 3: return Types::kTrainingOriginal;
201  }
202  return Types::kMaxTreeType;
203 }
204 
205 //_______________________________________________________________________
207 {
208  Int_t treeIdx = TreeIndex(type);
209  if (fSampling.size() > UInt_t(treeIdx) && fSampling.at(treeIdx)) {
210  return fSamplingSelected.at(treeIdx).size();
211  }
212  return GetEventCollection(type).size();
213 }
214 
215 //_______________________________________________________________________
216 inline const std::vector<TMVA::Event*>& TMVA::DataSet::GetEventCollection( TMVA::Types::ETreeType type ) const
217 { // returns a const reference to the event vector held in fEventCollection for the given tree type
218  return fEventCollection.at(TreeIndex(type)); // TreeIndex() maps the tree type to a slot; at() is bounds-checked and throws std::out_of_range for an invalid slot
219 }
220 
221 
222 #endif
TMVA::DataSet::TreeIndex
UInt_t TreeIndex(Types::ETreeType type) const
Definition: DataSet.h:181
TMVA::DataSet::GetNVariables
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Definition: DataSet.cxx:216
TMVA::DataSet::fCurrentEventIdx
Long64_t fCurrentEventIdx
Definition: DataSet.h:169
kTRUE
const Bool_t kTRUE
Definition: RtypesCore.h:91
TMVA::DataSet::GetCurrentType
Types::ETreeType GetCurrentType() const
Definition: DataSet.h:194
TMVA::DataSet::IncrementNClassEvents
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:151
dest
#define dest(otri, vertexptr)
Definition: triangle.c:1040
TMVA::DataSet::fClassEvents
std::vector< std::vector< Long64_t > > fClassEvents
Definition: DataSet.h:181
TMVA::DataSet::EventResult
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
Definition: DataSet.cxx:572
TMVA::DataSet::fLogger
MsgLogger * fLogger
Definition: DataSet.h:186
TMVA::DataSet::fCurrentTreeIdx
UInt_t fCurrentTreeIdx
[train/test/...][method-identifier]
Definition: DataSet.h:168
TMVA::DataSet::GetNTargets
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
Definition: DataSet.cxx:224
TMVA::DataSet::SetCurrentType
void SetCurrentType(Types::ETreeType type) const
Definition: DataSet.h:112
TNamed.h
Long64_t
long long Long64_t
Definition: RtypesCore.h:73
TMVA::Types::kTesting
@ kTesting
Definition: Types.h:168
TTree
Definition: TTree.h:79
TMVA::DataSet::fdsi
const DataSetInfo * fdsi
Definition: DataSet.h:162
Float_t
float Float_t
Definition: RtypesCore.h:57
VariableInfo.h
TMVA::DataSet::fHasNegativeEventWeights
Bool_t fHasNegativeEventWeights
Definition: DataSet.h:184
TTree.h
TString
Definition: TString.h:136
Bool_t
bool Bool_t
Definition: RtypesCore.h:63
TMVA::DataSet::fSamplingNEvents
std::vector< Int_t > fSamplingNEvents
Definition: DataSet.h:173
TMVA::DataSet::fSampling
std::vector< Char_t > fSampling
Definition: DataSet.h:172
TMVA::DataSet::fEventCollection
std::vector< std::vector< Event * > > fEventCollection
Definition: DataSet.h:164
TString.h
bool
TMVA::DataSet::DestroyCollection
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
Definition: DataSet.cxx:189
TMVA::DataSet::SetEventCollection
void SetEventCollection(std::vector< Event * > *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
Definition: DataSet.cxx:250
TMVA::DataSet::GetNEvtBkgdTest
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition: DataSet.cxx:435
TMVA::DataSet::GetNClassEvents
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Definition: DataSet.cxx:168
TMVA::DataSetInfo
Definition: DataSetInfo.h:62
TMVA::DataSet::CreateSampling
void CreateSampling() const
create an event sampling (random or importance sampling)
Definition: DataSet.cxx:508
TMVA::DataSet::GetEventCollection
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:216
TMVA::DataSet::fSamplingRandom
TRandom3 * fSamplingRandom
Definition: DataSet.h:177
TMVA::DataSet::GetNSpectators
UInt_t GetNSpectators() const
access the number of spectators through the datasetinfo
Definition: DataSet.cxx:232
TMVA::DataSet::GetNEvtSigTrain
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition: DataSet.cxx:443
TMVA::DataSet::Log
MsgLogger & Log() const
message logger
Definition: DataSet.h:187
TRandom3
Definition: TRandom3.h:27
TMVA::DataSet::fSamplingSelected
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingSelected
Definition: DataSet.h:176
TMVA::Types::EAnalysisType
EAnalysisType
Definition: Types.h:150
TMVA::DataSet::GetNTestEvents
Long64_t GetNTestEvents() const
Definition: DataSet.h:92
TMVA::DataSet::GetTree
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets,...
Definition: DataSet.cxx:609
TMVA::DataSet::DeleteAllResults
void DeleteAllResults(Types::ETreeType type, Types::EAnalysisType analysistype)
Deletes all results currently in the dataset.
Definition: DataSet.cxx:343
TMVA::Types::ETreeType
ETreeType
Definition: Types.h:166
TMVA::DataSet::ApplyTrainingBlockDivision
void ApplyTrainingBlockDivision()
TMVA::DataSet::GetEvent
const Event * GetEvent() const
Definition: DataSet.cxx:202
TMVA::DataSet::GetNEvents
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition: DataSet.h:206
TNamed
Definition: TNamed.h:29
TMVA::DataSet::fSamplingEventList
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingEventList
Definition: DataSet.h:175
TMVA::DataSet::GetNEvtBkgdTrain
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition: DataSet.cxx:451
TMVA::DataSet::GetEventCollectionAsTree
const TTree * GetEventCollectionAsTree()
TMVA::DataSet::fSamplingWeight
std::vector< Float_t > fSamplingWeight
Definition: DataSet.h:174
TMVA::DataSet
Definition: DataSet.h:81
TRandom3.h
UInt_t
unsigned int UInt_t
Definition: RtypesCore.h:46
Types.h
TMVA::Results
Definition: Results.h:57
TMVA::Types::kMaxTreeType
@ kMaxTreeType
Definition: Types.h:169
unsigned int
TMVA::Types::kTraining
@ kTraining
Definition: Types.h:167
TMVA::DataSet::GetNEvtSigTest
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition: DataSet.cxx:427
TMVA::DataSet::~DataSet
virtual ~DataSet()
destructor
Definition: DataSet.cxx:123
TMVA::DataSet::InitSampling
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
Definition: DataSet.cxx:459
TMVA::DataSet::MoveTrainingBlock
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
Definition: DataSet.cxx:415
TMVA::DataSet::GetTestEvent
const Event * GetTestEvent(Long64_t ievt) const
Definition: DataSet.h:98
TMVA::DataSet::GetTrainingEvent
const Event * GetTrainingEvent(Long64_t ievt) const
Definition: DataSet.h:97
TMVA::DataSet::SetCurrentEvent
void SetCurrentEvent(Long64_t ievt) const
Definition: DataSet.h:111
TMVA::DataSet::ApplyTrainingSetDivision
void ApplyTrainingSetDivision()
apply division of data set
Definition: DataSet.cxx:395
TMVA::Types::kValidation
@ kValidation
Definition: Types.h:170
TMVA::MsgLogger
Definition: MsgLogger.h:83
TMVA::DataSet::DivideTrainingSet
void DivideTrainingSet(UInt_t blockNum)
divide training set
Definition: DataSet.cxx:371
TMVA::DataSet::fTrainingBlockSize
Long64_t fTrainingBlockSize
Definition: DataSet.h:192
TMVA::DataSet::AddEvent
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Definition: DataSet.cxx:241
TMVA::DataSet::fResults
std::vector< std::map< TString, Results * > > fResults
Definition: DataSet.h:166
TMVA::DataSet::GetResults
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition: DataSet.cxx:265
TMVA::DataSet::fBlockBelongToTraining
std::vector< Char_t > fBlockBelongToTraining
Definition: DataSet.h:188
ClassDef
#define ClassDef(name, id)
Definition: Rtypes.h:325
TMVA::DataSet::HasNegativeEventWeights
Bool_t HasNegativeEventWeights() const
Definition: DataSet.h:124
TMVA::Types::kTrainingOriginal
@ kTrainingOriginal
Definition: Types.h:171
TMVA::DataSet::GetNTrainingEvents
Long64_t GetNTrainingEvents() const
Definition: DataSet.h:91
TMVA::DataSet::DataSet
DataSet()
constructor
Definition: DataSet.cxx:91
TMVA::DataSet::SetVerbose
void SetVerbose(Bool_t)
Definition: DataSet.h:135
type
int type
Definition: TGX11.cxx:121
TMVA::DataSet::ClearNClassEvents
void ClearNClassEvents(Int_t type)
Definition: DataSet.cxx:160
TMVA::DataSet::DeleteResults
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance.
Definition: DataSet.cxx:316
TMVA
create variable transformations
Definition: GeneticMinimizer.h:22
int