Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
DataSet.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : DataSet *
8 * *
9 * *
10 * Description: *
11 * Contains all the data information *
12 * *
13 * Authors (alphabetical): *
14 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - CERN, Switzerland *
16 * Peter Speckmayer <Peter.Speckmayer@cern.ch> - CERN, Switzerland *
17 * Helge Voss <Helge.Voss@cern.ch> - MPI-K Heidelberg, Germany *
18 * *
19 * Copyright (c) 2006: *
20 * CERN, Switzerland *
21 * U. of Victoria, Canada *
22 * MPI-K Heidelberg, Germany *
23 * *
24 * Redistribution and use in source and binary forms, with or without *
25 * modification, are permitted according to the terms listed in LICENSE *
26 * (see tmva/doc/LICENSE) *
27 **********************************************************************************/
28
29#ifndef ROOT_TMVA_DataSet
30#define ROOT_TMVA_DataSet
31
32//////////////////////////////////////////////////////////////////////////
33// //
34// DataSet //
35// //
36// Class that contains all the data information //
37// //
38//////////////////////////////////////////////////////////////////////////
39
40#include <vector>
41#include <map>
42
43#include "TNamed.h"
44#include "TString.h"
45#include "TTree.h"
46#include "TRandom3.h"
47
48#include "TMVA/Types.h"
49#include "TMVA/VariableInfo.h"
50
51namespace TMVA {
52
53 class Event;
54 class DataSetInfo;
55 class MsgLogger;
56 class Results;
57
// TMVA::DataSet -- "Class that contains all the data information"; derives from TNamed.
// Per the member comments below it stores the event lists and the Results maps for
// training/testing/..., the state for random or importance sampling, the per-class
// event counters, and the bookkeeping for dividing the training set into blocks.
// NOTE(review): this is a line-numbered listing with gaps (e.g. listing lines 65,
// 67-69, 76, 78, 89-90, 94, 96-99 are absent). Members used by the inline code below,
// such as fCurrentEventIdx, fCurrentTreeIdx and the two-argument GetEvent, are
// therefore not declared in the visible text.
58 class DataSet :public TNamed {
59
60 public:
61 DataSet();
62 DataSet(const DataSetInfo&);
63 virtual ~DataSet();
64
66
70
71 // const getters
72 const Event* GetEvent() const; ///< returns event without transformations
// Stores ievt in fCurrentEventIdx first, then forwards to the argument-less GetEvent().
73 const Event* GetEvent ( Long64_t ievt ) const { fCurrentEventIdx = ievt; return GetEvent(); } // returns event without transformations
// Both wrappers forward to GetEvent(ievt, type) with a fixed tree type.
74 const Event* GetTrainingEvent( Long64_t ievt ) const { return GetEvent(ievt, Types::kTraining); }
75 const Event* GetTestEvent ( Long64_t ievt ) const { return GetEvent(ievt, Types::kTesting); }
// NOTE(review): orphaned braces -- the declaration (listing line 76) and body (listing
// line 78) they belong to are missing from this extract.
77 {
79 }
80
81
82
83
84 UInt_t GetNVariables() const;
85 UInt_t GetNTargets() const;
86 UInt_t GetNSpectators() const;
87
// Only records the index in fCurrentEventIdx.
88 void SetCurrentEvent( Long64_t ievt ) const { fCurrentEventIdx = ievt; }
91
// deleteEvents defaults to true; its exact effect is not visible here.
92 void SetEventCollection( std::vector<Event*>*, Types::ETreeType, Bool_t deleteEvents = true );
// With the default argument the caller does not name a tree type explicitly.
93 const std::vector<Event*>& GetEventCollection( Types::ETreeType type = Types::kMaxTreeType ) const;
95
100
102
// NOTE(review): the middle parameter line of each declaration below (listing lines
// 104 and 107) is missing from this extract.
103 Results* GetResults ( const TString &,
105 Types::EAnalysisType analysistype );
106 void DeleteResults ( const TString &,
108 Types::EAnalysisType analysistype );
// NOTE(review): tail of a declaration whose first line (listing line 109) is missing;
// presumably DeleteAllResults -- confirm against the original header.
110 Types::EAnalysisType analysistype);
111
113
114 // sets the number of blocks to which the training set is divided,
115 // some of which are given to the Validation sample. As default they belong all to Training set.
116 void DivideTrainingSet( UInt_t blockNum );
117
118 // sets a certain block from the origin training set to belong to either Training or Validation set
119 void MoveTrainingBlock( Int_t blockInd,Types::ETreeType dest, Bool_t applyChanges = kTRUE );
120
// Per-class event counters; see fClassEvents below for the storage they presumably use.
121 void IncrementNClassEvents( Int_t type, UInt_t classNumber );
122 Long64_t GetNClassEvents ( Int_t type, UInt_t classNumber );
124
126
127 // accessors for random and importance sampling
128 void InitSampling( Float_t fraction, Float_t weight, UInt_t seed = 0 );
129 void EventResult( Bool_t successful, Long64_t evtNumber = -1 );
130 void CreateSampling() const;
131
133
134 private:
135
136 // private helper, followed by the data members
137 void DestroyCollection( Types::ETreeType type, Bool_t deleteEvents );
138
139 const DataSetInfo *fdsi; ///<-> datasetinfo that created this dataset
140
141 std::vector< std::vector<Event*> > fEventCollection; ///< list of events for training/testing/...
142
143 std::vector< std::map< TString, Results* > > fResults; ///<! [train/test/...][method-identifier]
144
// NOTE(review): listing lines 145-146 are missing here; the member index places
// fCurrentTreeIdx at DataSet.h:145 and fCurrentEventIdx at DataSet.h:146.
147
148 // event sampling
149 std::vector<Char_t> fSampling; ///< random or importance sampling (not all events are taken) !! Bool_t are stored ( no std::vector<bool> taken for speed (performance) issues )
150 std::vector<Int_t> fSamplingNEvents; ///< number of events which should be sampled
151 std::vector<Float_t> fSamplingWeight; ///< weight change factor [weight is indicating if sampling is random (1.0) or importance (<1.0)]
152 mutable std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingEventList; ///< weights and indices for sampling
153 mutable std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingSelected; ///< selected events
154 TRandom3 *fSamplingRandom; ///<-> random generator for sampling
155
156
157 // further things
158 std::vector< std::vector<Long64_t> > fClassEvents; ///< number of events of class 0,1,2,... in training[0]
159 ///< and testing[1] (+validation, trainingoriginal)
160
161 Bool_t fHasNegativeEventWeights; ///< true if at least one signal or bkg event has negative weight
162
163 mutable MsgLogger* fLogger; ///<! message logger
// Dereferences fLogger without a null check.
164 MsgLogger& Log() const { return *fLogger; }
165 std::vector<Char_t> fBlockBelongToTraining; ///< when dividing the dataset to blocks, sets whether
166 ///< the certain block is in the Training set or else
167 ///< in the validation set
168 ///< boolean are stored, taken std::vector<Char_t> for performance reasons (instead of std::vector<Bool_t>)
169 Long64_t fTrainingBlockSize; ///< block size into which the training dataset is divided
170
173 public:
174
// NOTE(review): listing line 175 is missing; the index mentions the ClassDef macro,
// which presumably belongs here -- confirm against the original header.
176 };
177}
178
179
180//_______________________________________________________________________
// NOTE(review): the signature (listing line 181) is missing from this extract; the
// member index lists `UInt_t TreeIndex(Types::ETreeType type) const` at DataSet.h:181.
// Translates a tree type into a slot number: kTraining -> 0, kTesting -> 1,
// kValidation -> 2, kTrainingOriginal -> 3. Every other value falls through to the
// default branch and yields fCurrentTreeIdx.
182{
183 switch (type) {
// NOTE(review): listing line 184 is also absent from this extract.
185 case Types::kTraining : return 0;
186 case Types::kTesting : return 1;
187 case Types::kValidation : return 2;
188 case Types::kTrainingOriginal : return 3;
189 default : return fCurrentTreeIdx;
190 }
191}
192
193//_______________________________________________________________________
// NOTE(review): the signature (listing line 194) is missing from this extract; the
// member index lists `Types::ETreeType GetCurrentType() const` at DataSet.h:194.
// Translates fCurrentTreeIdx back into a tree type: 0 -> kTraining, 1 -> kTesting,
// 2 -> kValidation, 3 -> kTrainingOriginal.
195{
196 switch (fCurrentTreeIdx) {
197 case 0: return Types::kTraining;
198 case 1: return Types::kTesting;
199 case 2: return Types::kValidation;
200 case 3: return Types::kTrainingOriginal;
201 }
// Any index outside 0..3 is reported as kMaxTreeType.
202 return Types::kMaxTreeType;
203}
204
205//_______________________________________________________________________
// NOTE(review): the signature (listing line 206) is missing from this extract; the
// member index lists `Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const`
// at DataSet.h:206.
// Returns the number of events for the given tree type. When a sampling flag exists
// for that tree and is set, this is the size of the sampled selection; otherwise it
// is the size of the full event collection.
207{
208 Int_t treeIdx = TreeIndex(type);
// The size check guards the at() access when fSampling has no entry for this tree.
209 if (fSampling.size() > UInt_t(treeIdx) && fSampling.at(treeIdx)) {
210 return fSamplingSelected.at(treeIdx).size();
211 }
212 return GetEventCollection(type).size();
213}
214
215//_______________________________________________________________________
// Returns a const reference to the event list stored for the given tree type.
// The slot is resolved through TreeIndex(type) and read with std::vector::at(), so a
// slot beyond the end of fEventCollection raises std::out_of_range instead of reading
// out of bounds. The in-class declaration defaults `type` to Types::kMaxTreeType.
216inline const std::vector<TMVA::Event*>& TMVA::DataSet::GetEventCollection( TMVA::Types::ETreeType type ) const
217{
218 return fEventCollection.at(TreeIndex(type));
219}
220
221
222#endif
bool Bool_t
Definition RtypesCore.h:63
unsigned int UInt_t
Definition RtypesCore.h:46
float Float_t
Definition RtypesCore.h:57
long long Long64_t
Definition RtypesCore.h:80
constexpr Bool_t kTRUE
Definition RtypesCore.h:100
#define ClassDef(name, id)
Definition Rtypes.h:337
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t dest
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
Class that contains all the data information.
Definition DataSetInfo.h:62
Class that contains all the data information.
Definition DataSet.h:58
void DivideTrainingSet(UInt_t blockNum)
divide training set
Definition DataSet.cxx:371
const DataSetInfo * fdsi
-> datasetinfo that created this dataset
Definition DataSet.h:139
void AddEvent(Event *, Types::ETreeType)
add event to event list after which the event is owned by the dataset
Definition DataSet.cxx:241
Long64_t GetNEvtSigTest()
return number of signal test events in dataset
Definition DataSet.cxx:427
std::vector< Char_t > fSampling
random or importance sampling (not all events are taken) !! Bool_t are stored ( no std::vector<bool> ...
Definition DataSet.h:149
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingEventList
weights and indices for sampling
Definition DataSet.h:152
std::vector< Float_t > fSamplingWeight
weight change factor [weight is indicating if sampling is random (1.0) or importance (<1.0)]
Definition DataSet.h:151
UInt_t GetNTargets() const
access the number of targets through the datasetinfo
Definition DataSet.cxx:224
void ClearNClassEvents(Int_t type)
Definition DataSet.cxx:160
Long64_t GetNEvtSigTrain()
return number of signal training events in dataset
Definition DataSet.cxx:443
void EventResult(Bool_t successful, Long64_t evtNumber=-1)
increase the importance sampling weight of the event when not successful and decrease it when success...
Definition DataSet.cxx:572
std::vector< std::map< TString, Results * > > fResults
! [train/test/...][method-identifier]
Definition DataSet.h:143
void SetEventCollection(std::vector< Event * > *, Types::ETreeType, Bool_t deleteEvents=true)
Sets the event collection (by DataSetFactory)
Definition DataSet.cxx:250
Long64_t GetNTestEvents() const
Definition DataSet.h:69
TTree * GetTree(Types::ETreeType type)
create the test/trainings tree with all the variables, the weights, the classes, the targets,...
Definition DataSet.cxx:609
const Event * GetEvent() const
returns event without transformations
Definition DataSet.cxx:202
Types::ETreeType GetCurrentType() const
Definition DataSet.h:194
Long64_t GetNEvents(Types::ETreeType type=Types::kMaxTreeType) const
Definition DataSet.h:206
Results * GetResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
Definition DataSet.cxx:265
Long64_t GetNClassEvents(Int_t type, UInt_t classNumber)
Definition DataSet.cxx:168
Long64_t fCurrentEventIdx
Definition DataSet.h:146
std::vector< Char_t > fBlockBelongToTraining
when dividing the dataset to blocks, sets whether the certain block is in the Training set or else in the validation set
Definition DataSet.h:165
MsgLogger * fLogger
! message logger
Definition DataSet.h:163
Long64_t GetNTrainingEvents() const
Definition DataSet.h:68
UInt_t GetNSpectators() const
access the number of spectators through the datasetinfo
Definition DataSet.cxx:232
void MoveTrainingBlock(Int_t blockInd, Types::ETreeType dest, Bool_t applyChanges=kTRUE)
move training block
Definition DataSet.cxx:415
UInt_t GetNVariables() const
access the number of variables through the datasetinfo
Definition DataSet.cxx:216
const Event * GetTestEvent(Long64_t ievt) const
Definition DataSet.h:75
std::vector< Int_t > fSamplingNEvents
number of events which should be sampled
Definition DataSet.h:150
DataSet()
constructor
Definition DataSet.cxx:91
std::vector< std::vector< std::pair< Float_t, Long64_t > > > fSamplingSelected
selected events
Definition DataSet.h:153
virtual ~DataSet()
destructor
Definition DataSet.cxx:123
std::vector< std::vector< Long64_t > > fClassEvents
number of events of class 0,1,2,... in training[0] and testing[1] (+validation, trainingoriginal)
Definition DataSet.h:158
void DeleteAllResults(Types::ETreeType type, Types::EAnalysisType analysistype)
Deletes all results currently in the dataset.
Definition DataSet.cxx:343
MsgLogger & Log() const
Definition DataSet.h:164
void ApplyTrainingBlockDivision()
void InitSampling(Float_t fraction, Float_t weight, UInt_t seed=0)
initialize random or importance sampling
Definition DataSet.cxx:459
const Event * GetEvent(Long64_t ievt, Types::ETreeType type) const
Definition DataSet.h:76
UInt_t TreeIndex(Types::ETreeType type) const
Definition DataSet.h:181
void IncrementNClassEvents(Int_t type, UInt_t classNumber)
Definition DataSet.cxx:151
void DeleteResults(const TString &, Types::ETreeType type, Types::EAnalysisType analysistype)
delete the results stored for this particular Method instance.
Definition DataSet.cxx:316
Bool_t fHasNegativeEventWeights
true if at least one signal or bkg event has negative weight
Definition DataSet.h:161
Long64_t fTrainingBlockSize
block size into which the training dataset is divided
Definition DataSet.h:169
void CreateSampling() const
create an event sampling (random or importance sampling)
Definition DataSet.cxx:508
const TTree * GetEventCollectionAsTree()
std::vector< std::vector< Event * > > fEventCollection
list of events for training/testing/...
Definition DataSet.h:141
void SetCurrentType(Types::ETreeType type) const
Definition DataSet.h:89
TRandom3 * fSamplingRandom
-> random generator for sampling
Definition DataSet.h:154
const Event * GetEvent(Long64_t ievt) const
Definition DataSet.h:73
const std::vector< Event * > & GetEventCollection(Types::ETreeType type=Types::kMaxTreeType) const
Definition DataSet.h:216
void SetVerbose(Bool_t)
Definition DataSet.h:112
void SetCurrentEvent(Long64_t ievt) const
Definition DataSet.h:88
Long64_t GetNEvtBkgdTrain()
return number of background training events in dataset
Definition DataSet.cxx:451
void DestroyCollection(Types::ETreeType type, Bool_t deleteEvents)
destroys the event collection (events + vector)
Definition DataSet.cxx:189
void ApplyTrainingSetDivision()
apply division of data set
Definition DataSet.cxx:395
const Event * GetTrainingEvent(Long64_t ievt) const
Definition DataSet.h:74
Bool_t HasNegativeEventWeights() const
Definition DataSet.h:101
UInt_t fCurrentTreeIdx
Definition DataSet.h:145
Long64_t GetNEvtBkgdTest()
return number of background test events in dataset
Definition DataSet.cxx:435
ostringstream derivative to redirect and format output
Definition MsgLogger.h:57
Class that is the base-class for a vector of result.
Definition Results.h:57
@ kMaxTreeType
also used as temporary storage for trees not yet assigned for testing;training...
Definition Types.h:145
@ kTrainingOriginal
placeholder (continues the kValidation note): currently not used, but could be moved "forward" if ever needed
Definition Types.h:147
@ kTraining
Definition Types.h:143
@ kValidation
these are placeholders... currently not used, but could be moved "forward" if ever needed
Definition Types.h:146
The TNamed class is the base class for all named ROOT classes.
Definition TNamed.h:29
Random number generator class based on M. Matsumoto and T. Nishimura's Mersenne Twister generator.
Definition TRandom3.h:27
Basic string class.
Definition TString.h:139
A TTree represents a columnar dataset.
Definition TTree.h:79
create variable transformations