Logo ROOT  
Reference Guide
 
Loading...
Searching...
No Matches
DataSetInfo.h
Go to the documentation of this file.
1// // @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer, Helge Voss
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : DataSetInfo *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Contains all the data information *
12 * *
13 * Authors (alphabetical): *
14 * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
15 * Joerg Stelzer <Joerg.Stelzer@cern.ch> - DESY, Germany *
16 * *
17 * Copyright (c) 2008-2011: *
18 * CERN, Switzerland *
19 * MPI-K Heidelberg, Germany *
20 * DESY Hamburg, Germany *
21 * *
22 * Redistribution and use in source and binary forms, with or without *
23 * modification, are permitted according to the terms listed in LICENSE *
24 * (http://tmva.sourceforge.net/LICENSE) *
25 **********************************************************************************/
26
27#ifndef ROOT_TMVA_DataSetInfo
28#define ROOT_TMVA_DataSetInfo
29
30//////////////////////////////////////////////////////////////////////////
31// //
32// DataSetInfo //
33// //
34// Class that contains all the data information //
35// //
36//////////////////////////////////////////////////////////////////////////
37
38#include <iosfwd>
39#include <vector>
40#include <map>
41
42#include "TObject.h"
43#include "TString.h"
44#include "TTree.h"
45#include "TCut.h"
46#include "TMatrixDfwd.h"
47
48#include "TMVA/Types.h"
49#include "TMVA/VariableInfo.h"
50#include "TMVA/ClassInfo.h"
51#include "TMVA/Event.h"
52
53class TH2;
54
55namespace TMVA {
56
57 class DataSet;
58 class VariableTransformBase;
59 class MsgLogger;
60 class DataSetManager;
61
62 class DataSetInfo : public TObject {
63 // Describes one data set: its input variables, targets, spectators and classes, plus a pointer to the associated DataSet.
64 public:
65
66 enum { kIsArrayVariable = BIT(15) }; // NOTE(review): presumably marks variables that belong to a variables array - confirm against the implementation
67
68 DataSetInfo(const TString& name = "Default"); // the name defaults to "Default" when none is given
69 virtual ~DataSetInfo();
70
71 virtual const char* GetName() const { return fName.Data(); } // C string owned by fName
72
73 // the data set
74 void ClearDataSet() const;
75 DataSet* GetDataSet() const;
76
77 // ---
78 // the variable data
79 // ---
80 VariableInfo& AddVariable( const TString& expression, const TString& title = "", const TString& unit = "",
81 Double_t min = 0, Double_t max = 0, char varType='F',
82 Bool_t normalized = kTRUE, void* external = nullptr );
83 VariableInfo& AddVariable( const VariableInfo& varInfo );
84
85 // add an array of variables (e.g. for image data); takes the array expression and its size
86 void AddVariablesArray(const TString &expression, Int_t size, const TString &title = "", const TString &unit = "",
87 Double_t min = 0, Double_t max = 0, char type = 'F', Bool_t normalized = kTRUE,
88 void *external = nullptr );
89 // targets and spectators: unlike AddVariable, title, unit, min and max have no default values
90 VariableInfo& AddTarget ( const TString& expression, const TString& title, const TString& unit,
91 Double_t min, Double_t max, Bool_t normalized = kTRUE, void* external = nullptr );
92 VariableInfo& AddTarget ( const VariableInfo& varInfo );
93
94 VariableInfo& AddSpectator ( const TString& expression, const TString& title, const TString& unit,
95 Double_t min, Double_t max, char type = 'F', Bool_t normalized = kTRUE, void* external = nullptr );
96 VariableInfo& AddSpectator ( const VariableInfo& varInfo );
97
98 ClassInfo* AddClass ( const TString& className );
99
100 // accessors
101
102 // general (the by-index getters below use std::vector::at, so the index is bounds-checked)
103 std::vector<VariableInfo>& GetVariableInfos() { return fVariables; }
104 const std::vector<VariableInfo>& GetVariableInfos() const { return fVariables; }
106 const VariableInfo& GetVariableInfo( Int_t i ) const { return fVariables.at(i); }
107 // size stored in fVarArrays for the given array expression, or -1 if the expression is not found there
108 Int_t GetVarArraySize(const TString &expression) const {
109 auto element = fVarArrays.find(expression);
110 return (element != fVarArrays.end()) ? element->second : -1;
111 }
113
114 std::vector<VariableInfo> &GetTargetInfos()
115 {
116 return fTargets;
117 }
118 const std::vector<VariableInfo> &GetTargetInfos() const { return fTargets; }
120 const VariableInfo &GetTargetInfo(Int_t i) const { return fTargets.at(i); }
121
122 std::vector<VariableInfo> &GetSpectatorInfos() { return fSpectators; }
123 const std::vector<VariableInfo> &GetSpectatorInfos() const { return fSpectators; }
125 const VariableInfo &GetSpectatorInfo(Int_t i) const { return fSpectators.at(i); }
126
127 UInt_t GetNVariables() const { return fVariables.size(); } // number of entries in fVariables
128 UInt_t GetNTargets() const { return fTargets.size(); } // number of entries in fTargets
129 UInt_t GetNSpectators(bool all = kTRUE) const; // NOTE(review): the meaning of `all` is not visible in this header - confirm
130
131 const TString &GetNormalization() const { return fNormalization; }
132 void SetNormalization(const TString &norm) { fNormalization = norm; }
133 // plain setters for the summed training/testing weights of signal and background
134 void SetTrainingSumSignalWeights(Double_t trainingSumSignalWeights)
135 {
136 fTrainingSumSignalWeights = trainingSumSignalWeights;}
137 void SetTrainingSumBackgrWeights(Double_t trainingSumBackgrWeights){fTrainingSumBackgrWeights = trainingSumBackgrWeights;}
138 void SetTestingSumSignalWeights (Double_t testingSumSignalWeights ){fTestingSumSignalWeights = testingSumSignalWeights ;}
139 void SetTestingSumBackgrWeights (Double_t testingSumBackgrWeights ){fTestingSumBackgrWeights = testingSumBackgrWeights ;}
140
145
146
147
148 // classification information
152 ClassInfo* GetClassInfo( Int_t clNum ) const;
153 ClassInfo* GetClassInfo( const TString& name ) const;
154 void PrintClasses() const;
155 UInt_t GetNClasses() const { return fClasses.size(); } // number of entries in fClasses
156 Bool_t IsSignal( const Event* ev ) const;
157 std::vector<Float_t>* GetTargetsForMulticlass( const Event* ev );
159
160 // by variable
161 Int_t FindVarIndex( const TString& ) const;
162
163 // weights (the returned TString is a copy; the result of GetClassInfo(i) is dereferenced without a null check)
164 const TString GetWeightExpression(Int_t i) const { return GetClassInfo(i)->GetWeight(); }
165 void SetWeightExpression( const TString& exp, const TString& className = "" );
166
167 // cuts (both GetCut overloads dereference the result of GetClassInfo without a null check)
168 const TCut& GetCut (Int_t i) const { return GetClassInfo(i)->GetCut(); }
169 const TCut& GetCut ( const TString& className ) const { return GetClassInfo(className)->GetCut(); }
170 void SetCut ( const TCut& cut, const TString& className );
171 void AddCut ( const TCut& cut, const TString& className );
172 Bool_t HasCuts() const;
173
174 std::vector<TString> GetListOfVariables() const;
175
176 // correlation matrix
177 const TMatrixD* CorrelationMatrix ( const TString& className ) const;
178 void SetCorrelationMatrix ( const TString& className, TMatrixD* matrix );
179 void PrintCorrelationMatrix( const TString& className );
181 const TString& hName, // NOTE(review): the opening line of this declaration (header line 180) is not present in this listing
182 const TString& hTitle ) const;
183
184 // options
186 const TString& GetSplitOptions() const { return fSplitOptions; }
187
188 // root dir
190 TDirectory* GetRootDir() const { return fOwnRootDir; }
191
192 void SetMsgType( EMsgType t ) const;
193
195 private:
196 // private setter; DataSetManager is declared a friend just below, so it can call it
198 void SetDataSetManager( DataSetManager* dsm ) { fDataSetManager = dsm; } // DSMTEST
199 friend class DataSetManager; // DSMTEST (datasetmanager test)
200 // copying is disabled: copy constructor and copy assignment are deleted
201 DataSetInfo(const DataSetInfo &) = delete;
202 DataSetInfo & operator= (const DataSetInfo &) = delete;
203
205
206 TString fName; ///< name of the dataset info object
207
208 mutable DataSet* fDataSet; ///< dataset, owned by this datasetinfo object
209 mutable Bool_t fNeedsRebuilding; ///< flag if rebuilding of dataset is needed (after change of cuts, vars, etc.)
210
211 // expressions/formulas
212 std::vector<VariableInfo> fVariables; ///< list of variable expressions/internal names
213 std::vector<VariableInfo> fTargets; ///< list of targets expressions/internal names
214 std::vector<VariableInfo> fSpectators; ///< list of spectators expressions/internal names
215
216 // variable arrays
217 std::map<TString, int> fVarArrays; ///< size of each variables array, keyed by its expression
218
219 // the classes
220 mutable std::vector<ClassInfo*> fClasses; ///< name and other infos of the classes
221
224
229
230
231
232 TDirectory* fOwnRootDir; ///< ROOT output dir
233 Bool_t fVerbose; ///< Verbosity
234
235 UInt_t fSignalClass; ///< index of the class with the name signal
236
237 std::vector<Float_t>* fTargetsForMulticlass;///<-> all targets 0 except the one with index==classNumber
238
239 mutable MsgLogger* fLogger; ///<! message logger
240 MsgLogger& Log() const { return *fLogger; } // dereferences fLogger without a null check
241
242 public:
243
245 };
246}
247
248#endif
#define d(i)
Definition RSha256.hxx:102
size_t size(const MatrixT &matrix)
retrieve the size of a square matrix
bool Bool_t
Definition RtypesCore.h:63
unsigned int UInt_t
Definition RtypesCore.h:46
double Double_t
Definition RtypesCore.h:59
constexpr Bool_t kTRUE
Definition RtypesCore.h:100
#define ClassDef(name, id)
Definition Rtypes.h:337
#define BIT(n)
Definition Rtypes.h:85
Option_t Option_t TPoint TPoint const char GetTextMagnitude GetFillStyle GetLineColor GetLineWidth GetMarkerStyle GetTextAlign GetTextColor GetTextSize void char Point_t Rectangle_t WindowAttributes_t Float_t Float_t Float_t Int_t Int_t UInt_t UInt_t Rectangle_t Int_t Int_t Window_t TString Int_t GCValues_t GetPrimarySelectionOwner GetDisplay GetScreen GetColormap GetNativeEvent const char const char dpyName wid window const char font_name cursor keysym reg const char only_if_exist regb h Point_t winding char text const char depth char const char Int_t count const char ColorStruct_t color const char Pixmap_t Pixmap_t PictureAttributes_t attr const char char ret_data h unsigned char height h Atom_t Int_t ULong_t ULong_t unsigned char prop_list Atom_t Atom_t Atom_t Time_t type
char name[80]
Definition TGX11.cxx:110
A specialized string object used for TTree selections.
Definition TCut.h:25
Describe directory structure in memory.
Definition TDirectory.h:45
Service class for 2-D histogram classes.
Definition TH2.h:30
Class that contains all the information of a class.
Definition ClassInfo.h:49
const TCut & GetCut() const
Definition ClassInfo.h:64
const TString & GetWeight() const
Definition ClassInfo.h:63
Class that contains all the data information.
Definition DataSetInfo.h:62
const TString GetWeightExpression(Int_t i) const
std::vector< VariableInfo > & GetVariableInfos()
const std::vector< VariableInfo > & GetSpectatorInfos() const
Bool_t HasCuts() const
UInt_t GetNVariables() const
const std::vector< VariableInfo > & GetVariableInfos() const
UInt_t GetNSpectators(bool all=kTRUE) const
const VariableInfo & GetVariableInfo(Int_t i) const
Int_t GetVarArraySize(const TString &expression) const
VariableInfo & AddVariable(const TString &expression, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0, char varType='F', Bool_t normalized=kTRUE, void *external=nullptr)
add a variable (can be a complex expression) to the set of variables used in the MV analysis
void SetSplitOptions(const TString &so)
DataSetInfo & operator=(const DataSetInfo &)=delete
TDirectory * fOwnRootDir
ROOT output dir.
std::map< TString, int > fVarArrays
ClassInfo * AddClass(const TString &className)
virtual const char * GetName() const
Returns name of object.
Definition DataSetInfo.h:71
const TString & GetNormalization() const
const TMatrixD * CorrelationMatrix(const TString &className) const
Bool_t IsVariableFromArray(Int_t i) const
std::vector< VariableInfo > & GetSpectatorInfos()
const VariableInfo & GetTargetInfo(Int_t i) const
TDirectory * GetRootDir() const
std::vector< ClassInfo * > fClasses
name and other infos of the classes
Double_t fTrainingSumBackgrWeights
void SetNormalization(const TString &norm)
Int_t GetTargetNameMaxLength() const
virtual ~DataSetInfo()
destructor
MsgLogger & Log() const
Double_t fTestingSumSignalWeights
Double_t GetTestingSumBackgrWeights()
void SetMsgType(EMsgType t) const
UInt_t GetNClasses() const
const TString & GetSplitOptions() const
UInt_t GetNTargets() const
Bool_t fNeedsRebuilding
flag if rebuilding of dataset is needed (after change of cuts, vars, etc.)
const TCut & GetCut(const TString &className) const
void SetTestingSumSignalWeights(Double_t testingSumSignalWeights)
std::vector< VariableInfo > fSpectators
list of spectators expressions/internal names
void PrintCorrelationMatrix(TTree *theTree)
Bool_t fVerbose
Verbosity.
DataSet * GetDataSet() const
returns data set
MsgLogger * fLogger
! message logger
TH2 * CreateCorrelationMatrixHist(const TMatrixD *m, const TString &hName, const TString &hTitle) const
UInt_t GetSignalClassIndex()
const std::vector< VariableInfo > & GetTargetInfos() const
void SetTrainingSumSignalWeights(Double_t trainingSumSignalWeights)
std::vector< TString > GetListOfVariables() const
returns list of variables
DataSet * fDataSet
dataset, owned by this datasetinfo object
ClassInfo * GetClassInfo(Int_t clNum) const
void SetTestingSumBackgrWeights(Double_t testingSumBackgrWeights)
void SetDataSetManager(DataSetManager *dsm)
Double_t GetTrainingSumSignalWeights()
Double_t fTrainingSumSignalWeights
void PrintClasses() const
Int_t GetClassNameMaxLength() const
Double_t GetTrainingSumBackgrWeights()
VariableInfo & AddTarget(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, Bool_t normalized=kTRUE, void *external=nullptr)
add a target (can be a complex expression) to the set of targets used in the MV analysis
void PrintCorrelationMatrix(const TString &className)
calculates the correlation matrices for signal and background, prints them to standard output,...
std::vector< VariableInfo > fTargets
list of targets expressions/internal names
const TCut & GetCut(Int_t i) const
const VariableInfo & GetSpectatorInfo(Int_t i) const
std::vector< Float_t > * fTargetsForMulticlass
-> all targets 0 except the one with index==classNumber
UInt_t fSignalClass
index of the class with the name signal
void SetCut(const TCut &cut, const TString &className)
set the cut for the classes
Double_t GetTestingSumSignalWeights()
Int_t FindVarIndex(const TString &) const
find variable by name
VariableInfo & GetVariableInfo(Int_t i)
std::vector< VariableInfo > & GetTargetInfos()
void SetTrainingSumBackgrWeights(Double_t trainingSumBackgrWeights)
std::vector< VariableInfo > fVariables
list of variable expressions/internal names
void SetRootDir(TDirectory *d)
Int_t GetVariableNameMaxLength() const
Double_t fTestingSumBackgrWeights
Bool_t IsSignal(const Event *ev) const
VariableInfo & AddSpectator(const TString &expression, const TString &title, const TString &unit, Double_t min, Double_t max, char type='F', Bool_t normalized=kTRUE, void *external=nullptr)
add a spectator (can be a complex expression) to the set of spectator variables used in the MV analys...
void SetWeightExpression(const TString &exp, const TString &className="")
set the weight expressions for the classes if class name is specified, set only for this class if cla...
DataSetManager * GetDataSetManager()
VariableInfo & GetTargetInfo(Int_t i)
DataSetInfo(const DataSetInfo &)=delete
TMVA::DataSetManager * fDataSetManager
VariableInfo & GetSpectatorInfo(Int_t i)
void AddCut(const TCut &cut, const TString &className)
add a cut for the classes
void AddVariablesArray(const TString &expression, Int_t size, const TString &title="", const TString &unit="", Double_t min=0, Double_t max=0, char type='F', Bool_t normalized=kTRUE, void *external=nullptr)
add an array of variables identified by an expression corresponding to an array entry in the tree
std::vector< Float_t > * GetTargetsForMulticlass(const Event *ev)
TString fName
name of the dataset info object
void SetCorrelationMatrix(const TString &className, TMatrixD *matrix)
void ClearDataSet() const
Class that contains all the data information.
Class that contains all the data information.
Definition DataSet.h:58
ostringstream derivative to redirect and format output
Definition MsgLogger.h:57
Class for type info of MVA input variable.
EMsgType
Definition Types.h:55
Mother of all ROOT objects.
Definition TObject.h:41
R__ALWAYS_INLINE Bool_t TestBit(UInt_t f) const
Definition TObject.h:201
Basic string class.
Definition TString.h:139
const char * Data() const
Definition TString.h:380
A TTree represents a columnar dataset.
Definition TTree.h:79
create variable transformations
TMarker m
Definition textangle.C:8