Logo ROOT  
Reference Guide
MethodFDA.h
Go to the documentation of this file.
1// @(#)root/tmva $Id$
2// Author: Andreas Hoecker, Peter Speckmayer
3
4/**********************************************************************************
5 * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6 * Package: TMVA *
7 * Class : MethodFDA *
8 * Web : http://tmva.sourceforge.net *
9 * *
10 * Description: *
11 * Function discriminant analysis (FDA). This simple classifier *
12 * fits any user-defined TFormula (via option configuration string) to *
13 * the training data by requiring a formula response of 1 (0) to signal *
14 * (background) events. The parameter fitting is done via the abstract *
15 * class FitterBase, featuring Monte Carlo sampling, Genetic *
16 * Algorithm, Simulated Annealing, MINUIT and combinations of these. *
17 * *
18 * Authors (alphabetical): *
19 * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
20 * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
21 * *
22 * Copyright (c) 2005-2010: *
23 * CERN, Switzerland *
24 * MPI-K Heidelberg, Germany *
25 * *
26 * Redistribution and use in source and binary forms, with or without *
27 * modification, are permitted according to the terms listed in LICENSE *
28 * (http://tmva.sourceforge.net/LICENSE) *
29 **********************************************************************************/
30
31#ifndef ROOT_TMVA_MethodFDA
32#define ROOT_TMVA_MethodFDA
33
34//////////////////////////////////////////////////////////////////////////
35// //
36// MethodFDA //
37// //
38// Function discriminant analysis (FDA). This simple classifier //
39// fits any user-defined TFormula (via option configuration string) to //
40// the training data by requiring a formula response of 1 (0) to signal //
41// (background) events. The parameter fitting is done via the abstract //
42// class FitterBase, featuring Monte Carlo sampling, Genetic //
43// Algorithm, Simulated Annealing, MINUIT and combinations of these. //
44// //
45// Can compute one-dimensional regression //
46// //
47//////////////////////////////////////////////////////////////////////////
48
49#include "TMVA/MethodBase.h"
50#include "TMVA/IFitterTarget.h"
51
52class TFormula;
53
54namespace TMVA {
55
56 class Interval;
57 class Event;
58 class FitterBase;
59
60 class MethodFDA : public MethodBase, public IFitterTarget {
61 // Function discriminant analysis (FDA): fits a user-defined TFormula to the training data
62 public:
63 // standard constructor
64 MethodFDA( const TString& jobName,
65 const TString& methodTitle,
66 DataSetInfo& theData,
67 const TString& theOption = "");
68 // constructor taking the data set info and a weight file (theWeightFile)
69 MethodFDA( DataSetInfo& theData,
70 const TString& theWeightFile);
71 // destructor
72 virtual ~MethodFDA( void );
73 // FDA can handle classification with 2 classes and regression with one regression target
74 Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
75
76 // training method
77 void Train( void );
78
80
81 void AddWeightsXMLTo ( void* parent ) const; // create XML description (classification and regression)
82
83 void ReadWeightsFromStream( std::istream & i ); // read back the training results from a stream
84 void ReadWeightsFromXML ( void* wghtnode ); // read coefficients from XML weight file
85
86 // calculate the MVA value for the given event
87 Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
88 // regression and multiclass responses
89 virtual const std::vector<Float_t>& GetRegressionValues();
90 virtual const std::vector<Float_t>& GetMulticlassValues();
91 // default initialisation
92 void Init( void );
93
94 // ranking of input variables (none is provided here: always returns 0)
95 const Ranking* CreateRanking() { return 0; }
96 // compute estimator for a given parameter set (the quantity to be minimised)
97 Double_t EstimatorFunction( std::vector<Double_t>& );
98
99 // no check of options at this place (intentionally empty)
100 void CheckSetup() {}
101
102 protected:
103
104 // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
105 void MakeClassSpecific( std::ostream&, const TString& ) const;
106
107 // get help message text
108 void GetHelpMessage() const;
109
110 private:
111
112 // compute multiclass values for one event and parameter set; results are written to "values"
113 void CalculateMulticlassValues( const TMVA::Event*& evt, std::vector<Double_t>& parameters, std::vector<Float_t>& values);
114
115
116 // create and interpret formula expression and compute estimator
117 void CreateFormula (); // translate formula string into TFormula, and parameter string into par ranges
118 Double_t InterpretFormula( const Event*, std::vector<Double_t>::iterator begin, std::vector<Double_t>::iterator end );
119
120 // clean up: delete and clear all class members
121 void ClearAll();
122
123 // print fit results (display fit parameters)
124 void PrintResults( const TString&, std::vector<Double_t>&, const Double_t ) const;
125
126 // the option handling methods
127 void DeclareOptions(); // define the options (their key words) that can be set in the option string
128 void ProcessOptions(); // decode the option string
129
130 TString fFormulaStringP; // string with function ("P" variant; NOTE(review): presumably the form given in the option string - confirm)
131 TString fParRangeStringP; // string with ranges of parameters ("P" variant, see NOTE on fFormulaStringP)
132 TString fFormulaStringT; // string with function ("T" variant; NOTE(review): presumably the transformed form used to build the TFormula - confirm)
133 TString fParRangeStringT; // string with ranges of parameters ("T" variant, see NOTE on fFormulaStringT)
134
135 TFormula* fFormula; // the discrimination function
136 UInt_t fNPars; // number of parameters
137 std::vector<Interval*> fParRange; // ranges of parameters
138 std::vector<Double_t> fBestPars; // the pars that optimise (minimise) the estimator
139 TString fFitMethod; // estimator optimisation method
140 TString fConverger; // fitmethod uses fConverger as intermediate step to converge into local minima
141 FitterBase* fFitter; // the fitter used in the training
142 IFitterTarget* fConvergerFitter; // intermediate fitter
143
144
145 // sum of weights (this should become centrally available through the dataset)
146 Double_t fSumOfWeightsSig; // sum of weights (signal)
147 Double_t fSumOfWeightsBkg; // sum of weights (background)
148 Double_t fSumOfWeights; // sum of weights
149
150 // dimensionality of the output
151 Int_t fOutputDimensions; // number of output values
152
153 ClassDef(MethodFDA,0); // Function Discriminant Analysis
154 };
155
156} // namespace TMVA
157
158#endif // ROOT_TMVA_MethodFDA
double Double_t
Definition: RtypesCore.h:57
#define ClassDef(name, id)
Definition: Rtypes.h:322
int type
Definition: TGX11.cxx:120
The Formula class.
Definition: TFormula.h:84
Class that contains all the data information.
Definition: DataSetInfo.h:60
Base class for TMVA fitters.
Definition: FitterBase.h:51
Interface for a fitter 'target'.
Definition: IFitterTarget.h:44
Virtual base class for all MVA methods.
Definition: MethodBase.h:111
virtual void ReadWeightsFromStream(std::istream &)=0
Function discriminant analysis (FDA).
Definition: MethodFDA.h:60
void Train(void)
FDA training.
Definition: MethodFDA.cxx:363
TString fFormulaStringT
Definition: MethodFDA.h:132
void AddWeightsXMLTo(void *parent) const
create XML description for FDA classification and regression (for arbitrary number of output classes/t...
Definition: MethodFDA.cxx:619
Double_t EstimatorFunction(std::vector< Double_t > &)
compute estimator for given parameter set (to be minimised)
Definition: MethodFDA.cxx:435
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodFDA.h:100
virtual ~MethodFDA(void)
destructor
Definition: MethodFDA.cxx:326
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
Definition: MethodFDA.cxx:499
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
Definition: MethodFDA.cxx:334
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
Definition: MethodFDA.cxx:637
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
Definition: MethodFDA.cxx:581
void ReadWeightsFromStream(std::istream &i)
read back the training results from a file (stream)
Definition: MethodFDA.cxx:604
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodFDA.cxx:553
Double_t fSumOfWeightsBkg
Definition: MethodFDA.h:147
Int_t fOutputDimensions
Definition: MethodFDA.h:151
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
Definition: MethodFDA.cxx:86
void Init(void)
default initialisation
Definition: MethodFDA.cxx:124
void ClearAll()
delete and clear all class members
Definition: MethodFDA.cxx:346
std::vector< Interval * > fParRange
Definition: MethodFDA.h:137
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters; check maximum length of variable name
Definition: MethodFDA.cxx:421
void MakeClassSpecific(std::ostream &, const TString &) const
write FDA-specific classifier response
Definition: MethodFDA.cxx:676
Double_t fSumOfWeightsSig
Definition: MethodFDA.h:146
TString fParRangeStringP
Definition: MethodFDA.h:131
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
Definition: MethodFDA.cxx:518
TFormula * fFormula
Definition: MethodFDA.h:135
const Ranking * CreateRanking()
Definition: MethodFDA.h:95
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodFDA.cxx:530
void ProcessOptions()
the option string is decoded, for available options see "DeclareOptions"
Definition: MethodFDA.cxx:240
std::vector< Double_t > fBestPars
Definition: MethodFDA.h:138
IFitterTarget * fConvergerFitter
Definition: MethodFDA.h:142
FitterBase * fFitter
Definition: MethodFDA.h:141
Double_t fSumOfWeights
Definition: MethodFDA.h:148
TString fParRangeStringT
Definition: MethodFDA.h:133
TString fFitMethod
Definition: MethodFDA.h:139
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
Definition: MethodFDA.cxx:183
void DeclareOptions()
define the options (their key words) that can be set in the option string
Definition: MethodFDA.cxx:163
TString fConverger
Definition: MethodFDA.h:140
void GetHelpMessage() const
get help message text
Definition: MethodFDA.cxx:721
TString fFormulaStringP
Definition: MethodFDA.h:130
Ranking for variables in method (implementation)
Definition: Ranking.h:48
EAnalysisType
Definition: Types.h:127
Basic string class.
Definition: TString.h:131
create variable transformations