ROOT  6.07/01
Reference Guide
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
MethodFDA.h
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodFDA *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Function discriminant analysis (FDA). This simple classifier *
12  * fits any user-defined TFormula (via option configuration string) to *
13  * the training data by requiring a formula response of 1 (0) to signal *
14  * (background) events. The parameter fitting is done via the abstract *
15  * class FitterBase, featuring Monte Carlo sampling, Genetic *
16  * Algorithm, Simulated Annealing, MINUIT and combinations of these. *
17  * *
18  * Authors (alphabetical): *
19  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
20  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
21  * *
22  * Copyright (c) 2005-2010: *
23  * CERN, Switzerland *
24  * MPI-K Heidelberg, Germany *
25  * *
26  * Redistribution and use in source and binary forms, with or without *
27  * modification, are permitted according to the terms listed in LICENSE *
28  * (http://tmva.sourceforge.net/LICENSE) *
29  **********************************************************************************/
30 
31 #ifndef ROOT_TMVA_MethodFDA
32 #define ROOT_TMVA_MethodFDA
33 
34 //////////////////////////////////////////////////////////////////////////
35 // //
36 // MethodFDA //
37 // //
38 // Function discriminant analysis (FDA). This simple classifier //
39 // fits any user-defined TFormula (via option configuration string) to //
40 // the training data by requiring a formula response of 1 (0) to signal //
41 // (background) events. The parameter fitting is done via the abstract //
42 // class FitterBase, featuring Monte Carlo sampling, Genetic //
43 // Algorithm, Simulated Annealing, MINUIT and combinations of these. //
44 // //
45 // Can compute one-dimensional regression //
46 // //
47 //////////////////////////////////////////////////////////////////////////
48 
49 #ifndef ROOT_TMVA_MethodBase
50 #include "TMVA/MethodBase.h"
51 #endif
52 #ifndef ROOT_TMVA_IFitterTarget
53 #include "TMVA/IFitterTarget.h"
54 #endif
55 
56 class TFormula;
57 
58 namespace TMVA {
59 
60  class Interval;
61  class Event;
62  class FitterBase;
63 
64  class MethodFDA : public MethodBase, public IFitterTarget {
65  // Function discriminant analysis (FDA) method; inherits the MethodBase and IFitterTarget interfaces
66  public:
67  // constructor taking job name, method title, dataset info, option string and target directory
68  MethodFDA( const TString& jobName,
69  const TString& methodTitle,
70  DataSetInfo& theData,
71  const TString& theOption = "",
72  TDirectory* theTargetDir = 0 );
73  // constructor taking dataset info, weight file name and target directory
74  MethodFDA( DataSetInfo& theData,
75  const TString& theWeightFile,
76  TDirectory* theTargetDir = NULL );
77  // destructor
78  virtual ~MethodFDA( void );
79  // FDA can handle classification with 2 classes and regression with one regression-target
80  Bool_t HasAnalysisType( Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets );
81 
82  // training method (FDA training)
83  void Train( void );
84  // NOTE(review): this listing jumps from line 84 to line 86 -- line 85 of the header is missing here; check the original file
86  // create XML description for classification and regression
87  void AddWeightsXMLTo ( void* parent ) const;
88  // read back the training results from a stream, or the coefficients from an XML weight file
89  void ReadWeightsFromStream( std::istream & i );
90  void ReadWeightsFromXML ( void* wghtnode );
91 
92  // calculate the MVA value (err and errUpper are optional and default to 0)
93  Double_t GetMvaValue( Double_t* err = 0, Double_t* errUpper = 0 );
94  // regression and multiclass responses
95  virtual const std::vector<Float_t>& GetRegressionValues();
96  virtual const std::vector<Float_t>& GetMulticlassValues();
97  // default initialisation
98  void Init( void );
99 
100  // ranking of input variables (none is provided: always returns 0)
101  const Ranking* CreateRanking() { return 0; }
102  // compute estimator for given parameter set (to be minimised)
103  Double_t EstimatorFunction( std::vector<Double_t>& );
104 
105  // no check of options at this place (intentionally empty)
106  void CheckSetup() {}
107 
108  protected:
109 
110  // make ROOT-independent C++ class for classifier response (classifier-specific implementation)
111  void MakeClassSpecific( std::ostream&, const TString& ) const;
112 
113  // get help message text
114  void GetHelpMessage() const;
115 
116  private:
117 
118  // compute multiclass values
119  void CalculateMulticlassValues( const TMVA::Event*& evt, std::vector<Double_t>& parameters, std::vector<Float_t>& values);
120 
121 
122  // create and interpret formula expression and compute estimator;
       // CreateFormula translates the formula string into a TFormula and the parameter string into par ranges
123  void CreateFormula ();
124  Double_t InterpretFormula( const Event*, std::vector<Double_t>::iterator begin, std::vector<Double_t>::iterator end );
125 
126  // clean up: delete and clear all class members
127  void ClearAll();
128 
129  // print fit results
130  void PrintResults( const TString&, std::vector<Double_t>&, const Double_t ) const;
131 
132  // the option handling methods
133  void DeclareOptions();
134  void ProcessOptions();
135  // NOTE(review): the 'P' and 'T' suffixes of the two string pairs below are not explained in this header -- confirm in MethodFDA.cxx
136  TString fFormulaStringP; // string with function
137  TString fParRangeStringP; // string with ranges of parameters
138  TString fFormulaStringT; // string with function
139  TString fParRangeStringT; // string with ranges of parameters
140 
141  TFormula* fFormula; // the discrimination function
142  UInt_t fNPars; // number of parameters
143  std::vector<Interval*> fParRange; // ranges of parameters
144  std::vector<Double_t> fBestPars; // the pars that optimise (minimise) the estimator
145  TString fFitMethod; // estimator optimisation method
146  TString fConverger; // fitmethod uses fConverger as intermediate step to converge into local minima
147  FitterBase* fFitter; // the fitter used in the training
148  IFitterTarget* fConvergerFitter; // intermediate fitter
149 
150 
151  // sum of weights (this should become centrally available through the dataset)
152  Double_t fSumOfWeightsSig; // sum of weights (signal)
153  Double_t fSumOfWeightsBkg; // sum of weights (background)
154  Double_t fSumOfWeights; // sum of weights
155 
156  // output dimensionality
157  Int_t fOutputDimensions; // number of output values
158 
159  ClassDef(MethodFDA,0) // Function Discriminant Analysis
160  };
161 
162 } // namespace TMVA
163 
164 #endif // ROOT_TMVA_MethodFDA
void Init(void)
default initialisation
Definition: MethodFDA.cxx:121
Double_t fSumOfWeightsBkg
Definition: MethodFDA.h:153
void DeclareOptions()
define the options (their key words) that can be set in the option string
Definition: MethodFDA.cxx:157
void ClearAll()
delete and clear all class members
Definition: MethodFDA.cxx:340
std::vector< double > values
Definition: TwoHistoFit2D.C:32
EAnalysisType
Definition: Types.h:124
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
Definition: MethodFDA.cxx:494
Basic string class.
Definition: TString.h:137
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
Definition: MethodFDA.cxx:177
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
void CheckSetup()
check may be overridden by derived class (sometimes, eg, fitters are used which can only be implement...
Definition: MethodFDA.h:106
TString fFormulaStringP
Definition: MethodFDA.h:136
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters; check maximum length of variable name
Definition: MethodFDA.cxx:414
TString fFitMethod
Definition: MethodFDA.h:145
std::vector< Double_t > fBestPars
Definition: MethodFDA.h:144
#define ClassDef(name, id)
Definition: Rtypes.h:254
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
Definition: MethodFDA.cxx:513
std::vector< Interval * > fParRange
Definition: MethodFDA.h:143
TString fConverger
Definition: MethodFDA.h:146
FitterBase * fFitter
Definition: MethodFDA.h:147
TString fFormulaStringT
Definition: MethodFDA.h:138
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
Definition: MethodFDA.cxx:328
TFormula * fFormula
Definition: MethodFDA.h:141
const Ranking * CreateRanking()
Definition: MethodFDA.h:101
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
Definition: MethodFDA.cxx:577
The F O R M U L A class.
Definition: TFormula.h:89
unsigned int UInt_t
Definition: RtypesCore.h:42
IFitterTarget * fConvergerFitter
Definition: MethodFDA.h:148
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
Definition: MethodFDA.cxx:638
void Train(void)
FDA training.
Definition: MethodFDA.cxx:357
void ProcessOptions()
the option string is decoded; for available options see "DeclareOptions"
Definition: MethodFDA.cxx:234
void MakeClassSpecific(std::ostream &, const TString &) const
write FDA-specific classifier response
Definition: MethodFDA.cxx:677
TString fParRangeStringP
Definition: MethodFDA.h:137
double Double_t
Definition: RtypesCore.h:55
Describe directory structure in memory.
Definition: TDirectory.h:44
int type
Definition: TGX11.cxx:120
Double_t EstimatorFunction(std::vector< Double_t > &)
compute estimator for given parameter set (to be minimised) const Double_t sumOfWeights[] = { fSumOfW...
Definition: MethodFDA.cxx:430
void ReadWeightsFromStream(std::istream &i)
read back the training results from a file (stream)
Definition: MethodFDA.cxx:605
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="", TDirectory *theTargetDir=0)
Int_t fOutputDimensions
Definition: MethodFDA.h:157
Double_t fSumOfWeightsSig
Definition: MethodFDA.h:152
TString fParRangeStringT
Definition: MethodFDA.h:139
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodFDA.cxx:525
#define NULL
Definition: Rtypes.h:82
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodFDA.cxx:549
Double_t fSumOfWeights
Definition: MethodFDA.h:154
void GetHelpMessage() const
get help message text
Definition: MethodFDA.cxx:722
virtual void ReadWeightsFromStream(std::istream &)=0
virtual ~MethodFDA(void)
destructor
Definition: MethodFDA.cxx:320
void AddWeightsXMLTo(void *parent) const
create XML description for FDA classification and regression (for arbitrary number of output classes/t...
Definition: MethodFDA.cxx:620