Logo ROOT   6.08/07
Reference Guide
MethodFDA.cxx
Go to the documentation of this file.
1 // @(#)root/tmva $Id$
2 // Author: Andreas Hoecker, Peter Speckmayer, Joerg Stelzer
3 
4 /**********************************************************************************
5  * Project: TMVA - a Root-integrated toolkit for multivariate data analysis *
6  * Package: TMVA *
7  * Class : MethodFDA *
8  * Web : http://tmva.sourceforge.net *
9  * *
10  * Description: *
11  * Implementation *
12  * *
13  * Authors (alphabetical): *
14  * Andreas Hoecker <Andreas.Hocker@cern.ch> - CERN, Switzerland *
15  * Peter Speckmayer <speckmay@mail.cern.ch> - CERN, Switzerland *
16  * Joerg Stelzer <stelzer@cern.ch> - DESY, Germany *
17  * Maciej Kruk <mkruk@cern.ch> - IFJ PAN & AGH, Poland *
18  * *
19  * Copyright (c) 2005-2006: *
20  * CERN, Switzerland *
21  * MPI-K Heidelberg, Germany *
22  * *
23  * Redistribution and use in source and binary forms, with or without *
24  * modification, are permitted according to the terms listed in LICENSE *
25  * (http://tmva.sourceforge.net/LICENSE) *
26  **********************************************************************************/
27 
28 //_______________________________________________________________________
29 //
30 // Function discriminant analysis (FDA). This simple classifier //
31 // fits any user-defined TFormula (via option configuration string) to //
32 // the training data by requiring a formula response of 1 (0) to signal //
33 // (background) events. The parameter fitting is done via the abstract //
34 // class FitterBase, featuring Monte Carlo sampling, Genetic //
35 // Algorithm, Simulated Annealing, MINUIT and combinations of these. //
36 // //
37 // Can compute regression value for one dimensional output //
38 //_______________________________________________________________________
39 
#include "TMVA/MethodFDA.h"

#include "TMVA/ClassifierFactory.h"
#include "TMVA/Config.h"
#include "TMVA/Configurable.h"
#include "TMVA/DataSetInfo.h"
#include "TMVA/FitterBase.h"
#include "TMVA/GeneticFitter.h"
#include "TMVA/Interval.h"
#include "TMVA/IFitterTarget.h"
#include "TMVA/IMethod.h"
#include "TMVA/MCFitter.h"
#include "TMVA/MethodBase.h"
#include "TMVA/MinuitFitter.h"
#include "TMVA/MsgLogger.h"
#include "TMVA/SimulatedAnnealingFitter.h"
#include "TMVA/Timer.h"
#include "TMVA/Tools.h"
#include "TMVA/TransformationHandler.h"
#include "TMVA/Types.h"

#include "Riostream.h"
#include "TList.h"
#include "TFormula.h"
#include "TString.h"
#include "TObjString.h"
#include "TRandom3.h"
#include "TMath.h"

#include <algorithm>
#include <cmath>
#include <iterator>
#include <stdexcept>
#include <sstream>
73 
74 using std::stringstream;
75 
76 REGISTER_METHOD(FDA)
77 
79 
80 ////////////////////////////////////////////////////////////////////////////////
81 /// standard constructor
82 
83  TMVA::MethodFDA::MethodFDA( const TString& jobName,
84  const TString& methodTitle,
85  DataSetInfo& theData,
86  const TString& theOption)
87  : MethodBase( jobName, Types::kFDA, methodTitle, theData, theOption),
88  IFitterTarget (),
89  fFormula ( 0 ),
90  fNPars ( 0 ),
91  fFitter ( 0 ),
92  fConvergerFitter( 0 ),
93  fSumOfWeightsSig( 0 ),
94  fSumOfWeightsBkg( 0 ),
95  fSumOfWeights ( 0 ),
96  fOutputDimensions( 0 )
97 {
98 }
99 
100 ////////////////////////////////////////////////////////////////////////////////
101 /// constructor from weight file
102 
104  const TString& theWeightFile)
105  : MethodBase( Types::kFDA, theData, theWeightFile),
106  IFitterTarget (),
107  fFormula ( 0 ),
108  fNPars ( 0 ),
109  fFitter ( 0 ),
110  fConvergerFitter( 0 ),
111  fSumOfWeightsSig( 0 ),
112  fSumOfWeightsBkg( 0 ),
113  fSumOfWeights ( 0 ),
114  fOutputDimensions( 0 )
115 {
116 }
117 
118 ////////////////////////////////////////////////////////////////////////////////
119 /// default initialisation
120 
122 {
123  fNPars = 0;
124 
125  fBestPars.clear();
126 
127  fSumOfWeights = 0;
128  fSumOfWeightsSig = 0;
129  fSumOfWeightsBkg = 0;
130 
131  fFormulaStringP = "";
132  fParRangeStringP = "";
133  fFormulaStringT = "";
134  fParRangeStringT = "";
135 
136  fFitMethod = "";
137  fConverger = "";
138 
139  if( DoMulticlass() )
140  if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
141 
142 }
143 
144 ////////////////////////////////////////////////////////////////////////////////
145 /// define the options (their key words) that can be set in the option string
146 ///
147 /// format of function string:
148 /// "x0*(0)+((1)/x1)**(2)..."
149 /// where "[i]" are the parameters, and "xi" the input variables
150 ///
151 /// format of parameter string:
152 /// "(-1.2,3.4);(-2.3,4.55);..."
153 /// where the numbers in "(a,b)" correspond to the a=min, b=max parameter ranges;
154 /// each parameter defined in the function string must have a corresponding range
155 ///
156 
158 {
159  DeclareOptionRef( fFormulaStringP = "(0)", "Formula", "The discrimination formula" );
160  DeclareOptionRef( fParRangeStringP = "()", "ParRanges", "Parameter ranges" );
161 
162  // fitter
163  DeclareOptionRef( fFitMethod = "MINUIT", "FitMethod", "Optimisation Method");
164  AddPreDefVal(TString("MC"));
165  AddPreDefVal(TString("GA"));
166  AddPreDefVal(TString("SA"));
167  AddPreDefVal(TString("MINUIT"));
168 
169  DeclareOptionRef( fConverger = "None", "Converger", "FitMethod uses Converger to improve result");
170  AddPreDefVal(TString("None"));
171  AddPreDefVal(TString("MINUIT"));
172 }
173 
174 ////////////////////////////////////////////////////////////////////////////////
175 /// translate formula string into TFormula, and parameter string into par ranges
176 
178 {
179  // process transient strings
181 
182  // intepret formula string
183 
184  // replace the parameters "(i)" by the TFormula style "[i]"
185  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
186  fFormulaStringT.ReplaceAll( Form("(%i)",ipar), Form("[%i]",ipar) );
187  }
188 
189  // sanity check, there should be no "(i)", with 'i' a number anymore
190  for (Int_t ipar=fNPars; ipar<1000; ipar++) {
191  if (fFormulaStringT.Contains( Form("(%i)",ipar) ))
192  Log() << kFATAL
193  << "<CreateFormula> Formula contains expression: \"" << Form("(%i)",ipar) << "\", "
194  << "which cannot be attributed to a parameter; "
195  << "it may be that the number of variable ranges given via \"ParRanges\" "
196  << "does not match the number of parameters in the formula expression, please verify!"
197  << Endl;
198  }
199 
200  // write the variables "xi" as additional parameters "[npar+i]"
201  for (Int_t ivar=GetNvar()-1; ivar >= 0; ivar--) {
202  fFormulaStringT.ReplaceAll( Form("x%i",ivar), Form("[%i]",ivar+fNPars) );
203  }
204 
205  // sanity check, there should be no "xi", with 'i' a number anymore
206  for (UInt_t ivar=GetNvar(); ivar<1000; ivar++) {
207  if (fFormulaStringT.Contains( Form("x%i",ivar) ))
208  Log() << kFATAL
209  << "<CreateFormula> Formula contains expression: \"" << Form("x%i",ivar) << "\", "
210  << "which cannot be attributed to an input variable" << Endl;
211  }
212 
213  Log() << "User-defined formula string : \"" << fFormulaStringP << "\"" << Endl;
214  Log() << "TFormula-compatible formula string: \"" << fFormulaStringT << "\"" << Endl;
215  Log() << kDEBUG << "Creating and compiling formula" << Endl;
216 
217  // create TF1
218  if (fFormula) delete fFormula;
219  fFormula = new TFormula( "FDA_Formula", fFormulaStringT );
220 
221  // is formula correct ?
222  if (!fFormula->IsValid())
223  Log() << kFATAL << "<ProcessOptions> Formula expression could not be properly compiled" << Endl;
224 
225  // other sanity checks
226  if (fFormula->GetNpar() > (Int_t)(fNPars + GetNvar()))
227  Log() << kFATAL << "<ProcessOptions> Dubious number of parameters in formula expression: "
228  << fFormula->GetNpar() << " - compared to maximum allowed: " << fNPars + GetNvar() << Endl;
229 }
230 
231 ////////////////////////////////////////////////////////////////////////////////
232 /// the option string is decoded, for availabel options see "DeclareOptions"
233 
235 {
236  // process transient strings
238 
239  // interpret parameter string
240  fParRangeStringT.ReplaceAll( " ", "" );
242 
243  TList* parList = gTools().ParseFormatLine( fParRangeStringT, ";" );
244  if ((UInt_t)parList->GetSize() != fNPars) {
245  Log() << kFATAL << "<ProcessOptions> Mismatch in parameter string: "
246  << "the number of parameters: " << fNPars << " != ranges defined: "
247  << parList->GetSize() << "; the format of the \"ParRanges\" string "
248  << "must be: \"(-1.2,3.4);(-2.3,4.55);...\", "
249  << "where the numbers in \"(a,b)\" correspond to the a=min, b=max parameter ranges; "
250  << "each parameter defined in the function string must have a corresponding rang."
251  << Endl;
252  }
253 
254  fParRange.resize( fNPars );
255  for (UInt_t ipar=0; ipar<fNPars; ipar++) fParRange[ipar] = 0;
256 
257  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
258  // parse (a,b)
259  TString str = ((TObjString*)parList->At(ipar))->GetString();
260  Ssiz_t istr = str.First( ',' );
261  TString pminS(str(1,istr-1));
262  TString pmaxS(str(istr+1,str.Length()-2-istr));
263 
264  stringstream stmin; Float_t pmin=0; stmin << pminS.Data(); stmin >> pmin;
265  stringstream stmax; Float_t pmax=0; stmax << pmaxS.Data(); stmax >> pmax;
266 
267  // sanity check
268  if (TMath::Abs(pmax-pmin) < 1.e-30) pmax = pmin;
269  if (pmin > pmax) Log() << kFATAL << "<ProcessOptions> max > min in interval for parameter: ["
270  << ipar << "] : [" << pmin << ", " << pmax << "] " << Endl;
271 
272  Log() << kINFO << "Create parameter interval for parameter " << ipar << " : [" << pmin << "," << pmax << "]" << Endl;
273  fParRange[ipar] = new Interval( pmin, pmax );
274  }
275  delete parList;
276 
277  // create formula
278  CreateFormula();
279 
280 
281  // copy parameter ranges for each output dimension ==================
282  fOutputDimensions = 1;
283  if( DoRegression() )
285  if( DoMulticlass() )
287 
288  for( Int_t dim = 1; dim < fOutputDimensions; ++dim ){
289  for( UInt_t par = 0; par < fNPars; ++par ){
290  fParRange.push_back( fParRange.at(par) );
291  }
292  }
293  // ====================
294 
295  // create minimiser
297  if (fConverger == "MINUIT") {
298  fConvergerFitter = new MinuitFitter( *this, Form("%s_Converger_Minuit", GetName()), fParRange, GetOptions() );
299  SetOptions(dynamic_cast<Configurable*>(fConvergerFitter)->GetOptions());
300  }
301 
302  if(fFitMethod == "MC")
303  fFitter = new MCFitter( *fConvergerFitter, Form("%s_Fitter_MC", GetName()), fParRange, GetOptions() );
304  else if (fFitMethod == "GA")
305  fFitter = new GeneticFitter( *fConvergerFitter, Form("%s_Fitter_GA", GetName()), fParRange, GetOptions() );
306  else if (fFitMethod == "SA")
308  else if (fFitMethod == "MINUIT")
309  fFitter = new MinuitFitter( *fConvergerFitter, Form("%s_Fitter_Minuit", GetName()), fParRange, GetOptions() );
310  else {
311  Log() << kFATAL << "<Train> Do not understand fit method:" << fFitMethod << Endl;
312  }
313 
315 }
316 
317 ////////////////////////////////////////////////////////////////////////////////
318 /// destructor
319 
321 {
322  ClearAll();
323 }
324 
325 ////////////////////////////////////////////////////////////////////////////////
326 /// FDA can handle classification with 2 classes and regression with one regression-target
327 
329 {
330  if (type == Types::kClassification && numberClasses == 2) return kTRUE;
331  if (type == Types::kMulticlass ) return kTRUE;
332  if (type == Types::kRegression ) return kTRUE;
333  return kFALSE;
334 }
335 
336 
337 ////////////////////////////////////////////////////////////////////////////////
338 /// delete and clear all class members
339 
341 {
342  // if there is more than one output dimension, the paramater ranges are the same again (object has been copied).
343  // hence, ... erase the copied pointers to assure, that they are deleted only once.
344  // fParRange.erase( fParRange.begin()+(fNPars), fParRange.end() );
345  for (UInt_t ipar=0; ipar<fParRange.size() && ipar<fNPars; ipar++) {
346  if (fParRange[ipar] != 0) { delete fParRange[ipar]; fParRange[ipar] = 0; }
347  }
348  fParRange.clear();
349 
350  if (fFormula != 0) { delete fFormula; fFormula = 0; }
351  fBestPars.clear();
352 }
353 
354 ////////////////////////////////////////////////////////////////////////////////
355 /// FDA training
356 
358 {
359  // cache training events
360  fSumOfWeights = 0;
361  fSumOfWeightsSig = 0;
362  fSumOfWeightsBkg = 0;
363 
364  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
365 
366  // read the training event
367  const Event* ev = GetEvent(ievt);
368 
369  // true event copy
370  Float_t w = ev->GetWeight();
371 
372  if (!DoRegression()) {
373  if (DataInfo().IsSignal(ev)) { fSumOfWeightsSig += w; }
374  else { fSumOfWeightsBkg += w; }
375  }
376  fSumOfWeights += w;
377  }
378 
379  // sanity check
380  if (!DoRegression()) {
381  if (fSumOfWeightsSig <= 0 || fSumOfWeightsBkg <= 0) {
382  Log() << kFATAL << "<Train> Troubles in sum of weights: "
383  << fSumOfWeightsSig << " (S) : " << fSumOfWeightsBkg << " (B)" << Endl;
384  }
385  }
386  else if (fSumOfWeights <= 0) {
387  Log() << kFATAL << "<Train> Troubles in sum of weights: "
388  << fSumOfWeights << Endl;
389  }
390 
391  // starting values (not used by all fitters)
392  fBestPars.clear();
393  for (std::vector<Interval*>::const_iterator parIt = fParRange.begin(); parIt != fParRange.end(); parIt++) {
394  fBestPars.push_back( (*parIt)->GetMean() );
395  }
396 
397  // execute the fit
398  Double_t estimator = fFitter->Run( fBestPars );
399 
400  // print results
401  PrintResults( fFitMethod, fBestPars, estimator );
402 
403  delete fFitter; fFitter = 0;
404  if (fConvergerFitter!=0 && fConvergerFitter!=(IFitterTarget*)this) {
405  delete fConvergerFitter;
406  fConvergerFitter = 0;
407  }
409 }
410 
411 ////////////////////////////////////////////////////////////////////////////////
412 /// display fit parameters
413 /// check maximum length of variable name
414 
415 void TMVA::MethodFDA::PrintResults( const TString& fitter, std::vector<Double_t>& pars, const Double_t estimator ) const
416 {
417  Log() << kINFO;
418  Log() << kHEADER << "Results for parameter fit using \"" << fitter << "\" fitter:" << Endl;
419  std::vector<TString> parNames;
420  for (UInt_t ipar=0; ipar<pars.size(); ipar++) parNames.push_back( Form("Par(%i)",ipar ) );
421  gTools().FormattedOutput( pars, parNames, "Parameter" , "Fit result", Log(), "%g" );
422  Log() << "Discriminator expression: \"" << fFormulaStringP << "\"" << Endl;
423  Log() << "Value of estimator at minimum: " << estimator << Endl;
424 }
425 
426 
427 ////////////////////////////////////////////////////////////////////////////////
428 /// compute estimator for given parameter set (to be minimised)
429 /// const Double_t sumOfWeights[] = { fSumOfWeightsSig, fSumOfWeightsBkg, fSumOfWeights };
430 
431 Double_t TMVA::MethodFDA::EstimatorFunction( std::vector<Double_t>& pars )
432 {
433  const Double_t sumOfWeights[] = { fSumOfWeightsBkg, fSumOfWeightsSig, fSumOfWeights };
434  Double_t estimator[] = { 0, 0, 0 };
435 
436  Double_t result, deviation;
437  Double_t desired = 0.0;
438 
439  // calculate the deviation from the desired value
440  if( DoRegression() ){
441  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
442  // read the training event
443  const TMVA::Event* ev = GetEvent(ievt);
444 
445  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
446  desired = ev->GetTarget( dim );
447  result = InterpretFormula( ev, pars.begin(), pars.end() );
448  deviation = TMath::Power(result - desired, 2);
449  estimator[2] += deviation * ev->GetWeight();
450  }
451  }
452  estimator[2] /= sumOfWeights[2];
453  // return value is sum over normalised signal and background contributions
454  return estimator[2];
455 
456  }else if( DoMulticlass() ){
457  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
458  // read the training event
459  const TMVA::Event* ev = GetEvent(ievt);
460 
462 
463  Double_t crossEntropy = 0.0;
464  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
465  Double_t y = fMulticlassReturnVal->at(dim);
466  Double_t t = (ev->GetClass() == static_cast<UInt_t>(dim) ? 1.0 : 0.0 );
467  crossEntropy += t*log(y);
468  }
469  estimator[2] += ev->GetWeight()*crossEntropy;
470  }
471  estimator[2] /= sumOfWeights[2];
472  // return value is sum over normalised signal and background contributions
473  return estimator[2];
474 
475  }else{
476  for (UInt_t ievt=0; ievt<GetNEvents(); ievt++) {
477  // read the training event
478  const TMVA::Event* ev = GetEvent(ievt);
479 
480  desired = (DataInfo().IsSignal(ev) ? 1.0 : 0.0);
481  result = InterpretFormula( ev, pars.begin(), pars.end() );
482  deviation = TMath::Power(result - desired, 2);
483  estimator[Int_t(desired)] += deviation * ev->GetWeight();
484  }
485  estimator[0] /= sumOfWeights[0];
486  estimator[1] /= sumOfWeights[1];
487  // return value is sum over normalised signal and background contributions
488  return estimator[0] + estimator[1];
489  }
490 }
491 
492 ////////////////////////////////////////////////////////////////////////////////
493 /// formula interpretation
494 
495 Double_t TMVA::MethodFDA::InterpretFormula( const Event* event, std::vector<Double_t>::iterator parBegin, std::vector<Double_t>::iterator parEnd )
496 {
497  Int_t ipar = 0;
498  // std::cout << "pars ";
499  for( std::vector<Double_t>::iterator it = parBegin; it != parEnd; ++it ){
500  // std::cout << " i" << ipar << " val" << (*it);
501  fFormula->SetParameter( ipar, (*it) );
502  ++ipar;
503  }
504  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) fFormula->SetParameter( ivar+ipar, event->GetValue(ivar) );
505 
506  Double_t result = fFormula->Eval( 0 );
507  // std::cout << " result " << result << std::endl;
508  return result;
509 }
510 
511 ////////////////////////////////////////////////////////////////////////////////
512 /// returns MVA value for given event
513 
515 {
516  const Event* ev = GetEvent();
517 
518  // cannot determine error
519  NoErrorCalc(err, errUpper);
520 
521  return InterpretFormula( ev, fBestPars.begin(), fBestPars.end() );
522 }
523 
524 ////////////////////////////////////////////////////////////////////////////////
525 
526 const std::vector<Float_t>& TMVA::MethodFDA::GetRegressionValues()
527 {
528  if (fRegressionReturnVal == NULL) fRegressionReturnVal = new std::vector<Float_t>();
529  fRegressionReturnVal->clear();
530 
531  const Event* ev = GetEvent();
532 
533  Event* evT = new Event(*ev);
534 
535  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){
536  Int_t offset = dim*fNPars;
537  evT->SetTarget(dim,InterpretFormula( ev, fBestPars.begin()+offset, fBestPars.begin()+offset+fNPars ) );
538  }
539  const Event* evT2 = GetTransformationHandler().InverseTransform( evT );
540  fRegressionReturnVal->push_back(evT2->GetTarget(0));
541 
542  delete evT;
543 
544  return (*fRegressionReturnVal);
545 }
546 
547 
548 ////////////////////////////////////////////////////////////////////////////////
549 
550 const std::vector<Float_t>& TMVA::MethodFDA::GetMulticlassValues()
551 {
552  if (fMulticlassReturnVal == NULL) fMulticlassReturnVal = new std::vector<Float_t>();
553  fMulticlassReturnVal->clear();
554  std::vector<Float_t> temp;
555 
556  // returns MVA value for given event
557  const TMVA::Event* evt = GetEvent();
558 
559  CalculateMulticlassValues( evt, fBestPars, temp );
560 
561  UInt_t nClasses = DataInfo().GetNClasses();
562  for(UInt_t iClass=0; iClass<nClasses; iClass++){
563  Double_t norm = 0.0;
564  for(UInt_t j=0;j<nClasses;j++){
565  if(iClass!=j)
566  norm+=exp(temp[j]-temp[iClass]);
567  }
568  (*fMulticlassReturnVal).push_back(1.0/(1.0+norm));
569  }
570 
571  return (*fMulticlassReturnVal);
572 }
573 
574 
575 ////////////////////////////////////////////////////////////////////////////////
576 /// calculate the values for multiclass
577 
578 void TMVA::MethodFDA::CalculateMulticlassValues( const TMVA::Event*& evt, std::vector<Double_t>& parameters, std::vector<Float_t>& values)
579 {
580  values.clear();
581 
582  // std::copy( parameters.begin(), parameters.end(), std::ostream_iterator<double>( std::cout, " " ) );
583  // std::cout << std::endl;
584 
585  // char inp;
586  // std::cin >> inp;
587 
588  Double_t sum=0;
589  for( Int_t dim = 0; dim < fOutputDimensions; ++dim ){ // check for all other dimensions (=classes)
590  Int_t offset = dim*fNPars;
591  Double_t value = InterpretFormula( evt, parameters.begin()+offset, parameters.begin()+offset+fNPars );
592  // std::cout << "dim : " << dim << " value " << value << " offset " << offset << std::endl;
593  values.push_back( value );
594  sum += value;
595  }
596 
597  // // normalize to sum of value (commented out, .. have to think of how to treat negative classifier values)
598  // std::transform( fMulticlassReturnVal.begin(), fMulticlassReturnVal.end(), fMulticlassReturnVal.begin(), bind2nd( std::divides<float>(), sum) );
599 }
600 
601 
602 
603 ////////////////////////////////////////////////////////////////////////////////
604 /// read back the training results from a file (stream)
605 
606 void TMVA::MethodFDA::ReadWeightsFromStream( std::istream& istr )
607 {
608  // retrieve best function parameters
609  // coverity[tainted_data_argument]
610  istr >> fNPars;
611 
612  fBestPars.clear();
613  fBestPars.resize( fNPars );
614  for (UInt_t ipar=0; ipar<fNPars; ipar++) istr >> fBestPars[ipar];
615 }
616 
617 ////////////////////////////////////////////////////////////////////////////////
618 /// create XML description for LD classification and regression
619 /// (for arbitrary number of output classes/targets)
620 
621 void TMVA::MethodFDA::AddWeightsXMLTo( void* parent ) const
622 {
623  void* wght = gTools().AddChild(parent, "Weights");
624  gTools().AddAttr( wght, "NPars", fNPars );
625  gTools().AddAttr( wght, "NDim", fOutputDimensions );
626  for (UInt_t ipar=0; ipar<fNPars*fOutputDimensions; ipar++) {
627  void* coeffxml = gTools().AddChild( wght, "Parameter" );
628  gTools().AddAttr( coeffxml, "Index", ipar );
629  gTools().AddAttr( coeffxml, "Value", fBestPars[ipar] );
630  }
631 
632  // write formula
633  gTools().AddAttr( wght, "Formula", fFormulaStringP );
634 }
635 
636 ////////////////////////////////////////////////////////////////////////////////
637 /// read coefficients from xml weight file
638 
640 {
641  gTools().ReadAttr( wghtnode, "NPars", fNPars );
642 
643  if(gTools().HasAttr( wghtnode, "NDim")) {
644  gTools().ReadAttr( wghtnode, "NDim" , fOutputDimensions );
645  } else {
646  // older weight files don't have this attribute
647  fOutputDimensions = 1;
648  }
649 
650  fBestPars.clear();
652 
653  void* ch = gTools().GetChild(wghtnode);
654  Double_t par;
655  UInt_t ipar;
656  while (ch) {
657  gTools().ReadAttr( ch, "Index", ipar );
658  gTools().ReadAttr( ch, "Value", par );
659 
660  // sanity check
661  if (ipar >= fNPars*fOutputDimensions) Log() << kFATAL << "<ReadWeightsFromXML> index out of range: "
662  << ipar << " >= " << fNPars << Endl;
663  fBestPars[ipar] = par;
664 
665  ch = gTools().GetNextChild(ch);
666  }
667 
668  // read formula
669  gTools().ReadAttr( wghtnode, "Formula", fFormulaStringP );
670 
671  // create the TFormula
672  CreateFormula();
673 }
674 
675 ////////////////////////////////////////////////////////////////////////////////
676 /// write FDA-specific classifier response
677 
678 void TMVA::MethodFDA::MakeClassSpecific( std::ostream& fout, const TString& className ) const
679 {
680  fout << " double fParameter[" << fNPars << "];" << std::endl;
681  fout << "};" << std::endl;
682  fout << "" << std::endl;
683  fout << "inline void " << className << "::Initialize() " << std::endl;
684  fout << "{" << std::endl;
685  for(UInt_t ipar=0; ipar<fNPars; ipar++) {
686  fout << " fParameter[" << ipar << "] = " << fBestPars[ipar] << ";" << std::endl;
687  }
688  fout << "}" << std::endl;
689  fout << std::endl;
690  fout << "inline double " << className << "::GetMvaValue__( const std::vector<double>& inputValues ) const" << std::endl;
691  fout << "{" << std::endl;
692  fout << " // interpret the formula" << std::endl;
693 
694  // replace parameters
695  TString str = fFormulaStringT;
696  for (UInt_t ipar=0; ipar<fNPars; ipar++) {
697  str.ReplaceAll( Form("[%i]", ipar), Form("fParameter[%i]", ipar) );
698  }
699 
700  // replace input variables
701  for (UInt_t ivar=0; ivar<GetNvar(); ivar++) {
702  str.ReplaceAll( Form("[%i]", ivar+fNPars), Form("inputValues[%i]", ivar) );
703  }
704 
705  fout << " double retval = " << str << ";" << std::endl;
706  fout << std::endl;
707  fout << " return retval; " << std::endl;
708  fout << "}" << std::endl;
709  fout << std::endl;
710  fout << "// Clean up" << std::endl;
711  fout << "inline void " << className << "::Clear() " << std::endl;
712  fout << "{" << std::endl;
713  fout << " // nothing to clear" << std::endl;
714  fout << "}" << std::endl;
715 }
716 
717 ////////////////////////////////////////////////////////////////////////////////
718 /// get help message text
719 ///
720 /// typical length of text line:
721 /// "|--------------------------------------------------------------|"
722 
724 {
725  Log() << Endl;
726  Log() << gTools().Color("bold") << "--- Short description:" << gTools().Color("reset") << Endl;
727  Log() << Endl;
728  Log() << "The function discriminant analysis (FDA) is a classifier suitable " << Endl;
729  Log() << "to solve linear or simple nonlinear discrimination problems." << Endl;
730  Log() << Endl;
731  Log() << "The user provides the desired function with adjustable parameters" << Endl;
732  Log() << "via the configuration option string, and FDA fits the parameters to" << Endl;
733  Log() << "it, requiring the signal (background) function value to be as close" << Endl;
734  Log() << "as possible to 1 (0). Its advantage over the more involved and" << Endl;
735  Log() << "automatic nonlinear discriminators is the simplicity and transparency " << Endl;
736  Log() << "of the discrimination expression. A shortcoming is that FDA will" << Endl;
737  Log() << "underperform for involved problems with complicated, phase space" << Endl;
738  Log() << "dependent nonlinear correlations." << Endl;
739  Log() << Endl;
740  Log() << "Please consult the Users Guide for the format of the formula string" << Endl;
741  Log() << "and the allowed parameter ranges:" << Endl;
742  if (gConfig().WriteOptionsReference()) {
743  Log() << "<a href=\"http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf\">"
744  << "http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf</a>" << Endl;
745  }
746  else Log() << "http://tmva.sourceforge.net/docu/TMVAUsersGuide.pdf" << Endl;
747  Log() << Endl;
748  Log() << gTools().Color("bold") << "--- Performance optimisation:" << gTools().Color("reset") << Endl;
749  Log() << Endl;
750  Log() << "The FDA performance depends on the complexity and fidelity of the" << Endl;
751  Log() << "user-defined discriminator function. As a general rule, it should" << Endl;
752  Log() << "be able to reproduce the discrimination power of any linear" << Endl;
753  Log() << "discriminant analysis. To reach into the nonlinear domain, it is" << Endl;
754  Log() << "useful to inspect the correlation profiles of the input variables," << Endl;
755  Log() << "and add quadratic and higher polynomial terms between variables as" << Endl;
756  Log() << "necessary. Comparison with more involved nonlinear classifiers can" << Endl;
757  Log() << "be used as a guide." << Endl;
758  Log() << Endl;
759  Log() << gTools().Color("bold") << "--- Performance tuning via configuration options:" << gTools().Color("reset") << Endl;
760  Log() << Endl;
761  Log() << "Depending on the function used, the choice of \"FitMethod\" is" << Endl;
762  Log() << "crucial for getting valuable solutions with FDA. As a guideline it" << Endl;
763  Log() << "is recommended to start with \"FitMethod=MINUIT\". When more complex" << Endl;
764  Log() << "functions are used where MINUIT does not converge to reasonable" << Endl;
765  Log() << "results, the user should switch to non-gradient FitMethods such" << Endl;
766  Log() << "as GeneticAlgorithm (GA) or Monte Carlo (MC). It might prove to be" << Endl;
767  Log() << "useful to combine GA (or MC) with MINUIT by setting the option" << Endl;
768  Log() << "\"Converger=MINUIT\". GA (MC) will then set the starting parameters" << Endl;
769  Log() << "for MINUIT such that the basic quality of GA (MC) of finding global" << Endl;
770  Log() << "minima is combined with the efficacy of MINUIT of finding local" << Endl;
771  Log() << "minima." << Endl;
772 }
Config & gConfig()
Definition: Config.cxx:43
double par[1]
Definition: unuranDistr.cxx:38
void Init(void)
default initialisation
Definition: MethodFDA.cxx:121
static long int sum(long int i)
Definition: Factory.cxx:1786
Double_t fSumOfWeightsBkg
Definition: MethodFDA.h:151
void DeclareOptions()
define the options (their key words) that can be set in the option string
Definition: MethodFDA.cxx:157
Double_t Eval(Double_t x) const
Definition: TFormula.cxx:2569
MsgLogger & Endl(MsgLogger &ml)
Definition: MsgLogger.h:162
void ClearAll()
delete and clear all class members
Definition: MethodFDA.cxx:340
Collectable string class.
Definition: TObjString.h:32
float Float_t
Definition: RtypesCore.h:53
TString & ReplaceAll(const TString &s1, const TString &s2)
Definition: TString.h:635
UInt_t GetNvar() const
Definition: MethodBase.h:340
MsgLogger & Log() const
Definition: Configurable.h:128
OptionBase * DeclareOptionRef(T &ref, const TString &name, const TString &desc="")
EAnalysisType
Definition: Types.h:129
Double_t InterpretFormula(const Event *, std::vector< Double_t >::iterator begin, std::vector< Double_t >::iterator end)
formula interpretation
Definition: MethodFDA.cxx:495
Basic string class.
Definition: TString.h:137
void CreateFormula()
translate formula string into TFormula, and parameter string into par ranges
Definition: MethodFDA.cxx:177
Int_t GetNpar() const
Definition: TFormula.h:175
TransformationHandler & GetTransformationHandler(Bool_t takeReroutedIfAvailable=true)
Definition: MethodBase.h:390
MethodFDA(const TString &jobName, const TString &methodTitle, DataSetInfo &theData, const TString &theOption="")
standard constructor
Definition: MethodFDA.cxx:83
int Int_t
Definition: RtypesCore.h:41
bool Bool_t
Definition: RtypesCore.h:59
const Bool_t kFALSE
Definition: Rtypes.h:92
UInt_t GetNClasses() const
Definition: DataSetInfo.h:154
void AddAttr(void *node, const char *, const T &value, Int_t precision=16)
Definition: Tools.h:309
void * AddChild(void *parent, const char *childname, const char *content=0, bool isRootNode=false)
add child node
Definition: Tools.cxx:1134
Short_t Abs(Short_t d)
Definition: TMathBase.h:110
LongDouble_t Power(LongDouble_t x, LongDouble_t y)
Definition: TMath.h:501
TString fFormulaStringP
Definition: MethodFDA.h:134
TString fFitMethod
Definition: MethodFDA.h:143
Tools & gTools()
Definition: Tools.cxx:79
std::vector< Double_t > fBestPars
Definition: MethodFDA.h:142
Double_t GetMvaValue(Double_t *err=0, Double_t *errUpper=0)
returns MVA value for given event
Definition: MethodFDA.cxx:514
Double_t Run()
estimator function interface for fitting
Definition: FitterBase.cxx:80
std::vector< Interval * > fParRange
Definition: MethodFDA.h:141
const Event * GetEvent() const
Definition: MethodBase.h:745
TString fConverger
Definition: MethodFDA.h:144
void SetParameter(const char *name, Double_t value)
Definition: TFormula.cxx:2352
void * GetChild(void *parent, const char *childname=0)
get child node
Definition: Tools.cxx:1158
UInt_t GetClass() const
Definition: Event.h:89
DataSetInfo & DataInfo() const
Definition: MethodBase.h:406
void SetOptions(const TString &s)
Definition: Configurable.h:91
FitterBase * fFitter
Definition: MethodFDA.h:145
Bool_t DoRegression() const
Definition: MethodBase.h:434
Ssiz_t First(char c) const
Find first occurrence of a character c.
Definition: TString.cxx:467
Double_t GetWeight() const
return the event weight - depending on whether the flag IgnoreNegWeightsInTraining is or not...
Definition: Event.cxx:378
TString fFormulaStringT
Definition: MethodFDA.h:136
void PrintResults(const TString &, std::vector< Double_t > &, const Double_t) const
display fit parameters; check maximum length of variable name
Definition: MethodFDA.cxx:415
Bool_t HasAnalysisType(Types::EAnalysisType type, UInt_t numberClasses, UInt_t numberTargets)
FDA can handle classification with 2 classes and regression with one regression-target.
Definition: MethodFDA.cxx:328
UInt_t GetNEvents() const
temporary event when testing on a different DataSet than the own one
Definition: MethodBase.h:413
A doubly linked list.
Definition: TList.h:47
TFormula * fFormula
Definition: MethodFDA.h:139
Bool_t DoMulticlass() const
Definition: MethodBase.h:435
void GetHelpMessage() const
get help message text
Definition: MethodFDA.cxx:723
Float_t GetTarget(UInt_t itgt) const
Definition: Event.h:104
UInt_t GetNTargets() const
Definition: DataSetInfo.h:129
const char * GetName() const
Definition: MethodBase.h:330
void MakeClassSpecific(std::ostream &, const TString &) const
write FDA-specific classifier response
Definition: MethodFDA.cxx:678
void CalculateMulticlassValues(const TMVA::Event *&evt, std::vector< Double_t > &parameters, std::vector< Float_t > &values)
calculate the values for multiclass
Definition: MethodFDA.cxx:578
The F O R M U L A class.
Definition: TFormula.h:89
unsigned int UInt_t
Definition: RtypesCore.h:42
char * Form(const char *fmt,...)
const Event * InverseTransform(const Event *, Bool_t suppressIfNoTargets=true) const
IFitterTarget * fConvergerFitter
Definition: MethodFDA.h:146
Ssiz_t Length() const
Definition: TString.h:390
void SetTarget(UInt_t itgt, Float_t value)
set the target value (dimension itgt) to value
Definition: Event.cxx:356
void ReadAttr(void *node, const char *, T &value)
Definition: Tools.h:296
virtual TObject * At(Int_t idx) const
Returns the object at position idx. Returns 0 if idx is out of range.
Definition: TList.cxx:311
void ReadWeightsFromXML(void *wghtnode)
read coefficients from xml weight file
Definition: MethodFDA.cxx:639
void Train(void)
FDA training.
Definition: MethodFDA.cxx:357
void ProcessOptions()
the option string is decoded; for available options see "DeclareOptions"
Definition: MethodFDA.cxx:234
TString fParRangeStringP
Definition: MethodFDA.h:135
void AddWeightsXMLTo(void *parent) const
create XML description for LD classification and regression (for arbitrary number of output classes/t...
Definition: MethodFDA.cxx:621
Float_t GetValue(UInt_t ivar) const
return value of i'th variable
Definition: Event.cxx:233
int Ssiz_t
Definition: RtypesCore.h:63
#define ClassImp(name)
Definition: Rtypes.h:279
double Double_t
Definition: RtypesCore.h:55
std::vector< Float_t > * fMulticlassReturnVal
Definition: MethodBase.h:592
int type
Definition: TGX11.cxx:120
Double_t EstimatorFunction(std::vector< Double_t > &)
compute estimator for given parameter set (to be minimised) const Double_t sumOfWeights[] = { fSumOfW...
Definition: MethodFDA.cxx:431
void * GetNextChild(void *prevchild, const char *childname=0)
XML helpers.
Definition: Tools.cxx:1170
Double_t y[n]
Definition: legend1.C:17
void ReadWeightsFromStream(std::istream &i)
read back the training results from a file (stream)
Definition: MethodFDA.cxx:606
Bool_t Contains(const char *pat, ECaseCompare cmp=kExact) const
Definition: TString.h:567
you should not use this method at all Int_t Int_t Double_t Double_t Double_t e
Definition: TRolke.cxx:630
void AddPreDefVal(const T &)
Definition: Configurable.h:174
void ExitFromTraining()
Definition: MethodBase.h:458
const TString & GetOptions() const
Definition: Configurable.h:90
void FormattedOutput(const std::vector< Double_t > &, const std::vector< TString > &, const TString titleVars, const TString titleValues, MsgLogger &logger, TString format="%+1.3f")
formatted output of simple table
Definition: Tools.cxx:896
const TString & Color(const TString &)
human readable color strings
Definition: Tools.cxx:837
Int_t fOutputDimensions
Definition: MethodFDA.h:155
Double_t fSumOfWeightsSig
Definition: MethodFDA.h:150
#define REGISTER_METHOD(CLASS)
for example
Abstract ClassifierFactory template that handles arbitrary types.
TString fParRangeStringT
Definition: MethodFDA.h:137
virtual const std::vector< Float_t > & GetRegressionValues()
Definition: MethodFDA.cxx:526
TList * ParseFormatLine(TString theString, const char *sep=":")
Parse the string and cut into labels separated by ":".
Definition: Tools.cxx:413
#define NULL
Definition: Rtypes.h:82
Int_t CountChar(Int_t c) const
Return number of times character c occurs in the string.
Definition: TString.cxx:444
Bool_t IsSignal(const Event *ev) const
virtual const std::vector< Float_t > & GetMulticlassValues()
Definition: MethodFDA.cxx:550
std::vector< Float_t > * fRegressionReturnVal
Definition: MethodBase.h:591
Double_t fSumOfWeights
Definition: MethodFDA.h:152
double result[121]
virtual Int_t GetSize() const
Definition: TCollection.h:95
double exp(double)
const Bool_t kTRUE
Definition: Rtypes.h:91
Bool_t IsValid() const
Definition: TFormula.h:186
double norm(double *x, double *p)
Definition: unuranDistr.cxx:40
void CheckForUnusedOptions() const
checks for unused options in option string
double crossEntropy(ItProbability itProbabilityBegin, ItProbability itProbabilityEnd, ItTruth itTruthBegin, ItTruth itTruthEnd, ItDelta itDelta, ItDelta itDeltaEnd, ItInvActFnc itInvActFnc, double patternWeight)
cross entropy error function
Definition: NeuralNet.icc:405
virtual ~MethodFDA(void)
destructor
Definition: MethodFDA.cxx:320
double log(double)
void NoErrorCalc(Double_t *const err, Double_t *const errUpper)
Definition: MethodBase.cxx:819
const char * Data() const
Definition: TString.h:349